mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Merge branch 'develop'
This commit is contained in:
commit
c078d355b6
@ -49,12 +49,8 @@ hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf,
|
||||
if (!l) {
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
m128 v = zeroes128();
|
||||
// we don't have a clever way of doing this move yet
|
||||
memcpy(&v, d, l);
|
||||
if (noCase) {
|
||||
v = and128(v, caseMask);
|
||||
}
|
||||
m128 mask128 = noCase ? caseMask : ones128();
|
||||
m128 v = and128(loadu128(d), mask128);
|
||||
|
||||
// mask out where we can't match
|
||||
u32 mask = (0xFFFF >> (16 - l));
|
||||
@ -76,11 +72,8 @@ hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf,
|
||||
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
|
||||
const size_t l = end - start;
|
||||
|
||||
m128 v = loadu128(d);
|
||||
|
||||
if (noCase) {
|
||||
v = and128(v, caseMask);
|
||||
}
|
||||
m128 mask128 = noCase ? caseMask : ones128();
|
||||
m128 v = and128(loadu128(d), mask128);
|
||||
|
||||
u32 buf_off = start - offset;
|
||||
u32 mask = ((1 << l) - 1) << buf_off;
|
||||
@ -109,11 +102,8 @@ hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf,
|
||||
assert(l <= 32);
|
||||
|
||||
DEBUG_PRINTF("d %zu\n", d - buf);
|
||||
m128 v = zeroes128();
|
||||
memcpy(&v, d, l);
|
||||
if (noCase) {
|
||||
v = and128(v, caseMask);
|
||||
}
|
||||
m128 mask128 = noCase ? caseMask : ones128();
|
||||
m128 v = and128(loadu128(d), mask128);
|
||||
|
||||
u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1),
|
||||
eq128(mask2, v)));
|
||||
@ -137,11 +127,8 @@ hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf,
|
||||
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
|
||||
size_t l = end - start;
|
||||
|
||||
m128 v = loadu128(d);
|
||||
|
||||
if (noCase) {
|
||||
v = and128(v, caseMask);
|
||||
}
|
||||
m128 mask128 = noCase ? caseMask : ones128();
|
||||
m128 v = and128(loadu128(d), mask128);
|
||||
|
||||
u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1),
|
||||
eq128(mask2, v)));
|
||||
@ -164,9 +151,10 @@ hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start, *e = buf + end;
|
||||
assert(d < e);
|
||||
m128 mask128 = noCase ? caseMask : ones128();
|
||||
|
||||
for (; d < e; d += 16) {
|
||||
m128 v = noCase ? and128(load128(d), caseMask) : load128(d);
|
||||
m128 v = and128(load128(d), mask128);
|
||||
|
||||
u32 z = movemask128(eq128(mask1, v));
|
||||
|
||||
@ -186,9 +174,10 @@ hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf,
|
||||
const u8 *d = buf + start, *e = buf + end;
|
||||
assert(d < e);
|
||||
m128 lastz1 = zeroes128();
|
||||
m128 mask128 = noCase ? caseMask : ones128();
|
||||
|
||||
for (; d < e; d += 16) {
|
||||
m128 v = noCase ? and128(load128(d), caseMask) : load128(d);
|
||||
m128 v = and128(load128(d), mask128);
|
||||
m128 z1 = eq128(mask1, v);
|
||||
m128 z2 = eq128(mask2, v);
|
||||
u32 z = movemask128(and128(palignr(z1, lastz1, 15), z2));
|
||||
|
@ -82,11 +82,7 @@ u32 findAndClearLSB_64_impl(u64a *v) {
|
||||
|
||||
static really_inline
|
||||
u32 findAndClearMSB_32_impl(u32 *v) {
|
||||
u32 val = *v;
|
||||
u32 offset = 31 - clz32_impl(val);
|
||||
*v = val & ~(1 << offset);
|
||||
assert(offset < 32);
|
||||
return offset;
|
||||
return findAndClearMSB_32_impl_c(v);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
@ -107,20 +103,19 @@ u64a compress64_impl(u64a x, u64a m) {
|
||||
static really_inline
|
||||
m128 compress128_impl(m128 x, m128 m) {
|
||||
m128 one = set1_2x64(1);
|
||||
m128 bitset = one;
|
||||
m128 vres = zeroes128();
|
||||
m128 bb = one;
|
||||
m128 res = zeroes128();
|
||||
while (isnonzero128(m)) {
|
||||
m128 mm = sub_2x64(zeroes128(), m);
|
||||
m128 tv = and128(x, m);
|
||||
tv = and128(tv, mm);
|
||||
|
||||
m128 mask = not128(eq64_m128(tv, zeroes128()));
|
||||
mask = vandq_s64(bitset, mask);
|
||||
vres = or128(vres, mask);
|
||||
m = and128(m, sub_2x64(m, one));
|
||||
bitset = lshift64_m128(bitset, 1);
|
||||
m128 mm = sub_2x64(zeroes128(), m);
|
||||
m128 xm = and128(x, m);
|
||||
xm = and128(xm, mm);
|
||||
|
||||
m128 mask = not128(eq64_m128(xm, zeroes128()));
|
||||
res = or128(res, and128(bb, mask));
|
||||
m = and128(m, sub_2x64(m, one));
|
||||
bb = lshift64_m128(bb, 1);
|
||||
}
|
||||
return vres;
|
||||
return res;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
@ -136,20 +131,18 @@ u64a expand64_impl(u64a x, u64a m) {
|
||||
static really_inline
|
||||
m128 expand128_impl(m128 x, m128 m) {
|
||||
m128 one = set1_2x64(1);
|
||||
m128 bitset = one;
|
||||
m128 vres = zeroes128();
|
||||
m128 bb = one;
|
||||
m128 res = zeroes128();
|
||||
while (isnonzero128(m)) {
|
||||
m128 tv = and128(x, m);
|
||||
|
||||
m128 mm = sub_2x64(zeroes128(), m);
|
||||
m128 mask = not128(eq64_m128(tv, zeroes128()));
|
||||
mask = vandq_s64(bitset, mask);
|
||||
mask = and128(mask, mm);
|
||||
vres = or128(vres, mask);
|
||||
m = and128(m, sub_2x64(m, one));
|
||||
bitset = lshift64_m128(bitset, 1);
|
||||
m128 xm = and128(x, bb);
|
||||
m128 mm = sub_2x64(zeroes128(), m);
|
||||
m128 mask = not128(eq64_m128(xm, zeroes128()));
|
||||
mask = and128(mask, and128(m, mm));
|
||||
res = or128(res, mask);
|
||||
m = and128(m, sub_2x64(m, one));
|
||||
bb = lshift64_m128(bb, 1);
|
||||
}
|
||||
return vres;
|
||||
return res;
|
||||
}
|
||||
|
||||
/* returns the first set bit after begin (if not ~0U). If no bit is set after
|
||||
|
@ -62,7 +62,7 @@ static really_inline int diff128(m128 a, m128 b) {
|
||||
}
|
||||
|
||||
/**
 * Tests whether a 128-bit vector has any bit set by comparing it with the
 * all-zero vector.
 *
 * @return the result of diff128(a, zeroes128()); nonzero when a differs from
 *         zero.
 */
static really_inline int isnonzero128(m128 a) {
    return diff128(a, zeroes128());
}
|
||||
|
||||
/**
|
||||
@ -121,7 +121,6 @@ static really_inline m128 eq64_m128(m128 a, m128 b) {
|
||||
return (m128) vceqq_u64((int64x2_t)a, (int64x2_t)b);
|
||||
}
|
||||
|
||||
|
||||
static really_inline u32 movemask128(m128 a) {
|
||||
static const uint8x16_t powers = { 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128 };
|
||||
|
||||
@ -311,22 +310,28 @@ m128 palignr(m128 r, m128 l, int offset) {
|
||||
|
||||
static really_really_inline
|
||||
m128 rshiftbyte_m128(m128 a, unsigned b) {
|
||||
if (b == 0) {
|
||||
return a;
|
||||
}
|
||||
return palignr(zeroes128(), a, b);
|
||||
}
|
||||
|
||||
static really_really_inline
|
||||
m128 lshiftbyte_m128(m128 a, unsigned b) {
|
||||
if (b == 0) {
|
||||
return a;
|
||||
}
|
||||
return palignr(a, zeroes128(), 16 - b);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
m128 variable_byte_shift_m128(m128 in, s32 amount) {
|
||||
assert(amount >= -16 && amount <= 16);
|
||||
static const uint8x16_t vbs_mask = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f };
|
||||
const uint8x16_t outside_mask = set1_16x8(0xf0);
|
||||
|
||||
m128 shift_mask = palignr_imm(vbs_mask, outside_mask, 16 - amount);
|
||||
return vqtbl1q_s8(in, shift_mask);
|
||||
if (amount < 0) {
|
||||
return palignr_imm(zeroes128(), in, -amount);
|
||||
} else {
|
||||
return palignr_imm(in, zeroes128(), 16 - amount);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -72,7 +72,7 @@ static inline void print_m128_4x32(const char *label, m128 vector) {
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static inline void print_m128_2x64(char *label, m128 vector) {
|
||||
static inline void print_m128_2x64(const char *label, m128 vector) {
|
||||
uint64_t ALIGN_ATTR(16) data[2];
|
||||
store128(data, vector);
|
||||
DEBUG_PRINTF("%s: ", label);
|
||||
|
@ -294,6 +294,39 @@ TEST(BitUtils, compress64) {
|
||||
}
|
||||
}
|
||||
|
||||
// Exercises compress128() with fixed bit patterns and with every single-bit
// input, using vectors built with set1_2x64() / set1_4x32().
TEST(BitUtils, compress128) {
    const m128 zero_vec = zeroes128();
    const m128 ones_vec = ones128();
    const m128 mask_5555 = set1_2x64(0x5555555555555555ull);
    const m128 mask_aaaa = set1_2x64(0xaaaaaaaaaaaaaaaaull);
    const m128 low_half = set1_2x64(0xffffffffull);

    // An all-zero source compresses to zero whatever the mask is.
    EXPECT_EQ(0, diff128(zero_vec, compress128(zero_vec, zero_vec)));
    EXPECT_EQ(0, diff128(zero_vec, compress128(zero_vec, set1_4x32(1))));
    EXPECT_EQ(0, diff128(zero_vec, compress128(zero_vec, ones_vec)));
    // A full mask leaves the source unchanged.
    EXPECT_EQ(0, diff128(ones_vec, compress128(ones_vec, ones_vec)));
    // 32 alternating bits selected by themselves pack into the low 32 bits.
    EXPECT_EQ(0, diff128(low_half, compress128(mask_5555, mask_5555)));
    EXPECT_EQ(0, diff128(low_half, compress128(mask_aaaa, mask_aaaa)));
    // Disjoint source and mask select nothing.
    EXPECT_EQ(0, diff128(zero_vec, compress128(mask_5555, mask_aaaa)));
    EXPECT_EQ(0, diff128(zero_vec, compress128(mask_aaaa, mask_5555)));

    // Some single-bit tests.
    for (u32 bit = 0; bit < 64; bit++) {
        const m128 single = set1_2x64(1ull << bit);

        EXPECT_EQ(0, diff128(zero_vec, compress128(zero_vec, single)));
        EXPECT_EQ(0, diff128(set1_2x64(1ull), compress128(single, single)));
        EXPECT_EQ(0, diff128(single, compress128(single, ones_vec)));

        // Odd bit positions belong to the 0xaaaa... mask, even ones to the
        // 0x5555... mask; the mask holding the bit packs it to bit / 2.
        const m128 holding = (bit % 2) ? mask_aaaa : mask_5555;
        const m128 missing = (bit % 2) ? mask_5555 : mask_aaaa;
        EXPECT_EQ(0, diff128(set1_2x64(1ull << (bit / 2)), compress128(single, holding)));
        EXPECT_EQ(0, diff128(zero_vec, compress128(single, missing)));
    }
}
|
||||
|
||||
TEST(BitUtils, expand32) {
|
||||
const u32 all_ones = 0xffffffffu;
|
||||
const u32 odd_bits = 0x55555555u;
|
||||
@ -352,6 +385,35 @@ TEST(BitUtils, expand64) {
|
||||
}
|
||||
}
|
||||
|
||||
// Exercises expand128() with fixed bit patterns and with every single-bit
// mask, using vectors built with set1_2x64().
TEST(BitUtils, expand128) {
    const m128 zero_vec = zeroes128();
    const m128 ones_vec = ones128();
    const m128 mask_5555 = set1_2x64(0x5555555555555555ull);
    const m128 mask_aaaa = set1_2x64(0xaaaaaaaaaaaaaaaaull);
    const m128 low_half = set1_2x64(0xffffffffull);
    const m128 high_half = set1_2x64(0xffffffff00000000ull);

    // An all-zero source expands to zero whatever the mask is.
    EXPECT_EQ(0, diff128(zero_vec, expand128(zero_vec, zero_vec)));
    EXPECT_EQ(0, diff128(zero_vec, expand128(zero_vec, set1_2x64(1ull))));
    EXPECT_EQ(0, diff128(zero_vec, expand128(zero_vec, ones_vec)));
    // A full mask leaves the source unchanged.
    EXPECT_EQ(0, diff128(ones_vec, expand128(ones_vec, ones_vec)));
    // 32 low source bits fill all 32 positions of an alternating mask.
    EXPECT_EQ(0, diff128(mask_5555, expand128(low_half, mask_5555)));
    EXPECT_EQ(0, diff128(mask_aaaa, expand128(low_half, mask_aaaa)));
    // Source bits above the mask's population count are never deposited.
    EXPECT_EQ(0, diff128(zero_vec, expand128(high_half, mask_aaaa)));
    EXPECT_EQ(0, diff128(zero_vec, expand128(high_half, mask_5555)));
    // Source bit 0 lands on the lowest set bit of the mask.
    EXPECT_EQ(0, diff128(set1_2x64(1u), expand128(set1_2x64(1u), mask_5555)));
    EXPECT_EQ(0, diff128(set1_2x64(2u), expand128(set1_2x64(1u), mask_aaaa)));

    // Some single-bit tests.
    for (u32 bit = 0; bit < 64; bit++) {
        const m128 single = set1_2x64(1ull << bit);

        EXPECT_EQ(0, diff128(zero_vec, expand128(zero_vec, single)));
        EXPECT_EQ(0, diff128(single, expand128(set1_2x64(1ull), single)));
        EXPECT_EQ(0, diff128(single, expand128(single, ones_vec)));

        // Source bit (bit / 2) is deposited at position `bit` by whichever
        // alternating mask contains that position.
        const m128 holding = (bit % 2) ? mask_aaaa : mask_5555;
        EXPECT_EQ(0, diff128(single, expand128(set1_2x64(1ull << (bit / 2)), holding)));
    }
}
|
||||
|
||||
TEST(BitUtils, bf_op_1) {
|
||||
u64a a = 0;
|
||||
for (u32 i = 0; i < 64; i++) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user