mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
add expand128() implementation for NEON
This commit is contained in:
parent
0372a8120a
commit
6a11c83630
@ -106,7 +106,6 @@ u64a compress64_impl(u64a x, u64a m) {
|
|||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
m128 compress128_impl(m128 x, m128 m) {
|
m128 compress128_impl(m128 x, m128 m) {
|
||||||
|
|
||||||
m128 one = set1_2x64(1);
|
m128 one = set1_2x64(1);
|
||||||
m128 bitset = one;
|
m128 bitset = one;
|
||||||
m128 vres = zeroes128();
|
m128 vres = zeroes128();
|
||||||
@ -118,7 +117,7 @@ m128 compress128_impl(m128 x, m128 m) {
|
|||||||
m128 mask = not128(eq64_m128(tv, zeroes128()));
|
m128 mask = not128(eq64_m128(tv, zeroes128()));
|
||||||
mask = vandq_s64(bitset, mask);
|
mask = vandq_s64(bitset, mask);
|
||||||
vres = or128(vres, mask);
|
vres = or128(vres, mask);
|
||||||
m = and128(m, sub_2x64(m, set1_2x64(1)));
|
m = and128(m, sub_2x64(m, one));
|
||||||
bitset = lshift64_m128(bitset, 1);
|
bitset = lshift64_m128(bitset, 1);
|
||||||
}
|
}
|
||||||
return vres;
|
return vres;
|
||||||
@ -134,6 +133,25 @@ u64a expand64_impl(u64a x, u64a m) {
|
|||||||
return expand64_impl_c(x, m);
|
return expand64_impl_c(x, m);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m128 expand128_impl(m128 x, m128 m) {
|
||||||
|
m128 one = set1_2x64(1);
|
||||||
|
m128 bitset = one;
|
||||||
|
m128 vres = zeroes128();
|
||||||
|
while (isnonzero128(m)) {
|
||||||
|
m128 tv = and128(x, m);
|
||||||
|
|
||||||
|
m128 mm = sub_2x64(zeroes128(), m);
|
||||||
|
m128 mask = not128(eq64_m128(tv, zeroes128()));
|
||||||
|
mask = vandq_s64(bitset, mask);
|
||||||
|
mask = and128(mask, mm);
|
||||||
|
vres = or128(vres, mask);
|
||||||
|
m = and128(m, sub_2x64(m, one));
|
||||||
|
bitset = lshift64_m128(bitset, 1);
|
||||||
|
}
|
||||||
|
return vres;
|
||||||
|
}
|
||||||
|
|
||||||
/* returns the first set bit after begin (if not ~0U). If no bit is set after
|
/* returns the first set bit after begin (if not ~0U). If no bit is set after
|
||||||
* begin returns ~0U
|
* begin returns ~0U
|
||||||
*/
|
*/
|
||||||
|
@ -301,6 +301,18 @@ u64a expand64_impl_c(u64a x, u64a m) {
|
|||||||
return x & m0; // clear out extraneous bits*/
|
return x & m0; // clear out extraneous bits*/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m128 expand128_impl_c(m128 xvec, m128 mvec) {
|
||||||
|
u64a ALIGN_ATTR(16) x[2];
|
||||||
|
u64a ALIGN_ATTR(16) m[2];
|
||||||
|
store128(x, xvec);
|
||||||
|
store128(m, mvec);
|
||||||
|
|
||||||
|
expand64_impl_c(x[0], m[0]);
|
||||||
|
expand64_impl_c(x[1], m[1]);
|
||||||
|
|
||||||
|
return xvec;
|
||||||
|
}
|
||||||
|
|
||||||
/* returns the first set bit after begin (if not ~0U). If no bit is set after
|
/* returns the first set bit after begin (if not ~0U). If no bit is set after
|
||||||
* begin returns ~0U
|
* begin returns ~0U
|
||||||
|
@ -239,6 +239,11 @@ u64a expand64_impl(u64a x, u64a m) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m128 expand128_impl(m128 x, m128 m) {
|
||||||
|
return expand128_impl_c(x, m);
|
||||||
|
}
|
||||||
|
|
||||||
/* returns the first set bit after begin (if not ~0U). If no bit is set after
|
/* returns the first set bit after begin (if not ~0U). If no bit is set after
|
||||||
* begin returns ~0U
|
* begin returns ~0U
|
||||||
*/
|
*/
|
||||||
|
@ -135,6 +135,10 @@ u64a expand64(u64a x, u64a m) {
|
|||||||
return expand64_impl(x, m);
|
return expand64_impl(x, m);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
m128 expand128(m128 x, m128 m) {
|
||||||
|
return expand128_impl(x, m);
|
||||||
|
}
|
||||||
|
|
||||||
/* returns the first set bit after begin (if not ~0U). If no bit is set after
|
/* returns the first set bit after begin (if not ~0U). If no bit is set after
|
||||||
* begin returns ~0U
|
* begin returns ~0U
|
||||||
|
@ -162,14 +162,16 @@ m128 loadcompressed128_64bit(const void *ptr, m128 mvec) {
|
|||||||
u64a ALIGN_ATTR(16) m[2];
|
u64a ALIGN_ATTR(16) m[2];
|
||||||
store128(m, mvec);
|
store128(m, mvec);
|
||||||
|
|
||||||
u32 bits[2] = { popcount64(m[0]), popcount64(m[1]) };
|
// Count the number of bits of compressed state we're writing out per
|
||||||
|
// chunk.
|
||||||
|
u32 ALIGN_ATTR(16) bits[2] = { popcount64(m[0]), popcount64(m[1]) };
|
||||||
|
|
||||||
u64a ALIGN_ATTR(16) v[2];
|
u64a ALIGN_ATTR(16) v[2];
|
||||||
|
|
||||||
unpack_bits_64(v, (const u8 *)ptr, bits, 2);
|
unpack_bits_64(v, (const u8 *)ptr, bits, 2);
|
||||||
|
m128 xvec = load128(v);
|
||||||
|
|
||||||
u64a x[2] = { expand64(v[0], m[0]), expand64(v[1], m[1]) };
|
// Expand vector
|
||||||
|
return expand128(xvec, mvec);
|
||||||
return set2x64(x[1], x[0]);
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user