add expand128() implementation for NEON

This commit is contained in:
Konstantinos Margaritis
2021-01-15 17:33:41 +02:00
committed by Konstantinos Margaritis
parent dccb0db1cb
commit 0b14b24616
5 changed files with 48 additions and 7 deletions

View File

@@ -106,7 +106,6 @@ u64a compress64_impl(u64a x, u64a m) {
static really_inline
m128 compress128_impl(m128 x, m128 m) {
m128 one = set1_2x64(1);
m128 bitset = one;
m128 vres = zeroes128();
@@ -118,7 +117,7 @@ m128 compress128_impl(m128 x, m128 m) {
m128 mask = not128(eq64_m128(tv, zeroes128()));
mask = vandq_s64(bitset, mask);
vres = or128(vres, mask);
m = and128(m, sub_2x64(m, set1_2x64(1)));
m = and128(m, sub_2x64(m, one));
bitset = lshift64_m128(bitset, 1);
}
return vres;
@@ -134,6 +133,25 @@ u64a expand64_impl(u64a x, u64a m) {
return expand64_impl_c(x, m);
}
/*
 * Vector bit-expand (bit deposit, PDEP-style).
 *
 * The same scalar algorithm is applied to every lane the 2x64 helpers operate
 * on (presumably two independent 64-bit lanes -- confirm against the m128
 * helper definitions): bit k of x is written to the position of the k-th
 * lowest set bit of m. Result bits outside m are always zero.
 *
 * x: source bits, consumed from bit 0 upwards.
 * m: deposit mask; each set bit receives the next source bit.
 * Returns the deposited value.
 */
static really_inline
m128 expand128_impl(m128 x, m128 m) {
    m128 one = set1_2x64(1);
    m128 bitset = one; /* source-bit selector: 1, 2, 4, ... */
    m128 vres = zeroes128();
    while (isnonzero128(m)) {
        /* Isolate the lowest mask bit still pending: m & -m. */
        m128 mm = sub_2x64(zeroes128(), m);
        m128 lowest = and128(m, mm);
        /* Test the current SOURCE bit of x (x & bitset), not x & m. */
        m128 tv = and128(x, bitset);
        /* All-ones where that source bit is set, zero elsewhere. */
        m128 mask = not128(eq64_m128(tv, zeroes128()));
        /* Deposit the isolated mask bit only where the source bit is set. */
        mask = and128(mask, lowest);
        vres = or128(vres, mask);
        /* Clear the mask bit just handled and advance to the next source bit. */
        m = and128(m, sub_2x64(m, one));
        bitset = lshift64_m128(bitset, 1);
    }
    return vres;
}
/* returns the first set bit after begin (if not ~0U). If no bit is set after
* begin returns ~0U
*/