Existing scalar implementations were incorrect but never tested; ported from arm/ppc64le

This commit is contained in:
Konstantinos Margaritis 2023-11-23 16:09:10 +00:00
parent 20f4f542a5
commit dfacf75855

View File

@@ -214,16 +214,22 @@ u64a compress64_impl_c(u64a x, u64a m) {
} }
static really_inline static really_inline
m128 compress128_impl_c(m128 xvec, m128 mvec) { m128 compress128_impl_c(m128 x, m128 m) {
u64a ALIGN_ATTR(16) x[2]; m128 one = set1_2x64(1);
u64a ALIGN_ATTR(16) m[2]; m128 bitset = one;
store128(x, xvec); m128 vres = zeroes128();
store128(m, mvec); while (isnonzero128(m)) {
m128 mm = sub_2x64(zeroes128(), m);
m128 tv = and128(x, m);
tv = and128(tv, mm);
compress64_impl_c(x[0], m[0]); m128 mask = not128(eq64_m128(tv, zeroes128()));
compress64_impl_c(x[1], m[1]); mask = and128(bitset, mask);
vres = or128(vres, mask);
return xvec; m = and128(m, sub_2x64(m, one));
bitset = lshift64_m128(bitset, 1);
}
return vres;
} }
static really_inline static really_inline
@@ -303,16 +309,20 @@ u64a expand64_impl_c(u64a x, u64a m) {
} }
static really_inline static really_inline
m128 expand128_impl_c(m128 xvec, m128 mvec) { m128 expand128_impl_c(m128 x, m128 m) {
u64a ALIGN_ATTR(16) x[2]; m128 one = set1_2x64(1);
u64a ALIGN_ATTR(16) m[2]; m128 bb = one;
store128(x, xvec); m128 res = zeroes128();
store128(m, mvec); while (isnonzero128(m)) {
m128 xm = and128(x, bb);
expand64_impl_c(x[0], m[0]); m128 mm = sub_2x64(zeroes128(), m);
expand64_impl_c(x[1], m[1]); m128 mask = not128(eq64_m128(xm, zeroes128()));
mask = and128(mask, and128(m,mm));
return xvec; res = or128(res, mask);
m = and128(m, sub_2x64(m, one));
bb = lshift64_m128(bb, 1);
}
return res;
} }
/* returns the first set bit after begin (if not ~0U). If no bit is set after /* returns the first set bit after begin (if not ~0U). If no bit is set after