diff --git a/src/util/arch/arm/bitutils.h b/src/util/arch/arm/bitutils.h index 0b579dc9..1d1e0167 100644 --- a/src/util/arch/arm/bitutils.h +++ b/src/util/arch/arm/bitutils.h @@ -107,102 +107,20 @@ u64a compress64_impl(u64a x, u64a m) { static really_inline m128 compress128_impl(m128 x, m128 m) { -/* x = and128(x, m); // clear irrelevant bits - - // Return zero quickly on trivial cases - if (diff128(x, zeroes128()) == 0) { - return zeroes128(); - }*/ - - - u64a ALIGN_ATTR(16) xv[2]; - u64a ALIGN_ATTR(16) mv[2]; - u64a ALIGN_ATTR(16) res[2]; - u64a ALIGN_ATTR(16) t[2]; - u64a ALIGN_ATTR(16) bbv[2]; - store128(xv, x); - store128(mv, m); - res[0] = 0; - res[1] = 0; - printf("x[%d] = %0llx\n", 0, xv[0]); - printf("x[%d] = %0llx\n", 1, xv[1]); - m128 one = set1_2x64(1); m128 bitset = one; m128 vres = zeroes128(); - for (u64a bb = 1; mv[0] | mv[1]; bb <<= 1) { - printf("bb = %lld\n", bb); - store128(bbv, bitset); - printf("bb[%d] = %0lld\n", 0, bbv[0]); - printf("bb[%d] = %0lld\n", 1, bbv[1]); - printf("m[%d] = %0llx\n", 0, mv[0]); - printf("m[%d] = %0llx\n", 1, mv[1]); - printf("scalar: -m[%d] = %0llx\n", 0, -mv[0]); - printf("scalar: -m[%d] = %0llx\n", 1, -mv[1]); + while (isnonzero128(m)) { m128 mm = sub_2x64(zeroes128(), m); - store128(t, mm); - printf("vector: -m[0] = %0llx\n", t[0]); - printf("vector: -m[1] = %0llx\n", t[1]); m128 tv = and128(x, m); - store128(t, tv); - printf("vector: x[0] & m[0] = %0llx\n", t[0]); - printf("vector: x[1] & m[1] = %0llx\n", t[1]); tv = and128(tv, mm); - store128(t, tv); - printf("vector: x[0] & m[0] & -m[0] = %0llx\n", t[0]); - printf("vector: x[1] & m[1] & -m[1] = %0llx\n", t[1]); - t[0] = xv[0] & mv[0]; - t[1] = xv[1] & mv[1]; - printf("scalar: x[0] & m[0] = %0llx\n", t[0]); - printf("scalar: x[1] & m[1] = %0llx\n", t[1]); - t[0] = xv[0] & mv[0] & -mv[0]; - t[1] = xv[1] & mv[1] & -mv[1]; - printf("scalar: x[0] & m[0] & -m[0] = %0llx\n", t[0]); - printf("scalar: x[1] & m[1] & -m[1] = %0llx\n", t[1]); - - if ( t[0] ) { - printf("x & m & -m != 0\n"); - res[0] |= bb; - printf("x[%d] = %0llx\n", 0, xv[0]); - } - if ( t[1] ) { - printf("x & m & -m != 0\n"); - res[1] |= bb; - printf("x[%d] = %0llx\n", 1, xv[1]); - } m128 mask = not128(eq64_m128(tv, zeroes128())); - store128(t, mask); - printf("mask: x[0] & m[0] & -m[0] != 0 : %0llx\n", t[0]); - printf("mask: x[1] & m[1] & -m[1] != 0 : %0llx\n", t[1]); - mask = vandq_s64(bitset, mask); - store128(t, mask); - printf("mask: mask[0] & bitset[1] != 0 : %0llx\n", t[0]); - printf("mask: mask[1] & bitset[1] != 0 : %0llx\n", t[1]); - vres = or128(vres, mask); - store128(t, vres); - printf("res: res[0] != 0 : %0llx\n", t[0]); - printf("res: res[1] != 0 : %0llx\n", t[1]); - if (t[0] != res[0]) { - printf("mismatch: t[0] != res[0]: %0llx != %0llx\n", t[0], res[0]); - } - if (t[1] != res[1]) { - printf("mismatch: t[1] != res[1]: %0llx != %0llx\n", t[1], res[1]); - } - - mv[0] &= mv[0] - 1; - mv[1] &= mv[1] - 1; m = and128(m, sub_2x64(m, set1_2x64(1))); - printf("x[%d] = %0llx\n", 0, xv[0]); - printf("x[%d] = %0llx\n", 1, xv[1]); bitset = lshift64_m128(bitset, 1); } - store128(res, vres); - printf("final x[%d] = %0llx\n", 0, res[0]); - printf("final x[%d] = %0llx\n", 1, res[1]); -// x = load128(res); return vres; } diff --git a/src/util/arch/x86/bitutils.h b/src/util/arch/x86/bitutils.h index a0769a5e..424ad957 100644 --- a/src/util/arch/x86/bitutils.h +++ b/src/util/arch/x86/bitutils.h @@ -239,7 +239,6 @@ u64a expand64_impl(u64a x, u64a m) { #endif } - /* returns the first set bit after begin (if not ~0U). If no bit is set after * begin returns ~0U */ diff --git a/src/util/state_compress.c b/src/util/state_compress.c index 586e47f4..360ec39e 100644 --- a/src/util/state_compress.c +++ b/src/util/state_compress.c @@ -109,10 +109,6 @@ static really_inline void storecompressed128_64bit(void *ptr, m128 xvec, m128 mvec) { printf("storecompressed128_64bit()\n"); // First, decompose our vectors into 64-bit chunks. -/* u64a x[2]; - memcpy(x, &xvec, sizeof(xvec)); - u64a m[2]; - memcpy(m, &mvec, sizeof(mvec));*/ u64a ALIGN_ATTR(16) x[2]; u64a ALIGN_ATTR(16) m[2]; store128(m, mvec); @@ -121,10 +117,8 @@ void storecompressed128_64bit(void *ptr, m128 xvec, m128 mvec) { // Count the number of bits of compressed state we're writing out per // chunk. u32 ALIGN_ATTR(16) bits[2] = { popcount64(m[0]), popcount64(m[1]) }; - //m128 vbits = load128(bits); // Compress each 64-bit chunk individually. - //u64a v[2] = { compress64(x[0], m[0]), compress64(x[1], m[1]) }; xvec = compress128(xvec, mvec); store128(x, xvec); @@ -169,29 +163,13 @@ m128 loadcompressed128_64bit(const void *ptr, m128 mvec) { // First, decompose our vectors into 64-bit chunks. u64a ALIGN_ATTR(16) m[2]; store128(m, mvec); - printf("m[0] = %0llx\n", m[0]); - printf("m[1] = %0llx\n", m[1]); - -// m[0] = movq(mvec); -// m[1] = movq(rshiftbyte_m128(mvec, 8)); - //store128(m, mvec); -// printf("m[0] = %0llx\n", m[0]); -// printf("m[1] = %0llx\n", m[1]); u32 bits[2] = { popcount64(m[0]), popcount64(m[1]) }; u64a ALIGN_ATTR(16) v[2]; - printf("bits[0] = %0x\n", bits[0]); - printf("bits[1] = %0x\n", bits[1]); - unpack_bits_64(v, (const u8 *)ptr, bits, 2); - printf("v[0] = %0llx\n", v[0]); - printf("v[1] = %0llx\n", v[1]); u64a x[2] = { expand64(v[0], m[0]), expand64(v[1], m[1]) }; - printf("x[0] = %0llx\n", x[0]); - printf("x[1] = %0llx\n", x[1]); - return set2x64(x[1], x[0]); }