# Patch summary. This is free text placed ahead of the first "diff --git"
# header; it is not part of any hunk and nothing below it has been altered.
#
# src/util/arch/arm/bitutils.h
#   * findAndClearMSB_32_impl: the inline body (offset computed as
#     31 - clz32_impl(val), bit cleared with `1 << offset`, assert on the
#     offset) is removed; the function now returns
#     findAndClearMSB_32_impl_c(v).
#   * compress128_impl: locals are renamed (bitset -> bb, vres -> res,
#     tv -> xm) and the separate vandq_s64(bitset, mask) step is folded into
#     or128(res, and128(bb, mask)).
#   * expand128_impl: same renames, plus a logic change. Each iteration now
#     tests and128(x, bb) instead of and128(x, m), and the bits ORed into the
#     result are mask & and128(m, mm) instead of bitset & mask & mm, where
#     mm = sub_2x64(zeroes128(), m).
#     NOTE(review): presumably sub_2x64(zeroes128(), m) negates each 64-bit
#     lane, which would make and128(m, mm) the lowest set bit of m per lane;
#     confirm against the helper definitions, which are not in this patch.
#
# unit/internal/bitutils.cpp
#   * Adds TEST(BitUtils, compress128) and TEST(BitUtils, expand128). Both
#     compare results through diff128() against vectors built with
#     zeroes128(), ones128(), set1_2x64() (and set1_4x32() in one compress128
#     case), then loop i = 0..63 over set1_2x64(1ull << i).
#
# NOTE(review): every "@@ -a,b +c,d @@" header declares a multi-line hunk, but
# in this copy the hunk bodies sit on a handful of very long lines. The line
# breaks look collapsed, so regenerate the patch from the repository before
# trying to apply it.
diff --git a/src/util/arch/arm/bitutils.h b/src/util/arch/arm/bitutils.h index ddca35c9..498db568 100644 --- a/src/util/arch/arm/bitutils.h +++ b/src/util/arch/arm/bitutils.h @@ -82,11 +82,7 @@ u32 findAndClearLSB_64_impl(u64a *v) { static really_inline u32 findAndClearMSB_32_impl(u32 *v) { - u32 val = *v; - u32 offset = 31 - clz32_impl(val); - *v = val & ~(1 << offset); - assert(offset < 32); - return offset; + return findAndClearMSB_32_impl_c(v); } static really_inline @@ -107,20 +103,19 @@ u64a compress64_impl(u64a x, u64a m) { static really_inline m128 compress128_impl(m128 x, m128 m) { m128 one = set1_2x64(1); - m128 bitset = one; - m128 vres = zeroes128(); + m128 bb = one; + m128 res = zeroes128(); while (isnonzero128(m)) { - m128 mm = sub_2x64(zeroes128(), m); - m128 tv = and128(x, m); - tv = and128(tv, mm); - - m128 mask = not128(eq64_m128(tv, zeroes128())); - mask = vandq_s64(bitset, mask); - vres = or128(vres, mask); - m = and128(m, sub_2x64(m, one)); - bitset = lshift64_m128(bitset, 1); + m128 mm = sub_2x64(zeroes128(), m); + m128 xm = and128(x, m); + xm = and128(xm, mm); + + m128 mask = not128(eq64_m128(xm, zeroes128())); + res = or128(res, and128(bb, mask)); + m = and128(m, sub_2x64(m, one)); + bb = lshift64_m128(bb, 1); } - return vres; + return res; } static really_inline @@ -136,20 +131,18 @@ u64a expand64_impl(u64a x, u64a m) { static really_inline m128 expand128_impl(m128 x, m128 m) { m128 one = set1_2x64(1); - m128 bitset = one; - m128 vres = zeroes128(); + m128 bb = one; + m128 res = zeroes128(); while (isnonzero128(m)) { - m128 tv = and128(x, m); - - m128 mm = sub_2x64(zeroes128(), m); - m128 mask = not128(eq64_m128(tv, zeroes128())); - mask = vandq_s64(bitset, mask); - mask = and128(mask, mm); - vres = or128(vres, mask); - m = and128(m, sub_2x64(m, one)); - bitset = lshift64_m128(bitset, 1); + m128 xm = and128(x, bb); + m128 mm = sub_2x64(zeroes128(), m); + m128 mask = not128(eq64_m128(xm, zeroes128())); + mask = and128(mask, and128(m, mm)); + 
res = or128(res, mask); + m = and128(m, sub_2x64(m, one)); + bb = lshift64_m128(bb, 1); } - return vres; + return res; } /* returns the first set bit after begin (if not ~0U). If no bit is set after diff --git a/unit/internal/bitutils.cpp b/unit/internal/bitutils.cpp index 3f788544..8af8f9a4 100644 --- a/unit/internal/bitutils.cpp +++ b/unit/internal/bitutils.cpp @@ -294,6 +294,39 @@ TEST(BitUtils, compress64) { } } +TEST(BitUtils, compress128) { + const m128 all_zeroes = zeroes128(); + const m128 all_ones = ones128(); + const m128 odd_bits = set1_2x64(0x5555555555555555ull); + const m128 even_bits = set1_2x64(0xaaaaaaaaaaaaaaaaull); + + EXPECT_EQ(0, diff128(all_zeroes, compress128(all_zeroes, all_zeroes))); + EXPECT_EQ(0, diff128(all_zeroes, compress128(all_zeroes, set1_4x32(1)))); + EXPECT_EQ(0, diff128(all_zeroes, compress128(all_zeroes, all_ones))); + EXPECT_EQ(0, diff128(all_ones, compress128(all_ones, all_ones))); + EXPECT_EQ(0, diff128(set1_2x64(0xffffffffull), compress128(odd_bits, odd_bits))); + EXPECT_EQ(0, diff128(set1_2x64(0xffffffffull), compress128(even_bits, even_bits))); + EXPECT_EQ(0, diff128(all_zeroes, compress128(odd_bits, even_bits))); + EXPECT_EQ(0, diff128(all_zeroes, compress128(even_bits, odd_bits))); + + // Some single-bit tests. 
+ for (u32 i = 0; i < 64; i++) { + const m128 one_bit = set1_2x64(1ull << i); + + EXPECT_EQ(0, diff128(all_zeroes, compress128(all_zeroes, one_bit))); + EXPECT_EQ(0, diff128(set1_2x64(1ull), compress128(one_bit, one_bit))); + EXPECT_EQ(0, diff128(one_bit, compress128(one_bit, all_ones))); + + if (i % 2) { + EXPECT_EQ(0, diff128(set1_2x64(1ull << (i / 2)), compress128(one_bit, even_bits))); + EXPECT_EQ(0, diff128(all_zeroes, compress128(one_bit, odd_bits))); + } else { + EXPECT_EQ(0, diff128(set1_2x64(1ull << (i / 2)), compress128(one_bit, odd_bits))); + EXPECT_EQ(0, diff128(all_zeroes, compress128(one_bit, even_bits))); + } + } +} + TEST(BitUtils, expand32) { const u32 all_ones = 0xffffffffu; const u32 odd_bits = 0x55555555u; @@ -352,6 +385,35 @@ TEST(BitUtils, expand64) { } } +TEST(BitUtils, expand128) { + const m128 all_zeroes = zeroes128(); + const m128 all_ones = ones128(); + const m128 odd_bits = set1_2x64(0x5555555555555555ull); + const m128 even_bits = set1_2x64(0xaaaaaaaaaaaaaaaaull); + + EXPECT_EQ(0, diff128(all_zeroes, expand128(all_zeroes, all_zeroes))); + EXPECT_EQ(0, diff128(all_zeroes, expand128(all_zeroes, set1_2x64(1ull)))); + EXPECT_EQ(0, diff128(all_zeroes, expand128(all_zeroes, all_ones))); + EXPECT_EQ(0, diff128(all_ones, expand128(all_ones, all_ones))); + EXPECT_EQ(0, diff128(odd_bits, expand128(set1_2x64(0xffffffffull), odd_bits))); + EXPECT_EQ(0, diff128(even_bits, expand128(set1_2x64(0xffffffffull), even_bits))); + EXPECT_EQ(0, diff128(all_zeroes, expand128(set1_2x64(0xffffffff00000000ull), even_bits))); + EXPECT_EQ(0, diff128(all_zeroes, expand128(set1_2x64(0xffffffff00000000ull), odd_bits))); + EXPECT_EQ(0, diff128(set1_2x64(1u), expand128(set1_2x64(1u), odd_bits))); + EXPECT_EQ(0, diff128(set1_2x64(2u), expand128(set1_2x64(1u), even_bits))); + + // Some single-bit tests. 
+ for (u32 i = 0; i < 64; i++) { + const m128 one_bit = set1_2x64(1ull << i); + + EXPECT_EQ(0, diff128(all_zeroes, expand128(all_zeroes, one_bit))); + EXPECT_EQ(0, diff128(one_bit, expand128(set1_2x64(1ull), one_bit))); + EXPECT_EQ(0, diff128(one_bit, expand128(one_bit, all_ones))); + + EXPECT_EQ(0, diff128(one_bit, expand128(set1_2x64(1ull << (i / 2)), i % 2 ? even_bits : odd_bits))); + } +} + TEST(BitUtils, bf_op_1) { u64a a = 0; for (u32 i = 0; i < 64; i++) {