Remove leftover debug output from packedExtract128 and add SIMD unit tests.

packedExtract128: delete the debug dump of the shuffled vector (and the
redundant second pshufb_m128 call feeding it) instead of leaving it behind
as commented-out code.

unit/internal/simd_utils.cpp: add tests for movemask128 and pshufb_m128.
Both tests use fixed inputs so that failures are reproducible. The
movemask128 test covers all 16 lanes, and the pshufb_m128 test uses a
reversing permutation because an identity control vector cannot tell a
working shuffle from a no-op.

diff --git a/src/nfa/limex_shuffle.h b/src/nfa/limex_shuffle.h
--- a/src/nfa/limex_shuffle.h
+++ b/src/nfa/limex_shuffle.h
@@ -45,10 +45,6 @@
 static really_inline
 u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
     m128 shuffled = pshufb_m128(s, permute);
-    int8x16_t res = (int8x16_t) pshufb_m128(s, permute);
-    printf("shufled:");
-    for(int i=15; i>=0; i--) {printf("%02x ", res[i]);}
-    printf("\n");
     m128 compared = and128(shuffled, compare);
     u16 rv = ~movemask128(eq128(compared, shuffled));
     return (u32)rv;
diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp
--- a/unit/internal/simd_utils.cpp
+++ b/unit/internal/simd_utils.cpp
@@ -819,4 +819,39 @@ TEST(SimdUtilsTest, sub_u8_m128) {
     EXPECT_TRUE(!diff128(result, loadu128(expec)));
 }
 
+TEST(SimdUtilsTest, movemask_128) {
+    // Fixed bit patterns keep the test reproducible and cover all 16 lanes.
+    const u16 patterns[] = {0x0000, 0x0001, 0x8000, 0x00ff, 0xff00,
+                            0x5555, 0xaaaa, 0x1234, 0xffff};
+    const size_t count = sizeof(patterns) / sizeof(patterns[0]);
+    for (size_t p = 0; p < count; p++) {
+        u8 vec[16] = {0};
+        for (int i = 0; i < 16; i++) {
+            if (patterns[p] & (1 << i)) {
+                vec[i] = 0xff;
+            }
+        }
+        // Lane i set to 0xff must yield bit i in the mask, and nothing else.
+        const u16 mask = movemask128(loadu128(vec));
+        ASSERT_EQ(patterns[p], mask);
+    }
+}
+
+TEST(SimdUtilsTest, pshufb_m128) {
+    // Use a reversing permutation: an identity control vector could not
+    // tell a working shuffle from a no-op.
+    u8 vec[16];
+    u8 perm[16];
+    for (int i = 0; i < 16; i++) {
+        vec[i] = (u8)(7 * i + 1); // distinct values expose misplaced bytes
+        perm[i] = (u8)(15 - i);
+    }
+    m128 vres = pshufb_m128(loadu128(vec), loadu128(perm));
+    u8 res[16];
+    store128(res, vres);
+    for (int i = 0; i < 16; i++) {
+        ASSERT_EQ(vec[perm[i]], res[i]);
+    }
+}
+
 } // namespace