// Patch summary (reviewer preamble; everything from the first "diff --git" onward is the
// original patch text, unchanged).
//
// NOTE(review): the patch text below has lost its original line breaks -- each hunk
// header declares multi-line hunks (e.g. "-45,10 +45,7") yet the content sits on two
// physical lines, the second starting in the middle of an added `return` statement.
// It presumably needs its line breaks restored before a patch tool will accept it.
//
// 1. src/nfa/limex_shuffle.h, packedExtract128():
//    removes four commented-out debug lines (a cast plus a printf loop) and adds a call
//    print_m128_16x8("shufled", shuffled) right after the pshufb_m128() call.
//    NOTE(review): the label is spelled "shufled" (carried over from the removed printf).
//    NOTE(review): print_m128_16x8 is not defined in this patch -- confirm it compiles
//    to nothing in non-debug builds, since it now sits inside packedExtract128().
//
// 2. src/util/arch/ppc64el/simd_utils.h, pshufb_m128():
//    removes six commented-out experimental lines (and two blank lines) that followed
//    the active statement; the active `return (m128) vec_perm(a, a, b)` (with
//    uint8x16_t casts) is context and is not modified.
//
// 3. src/util/supervector/arch/ppc64el/impl.cpp, SuperVector<16>::pshufb():
//    replaces vec_permxor(vec_splat_s8(0), u.v128[0], b.u.v128[0]) with
//    vec_perm(u.v128[0], u.v128[0], b.u.v128[0]) using uint8x16_t casts, i.e. the same
//    call shape as pshufb_m128() in item 2.
//
// 4. unit/internal/simd_utils.cpp, TEST(SimdUtilsTest, pshufb_m128):
//    the index vector changes from vec2[i] = i to vec2[i] = i + (rand() % 16 + 0), so
//    indices can reach 30; the expectation becomes vec[vec2[i] % 16], i.e. the test now
//    expects out-of-range indices to wrap modulo 16.
//
// 5. unit/internal/supervector.cpp, TEST(SuperVectorUtilsTest,pshufb128c):
//    same change, but with rand() % 15 (indices up to 29) and the same % 16 expectation.
//    NOTE(review): % 15 here versus % 16 in item 4 -- looks unintentional; confirm.
//    NOTE(review): no rand() seeding is visible in either hunk -- TODO confirm whether
//    the test binary seeds it elsewhere.
//
// NOTE(review): the modulo-16 expectation presumably relies on vec_perm with identical
// source operands ignoring index bits above the low four -- verify against the AltiVec
// reference; control bytes with the top bit set are not exercised by these tests.
diff --git a/src/nfa/limex_shuffle.h b/src/nfa/limex_shuffle.h index 413eece7..a1728e6a 100644 --- a/src/nfa/limex_shuffle.h +++ b/src/nfa/limex_shuffle.h @@ -45,10 +45,7 @@ static really_inline u32 packedExtract128(m128 s, const m128 permute, const m128 compare) { m128 shuffled = pshufb_m128(s, permute); - //int8x16_t res = (int8x16_t) pshufb_m128(s, permute); - //printf("shufled:"); - //for(int i=15; i>=0; i--) {printf("%02x ", res[i]);} - //printf("\n"); + print_m128_16x8("shufled", shuffled); m128 compared = and128(shuffled, compare); u16 rv = ~movemask128(eq128(compared, shuffled)); return (u32)rv; diff --git a/src/util/arch/ppc64el/simd_utils.h b/src/util/arch/ppc64el/simd_utils.h index d962163e..9e8c59bf 100644 --- a/src/util/arch/ppc64el/simd_utils.h +++ b/src/util/arch/ppc64el/simd_utils.h @@ -463,14 +463,6 @@ char testbit128(m128 val, unsigned int n) { static really_inline m128 pshufb_m128(m128 a, m128 b) { return (m128) vec_perm((uint8x16_t)a, (uint8x16_t)a, (uint8x16_t)b); - //return (m128) vec_perm((int8x16_t)vec_splat_s8(0), (int8x16_t)a, (uint8x16_t)b);; - //uint8x16_t btransparent = vec_and((uint8x16_t)b, (uint8x16_t)vec_splats(0x8f)); - //return (m128) vec_perm(a, a, btransparent); - //return (m128) vec_perm((int8x16_t)vec_splat_s8(0), (int8x16_t)b, (uint8x16_t)a); - - //return (m128) vec_perm((int8x16_t)a, (int8x16_t)b, (uint8x16_t)vec_splat_s8(0)); - //return (m128) vec_perm((int8x16_t)b, (int8x16_t)a, (uint8x16_t)vec_splat_s8(0)); - } static really_inline diff --git a/src/util/supervector/arch/ppc64el/impl.cpp b/src/util/supervector/arch/ppc64el/impl.cpp index 93cc4d63..dc318c82 100644 --- a/src/util/supervector/arch/ppc64el/impl.cpp +++ b/src/util/supervector/arch/ppc64el/impl.cpp @@ -603,7 +603,7 @@ template<> template<> really_inline SuperVector<16> SuperVector<16>::pshufb(SuperVector<16> b) { - return (m128) vec_permxor((int8x16_t)vec_splat_s8(0), (int8x16_t)u.v128[0], (int8x16_t) b.u.v128[0]); + return (m128) vec_perm((uint8x16_t)u.v128[0], 
(uint8x16_t)u.v128[0], (uint8x16_t)b.u.v128[0]); } template<> diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp index 26743abe..2085c9df 100644 --- a/unit/internal/simd_utils.cpp +++ b/unit/internal/simd_utils.cpp @@ -849,15 +849,15 @@ TEST(SimdUtilsTest, pshufb_m128) { } u8 vec2[16]; for (int i=0; i<16; i++) { - vec2[i]=i; - } + vec2[i]=i + (rand() % 16 + 0); + } m128 v1 = loadu128(vec); m128 v2 = loadu128(vec2); m128 vres = pshufb_m128(v1, v2); u8 res[16]; store128(res, vres); for (int i=0; i<16; i++) { - ASSERT_EQ(vec[vec2[i]], res[i]); + ASSERT_EQ(vec[vec2[i] % 16 ], res[i]); } } diff --git a/unit/internal/supervector.cpp b/unit/internal/supervector.cpp index 342f8fd4..4be93aa8 100644 --- a/unit/internal/supervector.cpp +++ b/unit/internal/supervector.cpp @@ -280,13 +280,13 @@ TEST(SuperVectorUtilsTest,pshufb128c) { } u8 vec2[16]; for (int i=0; i<16; i++) { - vec2[i]=i; + vec2[i]=i + (rand() % 15 + 0); } auto SP1 = SuperVector<16>::loadu(vec); auto SP2 = SuperVector<16>::loadu(vec2); auto SResult = SP1.template pshufb(SP2); for (int i=0; i<16; i++) { - ASSERT_EQ(vec[vec2[i]],SResult.u.u8[i]); + ASSERT_EQ(vec[vec2[i] % 16 ],SResult.u.u8[i]); } }