Optimize the SuperVector shuffle (pshufb) implementation and update its unit tests

2026-01-17 16:00:26 +03:00 · 2021-10-22 11:55:39 +00:00
parent 57301721f1
commit d43d6733b6
5 changed files with 7 additions and 18 deletions
--- a/src/nfa/limex_shuffle.h
+++ b/src/nfa/limex_shuffle.h
@@ -45,10 +45,7 @@
 static really_inline
 u32 packedExtract128(m128 s, const m128 permute, const m128 compare) {
    m128 shuffled = pshufb_m128(s, permute);
-    //int8x16_t res = (int8x16_t) pshufb_m128(s, permute);
+    print_m128_16x8("shufled", shuffled);
    //printf("shufled:");
    //for(int i=15; i>=0; i--) {printf("%02x ", res[i]);}
    //printf("\n");
    m128 compared = and128(shuffled, compare);
    u16 rv = ~movemask128(eq128(compared, shuffled));
    return (u32)rv;
--- a/src/util/arch/ppc64el/simd_utils.h
+++ b/src/util/arch/ppc64el/simd_utils.h
@@ -463,14 +463,6 @@ char testbit128(m128 val, unsigned int n) {
 static really_inline
 m128 pshufb_m128(m128 a, m128 b) {
    return (m128) vec_perm((uint8x16_t)a, (uint8x16_t)a, (uint8x16_t)b);
    //return (m128) vec_perm((int8x16_t)vec_splat_s8(0), (int8x16_t)a, (uint8x16_t)b);;
    //uint8x16_t btransparent = vec_and((uint8x16_t)b, (uint8x16_t)vec_splats(0x8f));
    //return (m128) vec_perm(a, a, btransparent);
    //return (m128) vec_perm((int8x16_t)vec_splat_s8(0), (int8x16_t)b, (uint8x16_t)a);
    //return (m128) vec_perm((int8x16_t)a, (int8x16_t)b, (uint8x16_t)vec_splat_s8(0));
    //return (m128) vec_perm((int8x16_t)b, (int8x16_t)a, (uint8x16_t)vec_splat_s8(0));
 }
 static really_inline
--- a/src/util/supervector/arch/ppc64el/impl.cpp
+++ b/src/util/supervector/arch/ppc64el/impl.cpp
@@ -603,7 +603,7 @@ template<>
 template<>
 really_inline SuperVector<16> SuperVector<16>::pshufb<false>(SuperVector<16> b)
 {
-    return (m128) vec_permxor((int8x16_t)vec_splat_s8(0), (int8x16_t)u.v128[0], (int8x16_t) b.u.v128[0]);
+    return (m128) vec_perm((uint8x16_t)u.v128[0], (uint8x16_t)u.v128[0], (uint8x16_t)b.u.v128[0]);
 }
 template<>
--- a/unit/internal/simd_utils.cpp
+++ b/unit/internal/simd_utils.cpp
@@ -849,7 +849,7 @@ TEST(SimdUtilsTest, pshufb_m128) {
    }
    u8 vec2[16];
    for (int i=0; i<16; i++) {
-        vec2[i]=i;
+        vec2[i]=i + (rand() % 16 + 0);
    } 
    m128 v1 = loadu128(vec);
    m128 v2 = loadu128(vec2);
@@ -857,7 +857,7 @@ TEST(SimdUtilsTest, pshufb_m128) {
    u8 res[16];
    store128(res, vres);
    for (int i=0; i<16; i++) {
-        ASSERT_EQ(vec[vec2[i]], res[i]);
+        ASSERT_EQ(vec[vec2[i] % 16 ], res[i]);
    }
 }
--- a/unit/internal/supervector.cpp
+++ b/unit/internal/supervector.cpp
@@ -280,13 +280,13 @@ TEST(SuperVectorUtilsTest,pshufb128c) {
    }
    u8 vec2[16];
    for (int i=0; i<16; i++) {
-        vec2[i]=i;
+        vec2[i]=i + (rand() % 15 + 0);
    }
    auto SP1 = SuperVector<16>::loadu(vec);
    auto SP2 = SuperVector<16>::loadu(vec2);
    auto SResult = SP1.template pshufb<true>(SP2);
    for (int i=0; i<16; i++) {
-        ASSERT_EQ(vec[vec2[i]],SResult.u.u8[i]);
+        ASSERT_EQ(vec[vec2[i] % 16 ],SResult.u.u8[i]);
    }
 }