From 1eb3b19f63f05bad1cb5776bb5ca39b8f192bc23 Mon Sep 17 00:00:00 2001 From: Apostolos Tapsas Date: Sun, 24 Oct 2021 16:52:12 +0000 Subject: [PATCH] Shuffle simd and SuperVector implementations as well as their tests really fixed --- src/nfa/limex_shuffle.h | 1 - src/util/arch/ppc64el/simd_utils.h | 4 +++- src/util/supervector/arch/ppc64el/impl.cpp | 4 +++- unit/internal/shuffle.cpp | 6 +++--- unit/internal/simd_utils.cpp | 17 ++++++++++++----- unit/internal/supervector.cpp | 6 +++++- 6 files changed, 26 insertions(+), 12 deletions(-) diff --git a/src/nfa/limex_shuffle.h b/src/nfa/limex_shuffle.h index a1728e6a..365d4729 100644 --- a/src/nfa/limex_shuffle.h +++ b/src/nfa/limex_shuffle.h @@ -45,7 +45,6 @@ static really_inline u32 packedExtract128(m128 s, const m128 permute, const m128 compare) { m128 shuffled = pshufb_m128(s, permute); - print_m128_16x8("shufled", shuffled); m128 compared = and128(shuffled, compare); u16 rv = ~movemask128(eq128(compared, shuffled)); return (u32)rv; diff --git a/src/util/arch/ppc64el/simd_utils.h b/src/util/arch/ppc64el/simd_utils.h index 9e8c59bf..107ca110 100644 --- a/src/util/arch/ppc64el/simd_utils.h +++ b/src/util/arch/ppc64el/simd_utils.h @@ -462,7 +462,9 @@ char testbit128(m128 val, unsigned int n) { static really_inline m128 pshufb_m128(m128 a, m128 b) { - return (m128) vec_perm((uint8x16_t)a, (uint8x16_t)a, (uint8x16_t)b); + uint8x16_t mask =(uint8x16_t)vec_cmpge((uint8x16_t)b, (uint8x16_t)vec_splats((uint8_t)0x80)); + uint8x16_t res = vec_perm ((uint8x16_t)a, (uint8x16_t)a, (uint8x16_t)b); + return (m128) vec_sel((uint8x16_t)res, (uint8x16_t)zeroes128(), (uint8x16_t)mask); } static really_inline diff --git a/src/util/supervector/arch/ppc64el/impl.cpp b/src/util/supervector/arch/ppc64el/impl.cpp index dc318c82..0af136a5 100644 --- a/src/util/supervector/arch/ppc64el/impl.cpp +++ b/src/util/supervector/arch/ppc64el/impl.cpp @@ -603,7 +603,9 @@ template<> template<> really_inline SuperVector<16> 
SuperVector<16>::pshufb(SuperVector<16> b) { - return (m128) vec_perm((uint8x16_t)u.v128[0], (uint8x16_t)u.v128[0], (uint8x16_t)b.u.v128[0]); + uint8x16_t mask =(uint8x16_t)vec_cmpge((uint8x16_t)b.u.v128[0], (uint8x16_t)vec_splats((uint8_t)0x80)); + uint8x16_t res = vec_perm ((uint8x16_t)u.v128[0], (uint8x16_t)u.v128[0], (uint8x16_t)b.u.v128[0]); + return (m128) vec_sel((uint8x16_t)res, (uint8x16_t)vec_splat_s8(0), (uint8x16_t)mask); } template<> diff --git a/unit/internal/shuffle.cpp b/unit/internal/shuffle.cpp index b7c1b4f5..038c6193 100644 --- a/unit/internal/shuffle.cpp +++ b/unit/internal/shuffle.cpp @@ -187,7 +187,7 @@ TEST(Shuffle, PackedExtract128_1) { // shuffle a single 1 bit to the front m128 permute, compare; build_pshufb_masks_onebit(i, &permute, &compare); - EXPECT_EQ(1U, packedExtract128(setbit(i), permute, compare)); + EXPECT_EQ(1U, packedExtract128(setbit(i), permute, compare)); EXPECT_EQ(1U, packedExtract128(ones128(), permute, compare)); // we should get zero out of these cases EXPECT_EQ(0U, packedExtract128(zeroes128(), permute, compare)); @@ -199,7 +199,7 @@ TEST(Shuffle, PackedExtract128_1) { } } -/* + TEST(Shuffle, PackedExtract_templatized_128_1) { // Try all possible one-bit masks for (unsigned int i = 0; i < 128; i++) { @@ -218,7 +218,7 @@ TEST(Shuffle, PackedExtract_templatized_128_1) { } } } -*/ + #if defined(HAVE_AVX2) diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp index 2085c9df..037230d0 100644 --- a/unit/internal/simd_utils.cpp +++ b/unit/internal/simd_utils.cpp @@ -849,15 +849,22 @@ TEST(SimdUtilsTest, pshufb_m128) { } u8 vec2[16]; for (int i=0; i<16; i++) { - vec2[i]=i + (rand() % 16 + 0); - } + vec2[i]=i + (rand() % 15 + 0); + } + m128 v1 = loadu128(vec); m128 v2 = loadu128(vec2); - m128 vres = pshufb_m128(v1, v2); + m128 vres = pshufb_m128(v1, v2); + u8 res[16]; - store128(res, vres); + storeu128(res, vres); + for (int i=0; i<16; i++) { - ASSERT_EQ(vec[vec2[i] % 16 ], res[i]); + if(vec2[i] & 0x80){ + 
ASSERT_EQ(res[i], 0); + }else{ + ASSERT_EQ(vec[vec2[i] % 16 ], res[i]); + } } } diff --git a/unit/internal/supervector.cpp b/unit/internal/supervector.cpp index 4be93aa8..9c5f8f3a 100644 --- a/unit/internal/supervector.cpp +++ b/unit/internal/supervector.cpp @@ -286,7 +286,11 @@ TEST(SuperVectorUtilsTest,pshufb128c) { auto SP2 = SuperVector<16>::loadu(vec2); auto SResult = SP1.template pshufb(SP2); for (int i=0; i<16; i++) { - ASSERT_EQ(vec[vec2[i] % 16 ],SResult.u.u8[i]); + if(vec2[i] & 0x80){ + ASSERT_EQ(SResult.u.u8[i], 0); + }else{ + ASSERT_EQ(vec[vec2[i] % 16 ],SResult.u.u8[i]); + } } }