Add special case for Shuffle test and comments for the respective implementations

2026-01-05 07:55:27 +03:00 · 2021-10-26 11:48:33 +03:00
parent 1eb3b19f63
commit bf54aae779
3 changed files with 47 additions and 4 deletions
--- a/src/util/arch/ppc64el/simd_utils.h
+++ b/src/util/arch/ppc64el/simd_utils.h
@@ -462,6 +462,9 @@ char testbit128(m128 val, unsigned int n) {

 static really_inline
 m128 pshufb_m128(m128 a, m128 b) {
+    /* On Intel, if bit 0x80 of the index is set, the result is zero; otherwise the lane is index & 0xf.
+       On NEON or PPC, if the index is >= 16, the result is zero; otherwise it is that lane.
+       Below is the Intel version converted to PPC.  */
    uint8x16_t mask =(uint8x16_t)vec_cmpge((uint8x16_t)b, (uint8x16_t)vec_splats((uint8_t)0x80));
    uint8x16_t res = vec_perm ((uint8x16_t)a, (uint8x16_t)a, (uint8x16_t)b);
    return (m128) vec_sel((uint8x16_t)res, (uint8x16_t)zeroes128(), (uint8x16_t)mask);
--- a/src/util/supervector/arch/ppc64el/impl.cpp
+++ b/src/util/supervector/arch/ppc64el/impl.cpp
@@ -603,6 +603,9 @@ template<>
 template<>
 really_inline SuperVector<16> SuperVector<16>::pshufb<false>(SuperVector<16> b)
 {
+    /* On Intel, if bit 0x80 of the index is set, the result is zero; otherwise the lane is index & 0xf.
+       On NEON or PPC, if the index is >= 16, the result is zero; otherwise it is that lane.
+       Below is the Intel version converted to PPC.  */
    uint8x16_t mask =(uint8x16_t)vec_cmpge((uint8x16_t)b.u.v128[0], (uint8x16_t)vec_splats((uint8_t)0x80));
    uint8x16_t res = vec_perm ((uint8x16_t)u.v128[0], (uint8x16_t)u.v128[0], (uint8x16_t)b.u.v128[0]);
    return (m128) vec_sel((uint8x16_t)res, (uint8x16_t)vec_splat_s8(0), (uint8x16_t)mask);