From 558313a2c2d35e7fc61b2aa856085ddc4eaffcee Mon Sep 17 00:00:00 2001 From: Apostolos Tapsas Date: Mon, 18 Oct 2021 12:26:38 +0000 Subject: [PATCH] SuperVector operators fixes and simd_utils low/high64 functions implementations added ---
// Review notes (free-text commentary placed after the "---" separator).
// NOTE(review): in this copy the patch's line breaks and indentation appear to be
//   collapsed into single spaces -- confirm against the original commit before applying.
// common/simd_utils.h: the four print_m128_* helpers now format the label with "%12s"
//   and iterate from the highest array index down to 0 instead of from 0 upwards.
// NOTE(review): print_m128_2x64 prints uint64_t values with "%016lx" (unchanged context
//   line); that presumably assumes uint64_t is unsigned long on the target -- confirm.
// ppc64el/simd_utils.h: extract64from128 drops UNUSED from `imm`, which the visible
//   `switch (imm)` reads. low64from128 / high64from128 replace the `vec_add(in, in)`
//   FIXME placeholders with vec_perm(in, zero vector, vec_splat_u8(1) / vec_splat_u8(0)).
// NOTE(review): both permute control vectors hold one repeated byte value; if vec_perm
//   picks one source byte per control byte, the result would be a single byte of `in`
//   replicated to every lane rather than one 64-bit half -- TODO confirm with a unit test.
// supervector/arch/ppc64el/impl.cpp: operator>, >=, <, <= replace per-lane scalar
//   comparisons of u.s32[0..3] with vec_cmpgt / vec_cmpge on u.v128[0]; < and <= call
//   the same intrinsics with the operands swapped.
// NOTE(review): the old code stored each comparison's result (0 or 1) per 32-bit lane;
//   the lane width and result encoding of the intrinsic form depend on the type of
//   u.v128[0], which this patch does not show -- confirm callers expect the new form.
// movemask(): every added line is a commented-out printf debug dump; the computation
//   lines themselves are untouched.
src/util/arch/common/simd_utils.h | 16 ++++++------- src/util/arch/ppc64el/simd_utils.h | 14 +++-------- src/util/supervector/arch/ppc64el/impl.cpp | 27 +++++++++++++++------- 3 files changed, 30 insertions(+), 27 deletions(-) diff --git a/src/util/arch/common/simd_utils.h b/src/util/arch/common/simd_utils.h index 65e7b69a..5bf846f9 100644 --- a/src/util/arch/common/simd_utils.h +++ b/src/util/arch/common/simd_utils.h @@ -49,8 +49,8 @@ static inline void print_m128_16x8(const char *label, m128 vector) { uint8_t ALIGN_ATTR(16) data[16]; store128(data, vector); - DEBUG_PRINTF("%s: ", label); - for(int i=0; i < 16; i++) + DEBUG_PRINTF("%12s: ", label); + for(int i=15; i >=0; i--) printf("%02x ", data[i]); printf("\n"); } @@ -58,8 +58,8 @@ static inline void print_m128_16x8(const char *label, m128 vector) { static inline void print_m128_8x16(const char *label, m128 vector) { uint16_t ALIGN_ATTR(16) data[8]; store128(data, vector); - DEBUG_PRINTF("%s: ", label); - for(int i=0; i < 8; i++) + DEBUG_PRINTF("%12s: ", label); + for(int i=7; i >= 0; i--) printf("%04x ", data[i]); printf("\n"); } @@ -67,8 +67,8 @@ static inline void print_m128_8x16(const char *label, m128 vector) { static inline void print_m128_4x32(const char *label, m128 vector) { uint32_t ALIGN_ATTR(16) data[4]; store128(data, vector); - DEBUG_PRINTF("%s: ", label); - for(int i=0; i < 4; i++) + DEBUG_PRINTF("%12s: ", label); + for(int i=3; i >= 0; i--) printf("%08x ", data[i]); printf("\n"); } @@ -76,8 +76,8 @@ static inline void print_m128_4x32(const char *label, m128 vector) { static inline void print_m128_2x64(const char *label, m128 vector) { uint64_t ALIGN_ATTR(16) data[2]; store128(data, vector); - DEBUG_PRINTF("%s: ", label); - for(int i=0; i < 2; i++) + 
DEBUG_PRINTF("%12s: ", label); + for(int i=1; i >= 0; i--) printf("%016lx ", data[i]); printf("\n"); } diff --git a/src/util/arch/ppc64el/simd_utils.h b/src/util/arch/ppc64el/simd_utils.h index f4b97ffb..a54012aa 100644 --- a/src/util/arch/ppc64el/simd_utils.h +++ b/src/util/arch/ppc64el/simd_utils.h @@ -270,7 +270,7 @@ switch (imm) { } } -static really_inline u64a extract64from128(const m128 in, unsigned UNUSED imm) { +static really_inline u64a extract64from128(const m128 in, unsigned imm) { u64a ALIGN_ATTR(16) a[2]; vec_xst((uint64x2_t) in, 0, a); switch (imm) { @@ -285,19 +285,11 @@ switch (imm) { } static really_inline m128 low64from128(const m128 in) { - //u64a ALIGN_ATTR(16) a[2]; - //vec_xst((uint64x2_t) in, 0, a); - //return a[1]; - // #warning FIXME - return vec_add(in, in); + return (m128) vec_perm((int64x2_t)in, (int64x2_t)vec_splats((uint64_t)0), (uint8x16_t)vec_splat_u8(1)); } static really_inline m128 high64from128(const m128 in) { - //u64a ALIGN_ATTR(16) a[2]; - //vec_xst((uint64x2_t) in, 0, a); - //return a[0]; - // #warning FIXME - return vec_add(in, in); + return (m128) vec_perm((int64x2_t)in, (int64x2_t)vec_splats((uint64_t)0), (uint8x16_t)vec_splat_u8(0)); } diff --git a/src/util/supervector/arch/ppc64el/impl.cpp b/src/util/supervector/arch/ppc64el/impl.cpp index 8628c662..93cc4d63 100644 --- a/src/util/supervector/arch/ppc64el/impl.cpp +++ b/src/util/supervector/arch/ppc64el/impl.cpp @@ -186,29 +186,25 @@ really_inline SuperVector<16> SuperVector<16>::operator!=(SuperVector<16> const template <> really_inline SuperVector<16> SuperVector<16>::operator>(SuperVector<16> const &b) const { - int32x4_t v = {u.s32[0] > b.u.s32[0], u.s32[1] > b.u.s32[1], u.s32[2] > b.u.s32[2], u.s32[3] > b.u.s32[3]}; - return (m128) v; + return {(m128) vec_cmpgt(u.v128[0], b.u.v128[0])}; } template <> really_inline SuperVector<16> SuperVector<16>::operator>=(SuperVector<16> const &b) const { - int32x4_t v = {u.s32[0] >= b.u.s32[0], u.s32[1] >= b.u.s32[1], u.s32[2] >= 
b.u.s32[2], u.s32[3] >= b.u.s32[3]}; - return (m128) v; + return {(m128) vec_cmpge(u.v128[0], b.u.v128[0])}; } template <> really_inline SuperVector<16> SuperVector<16>::operator<(SuperVector<16> const &b) const { - int32x4_t v = {u.s32[0] < b.u.s32[0], u.s32[1] < b.u.s32[1], u.s32[2] < b.u.s32[2], u.s32[3] < b.u.s32[3]}; - return (m128) v; + return {(m128) vec_cmpgt(b.u.v128[0], u.v128[0])}; } template <> really_inline SuperVector<16> SuperVector<16>::operator<=(SuperVector<16> const &b) const { - int32x4_t v = {u.s32[0] <= b.u.s32[0], u.s32[1] <= b.u.s32[1], u.s32[2] <= b.u.s32[2], u.s32[3] <= b.u.s32[3]}; - return (m128) v; + return {(m128) vec_cmpge(b.u.v128[0], u.v128[0])}; } @@ -222,9 +218,21 @@ template <> really_inline typename SuperVector<16>::movemask_type SuperVector<16>::movemask(void)const { uint8x16_t s1 = vec_sr((uint8x16_t)u.v128[0], vec_splat_u8(7)); + //printf("s1:"); + //for(int i=15; i>=0; i--) {printf("%02x, ",s1[i]);} + //printf("\n"); uint16x8_t ss = vec_sr((uint16x8_t)s1, vec_splat_u16(7)); + //printf("ss:"); + //for(int i=7; i>=0; i--) {printf("%04x, ",ss[i]);} + //printf("\n"); uint16x8_t res_and = vec_and((uint16x8_t)s1, vec_splats((uint16_t)0xff)); + //printf("res_and:"); + //for(int i=7; i>=0; i--) {printf("%04x, ",res_and[i]);} + //printf("\n"); uint16x8_t s2 = vec_or((uint16x8_t)ss, res_and); + //printf("s2:"); + //for(int i=7; i>=0; i--) {printf("%04x, ",s2[i]);} + //printf("\n"); uint32x4_t ss2 = vec_sr((uint32x4_t)s2 , vec_splat_u32(14)); uint32x4_t res_and2 = vec_and((uint32x4_t)s2, vec_splats((uint32_t)0xff)); @@ -238,6 +246,9 @@ really_inline typename SuperVector<16>::movemask_type SuperVector<16>::movemask( uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((uint64_t)0xff)); uint64x2_t s5 = vec_or((uint64x2_t)ss4, res_and4); + //printf("s5:"); + //for(int i=1; i>=0; i--) {printf("%016llx, ",s5[i]);} + //printf("\n"); return s5[0]; }