diff --git a/src/util/simd/arch/arm/impl.cpp b/src/util/simd/arch/arm/impl.cpp index 75796a4b..fb2138d1 100644 --- a/src/util/simd/arch/arm/impl.cpp +++ b/src/util/simd/arch/arm/impl.cpp @@ -148,7 +148,7 @@ really_inline SuperVector<16> SuperVector<16>::operator&(SuperVector<16> const b template <> really_inline SuperVector<16> SuperVector<16>::operator|(SuperVector<16> const b) const { - return {vandq_s8(u.v128[0], b.u.v128[0])}; + return {vorrq_s8(u.v128[0], b.u.v128[0])}; } template <> @@ -193,31 +193,31 @@ really_inline typename SuperVector<16>::movemask_type SuperVector<16>::eqmask(Su #ifndef HS_OPTIMIZE template <> -really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const +really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const { - return {vshlq_n_s32(u.v128[0], N)}; + return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), N)}; } #else template <> -really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const +really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const { switch(N) { case 0: return *this; break; - case 1: return {vshlq_n_s32((int16x8_t) u.v128[0], 1)}; break; - case 2: return {vshlq_n_s32((int16x8_t) u.v128[0], 2)}; break; - case 3: return {vshlq_n_s32((int16x8_t) u.v128[0], 3)}; break; - case 4: return {vshlq_n_s32((int16x8_t) u.v128[0], 4)}; break; - case 5: return {vshlq_n_s32((int16x8_t) u.v128[0], 5)}; break; - case 6: return {vshlq_n_s32((int16x8_t) u.v128[0], 6)}; break; - case 7: return {vshlq_n_s32((int16x8_t) u.v128[0], 7)}; break; - case 8: return {vshlq_n_s32((int16x8_t) u.v128[0], 8)}; break; - case 9: return {vshlq_n_s32((int16x8_t) u.v128[0], 9)}; break; - case 10: return {vshlq_n_s32((int16x8_t) u.v128[0], 10)}; break; - case 11: return {vshlq_n_s32((int16x8_t) u.v128[0], 11)}; break; - case 12: return {vshlq_n_s32((int16x8_t) u.v128[0], 12)}; break; - case 13: return {vshlq_n_s32((int16x8_t) u.v128[0], 13)}; break; - case 14: return {vshlq_n_s32((int16x8_t) u.v128[0], 14)}; break; - case 15: return {vshlq_n_s32((int16x8_t) u.v128[0], 15)}; break; + case 1: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 1)}; break; + case 2: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 2)}; break; + case 3: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 3)}; break; + case 4: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 4)}; break; + case 5: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 5)}; break; + case 6: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 6)}; break; + case 7: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 7)}; break; + case 8: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 8)}; break; + case 9: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 9)}; break; + case 10: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 10)}; break; + case 11: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 11)}; break; + case 12: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 12)}; break; + case 13: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 13)}; break; + case 14: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 14)}; break; + case 15: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 15)}; break; case 16: return Zeroes(); break; default: break; } @@ -225,33 +225,34 @@ really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const } #endif -#ifdef HS_OPTIMIZE +#ifndef HS_OPTIMIZE template <> -really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const +really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const { - return {vshrq_n_s32(u.v128[0], N)}; + return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 16 - N)}; } #else template <> -really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const +really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const { switch(N) { - case 0: return {vshrq_n_s32(u.v128[0], 0)}; break; - case 1: return {vshrq_n_s32(u.v128[0], 1)}; break; - case 2: return {vshrq_n_s32(u.v128[0], 2)}; break; - case 3: return {vshrq_n_s32(u.v128[0], 3)}; break; - case 4: return {vshrq_n_s32(u.v128[0], 4)}; break; - case 5: return {vshrq_n_s32(u.v128[0], 5)}; break; - case 6: return {vshrq_n_s32(u.v128[0], 6)}; break; - case 7: return {vshrq_n_s32(u.v128[0], 7)}; break; - case 8: return {vshrq_n_s32(u.v128[0], 8)}; break; - case 9: return {vshrq_n_s32(u.v128[0], 9)}; break; - case 10: return {vshrq_n_s32(u.v128[0], 10)}; break; - case 11: return {vshrq_n_s32(u.v128[0], 11)}; break; - case 12: return {vshrq_n_s32(u.v128[0], 12)}; break; - case 13: return {vshrq_n_s32(u.v128[0], 13)}; break; - case 14: return {vshrq_n_s32(u.v128[0], 14)}; break; - case 15: return {vshrq_n_s32(u.v128[0], 15)}; break; + case 0: return *this; break; + case 1: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 15)}; break; + case 2: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 14)}; break; + case 3: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 13)}; break; + case 4: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 12)}; break; + case 5: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 11)}; break; + case 6: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 10)}; break; + case 7: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 9)}; break; + case 8: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 8)}; break; + case 9: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 7)}; break; + case 10: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 6)}; break; + case 11: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 5)}; break; + case 12: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 4)}; break; + case 13: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 3)}; break; + case 14: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 2)}; break; + case 15: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 1)}; break; + case 16: return Zeroes(); break; default: break; } return *this; @@ -286,30 +287,30 @@ really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint template<> really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> r, int8_t offset) { - return {vextq_s8((int16x8_t)u.v128[0], (int16x8_t)r.u.v128[0], offset)}; + return {vextq_s8((int16x8_t)r.u.v128[0], (int16x8_t)u.v128[0], 16 - offset)}; } #else template<> -really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> l, int8_t offset) +really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> r, int8_t offset) { switch(offset) { case 0: return *this; break; - case 1: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 1)}; break; - case 2: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 2)}; break; - case 3: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 3)}; break; - case 4: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 4)}; break; - case 5: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 5)}; break; - case 6: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 6)}; break; - case 7: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 7)}; break; - case 8: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 8)}; break; - case 9: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 9)}; break; - case 10: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 10)}; break; - case 11: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 11)}; break; - case 12: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 12)}; break; - case 13: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 13)}; break; - case 14: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 14)}; break; - case 15: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 15)}; break; - case 16: return l; break; + case 1: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 15)}; break; + case 2: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 14)}; break; + case 3: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 13)}; break; + case 4: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 12)}; break; + case 5: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 11)}; break; + case 6: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 10)}; break; + case 7: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 9)}; break; + case 8: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 8)}; break; + case 9: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 7)}; break; + case 10: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 6)}; break; + case 11: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 5)}; break; + case 12: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 4)}; break; + case 13: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 3)}; break; + case 14: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 2)}; break; + case 15: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 1)}; break; + case 16: return r; break; default: break; } return *this; diff --git a/src/util/simd/types.hpp b/src/util/simd/types.hpp index 4c948888..5bfd55ec 100644 --- a/src/util/simd/types.hpp +++ b/src/util/simd/types.hpp @@ -31,6 +31,7 @@ #define SIMD_TYPES_HPP #include +#include #if defined(ARCH_IA32) || defined(ARCH_X86_64) #include "util/simd/arch/x86/types.hpp" @@ -213,5 +214,38 @@ public: #endif +template +static void printv_u8(const char *label, SuperVector &v) { + printf("%s: ", label); + for(int i=0; i < S; i++) + printf("%02x ", v.u.u8[i]); + printf("\n"); +} + +template +static void printv_u16(const char *label, SuperVector &v) { + printf("%s: ", label); + for(int i=0; i < S/sizeof(u16); i++) + printf("%04x ", v.u.u16[i]); + printf("\n"); +} + +template +static void printv_u32(const char *label, SuperVector &v) { + printf("%s: ", label); + for(int i=0; i < S/sizeof(u32); i++) + printf("%08x ", v.u.u32[i]); + printf("\n"); +} + +template +static inline void printv_u64(const char *label, SuperVector &v) { + printf("%s: ", label); + for(int i=0; i < S/sizeof(u64a); i++) + printf("%016lx ", v.u.u64[i]); + printf("\n"); +} + + #endif /* SIMD_TYPES_H */ diff --git a/unit/internal/supervector.cpp b/unit/internal/supervector.cpp index 12d9fae0..c6caae6e 100644 --- a/unit/internal/supervector.cpp +++ b/unit/internal/supervector.cpp @@ -38,232 +38,266 @@ TEST(SuperVectorUtilsTest, Zero128c) { - m128_t zeroes = SuperVector<16>::Zeroes(); - char buf[16]{0}; - for(int i=0; i<16; i++){ASSERT_EQ(zeroes.u.s8[i],buf[i]);} + auto zeroes = SuperVector<16>::Zeroes(); + u8 buf[16]{0}; + for(int i=0; i<16; i++) { + ASSERT_EQ(zeroes.u.u8[i],buf[i]); + } } - TEST(SuperVectorUtilsTest, Ones128c) { - m128_t ones = SuperVector<16>::Ones(); - char buf[16]; - for (int i=0; i<16; i++){buf[i]=0xff;} - for(int i=0; i<16; i++){ASSERT_EQ(ones.u.s8[i],buf[i]);} + auto ones = SuperVector<16>::Ones(); + u8 buf[16]; + for (int i=0; i<16; i++) { buf[i]=0xff; } + for(int i=0; i<16; i++) { + ASSERT_EQ(ones.u.u8[i],buf[i]); + } } - TEST(SuperVectorUtilsTest, Loadu128c) { - char vec[32]; - for(int i=0; i<32;i++){vec[i]=i;} - for(int i=0; i<=16;i++){ - m128_t SP = SuperVector<16>::loadu(vec+i); - for(int j=0; j<16; j++){ - ASSERT_EQ(SP.u.s8[j],vec[j+i]); + u8 vec[32]; + for(int i=0; i<32;i++) { vec[i]=i; } + for(int i=0; i<=16;i++) { + auto SP = SuperVector<16>::loadu(vec+i); + for(int j=0; j<16; j++) { + ASSERT_EQ(SP.u.u8[j],vec[j+i]); } } } TEST(SuperVectorUtilsTest, Load128c) { - char vec[128] __attribute__((aligned(16))); - for(int i=0; i<128;i++){vec[i]=i;} - for(int i=0;i<=16;i+=16){ - m128_t SP = SuperVector<16>::loadu(vec+i); + u8 ALIGN_ATTR(16) vec[32]; + for(int i=0; i<32;i++) { vec[i]=i; } + for(int i=0;i<=16;i+=16) { + auto SP = SuperVector<16>::loadu(vec+i); for(int j=0; j<16; j++){ - ASSERT_EQ(SP.u.s8[j],vec[j+i]); + ASSERT_EQ(SP.u.u8[j],vec[j+i]); } } } TEST(SuperVectorUtilsTest,Equal128c){ - char vec[32]; + u8 vec[32]; for (int i=0; i<32; i++) {vec[i]=i;}; - m128_t SP1 = SuperVector<16>::loadu(vec); - m128_t SP2 = SuperVector<16>::loadu(vec+16); - char buf[16]={0}; + auto SP1 = SuperVector<16>::loadu(vec); + auto SP2 = SuperVector<16>::loadu(vec+16); + u8 buf[16]={0}; /*check for equality byte by byte*/ for (int s=0; s<16; s++){ if(vec[s]==vec[s+16]){ buf[s]=1; } } - m128_t SPResult = SP1.eq(SP2); - for (int i=0; i<16; i++){ASSERT_EQ(SPResult.u.s8[i],buf[i]);} + auto SPResult = SP1.eq(SP2); + for (int i=0; i<16; i++) { + ASSERT_EQ(SPResult.u.s8[i],buf[i]); + } } TEST(SuperVectorUtilsTest,And128c){ - m128_t SPResult = SuperVector<16>::Zeroes() & SuperVector<16>::Ones(); - for (int i=0; i<16; i++){ASSERT_EQ(SPResult.u.s8[i],0);} + auto SPResult = SuperVector<16>::Zeroes() & SuperVector<16>::Ones(); + for (int i=0; i<16; i++) { + ASSERT_EQ(SPResult.u.u8[i],0); + } } TEST(SuperVectorUtilsTest,OPAnd128c){ - m128_t SP1 = SuperVector<16>::Zeroes(); - m128_t SP2 = SuperVector<16>::Ones(); + auto SP1 = SuperVector<16>::Zeroes(); + auto SP2 = SuperVector<16>::Ones(); SP2 = SP2.opand(SP1); - for (int i=0; i<16; i++){ASSERT_EQ(SP2.u.s8[i],0);} + for (int i=0; i<16; i++) { + ASSERT_EQ(SP2.u.u8[i],0); + } } - TEST(SuperVectorUtilsTest,OR128c){ - m128_t SPResult = SuperVector<16>::Zeroes() | SuperVector<16>::Ones(); - for (int i=0; i<16; i++){ASSERT_EQ(SPResult.u.s8[i],-1);} + auto SPResult = SuperVector<16>::Zeroes() | SuperVector<16>::Ones(); + for (int i=0; i<16; i++) { + ASSERT_EQ(SPResult.u.u8[i],0xff); + } } TEST(SuperVectorUtilsTest,OPANDNOT128c){ - m128_t SP1 = SuperVector<16>::Zeroes(); - m128_t SP2 = SuperVector<16>::Ones(); + auto SP1 = SuperVector<16>::Zeroes(); + auto SP2 = SuperVector<16>::Ones(); SP2 = SP2.opandnot(SP1); - for (int i=0; i<16; i++){ASSERT_EQ(SP2.u.s8[i],0);} + for (int i=0; i<16; i++) { + ASSERT_EQ(SP2.u.s8[i],0); + } } TEST(SuperVectorUtilsTest,Movemask128c){ - uint8_t vec[16] = {0,0xff,0xff,3,4,5,6,7,8,9,0xff,11,12,13,14,0xff}; + u8 vec[16] = { 0, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0, 0, 0, 0xff }; /*according to the array above the movemask outcome must be the following: 10000100000000110 or 0x8406*/ - m128_t SP = SuperVector<16>::loadu(vec); - int SP_Mask = SP.movemask(); - ASSERT_EQ(SP_Mask,0x8406); + auto SP = SuperVector<16>::loadu(vec); + int mask = SP.movemask(); + ASSERT_EQ(mask, 0x8c06); } TEST(SuperVectorUtilsTest,Eqmask128c){ - uint8_t vec[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - uint8_t vec2[16] = {16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; - uint8_t vec3[16] = {16,17,3,4,5,6,7,8,1,2,11,12,13,14,15,16}; - m128_t SP = SuperVector<16>::loadu(vec); - m128_t SP1 = SuperVector<16>::loadu(vec); - int SP_Mask = SP.eqmask(SP1); - /*if masks are equal the outcome is 1111111111111111 or 0xffff*/ - ASSERT_EQ(SP_Mask,0xffff); - SP = SuperVector<16>::loadu(vec); - SP1 = SuperVector<16>::loadu(vec2); - SP_Mask = SP.eqmask(SP1); - ASSERT_EQ(SP_Mask,0); - SP = SuperVector<16>::loadu(vec2); - SP1 = SuperVector<16>::loadu(vec3); - SP_Mask = SP.eqmask(SP1); - ASSERT_EQ(SP_Mask,3); + u8 vec[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 }; + u8 vec2[16] = { 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 }; + u8 vec3[16] = { 16,17, 3, 4, 5, 6, 7, 8, 1, 2,11,12,13,14,15,16 }; + auto SP = SuperVector<16>::loadu(vec); + auto SP1 = SuperVector<16>::loadu(vec2); + auto SP2 = SuperVector<16>::loadu(vec3); + int mask = SP.eqmask(SP); + /*if vectors are equal the mask is 1111111111111111 or 0xffff*/ + ASSERT_EQ(mask,0xffff); + mask = SP.eqmask(SP2); + ASSERT_EQ(mask,0); + mask = SP1.eqmask(SP2); + ASSERT_EQ(mask,3); } /*Define LSHIFT128 macro*/ -#define TEST_LSHIFT128(l) { SP_after_Lshift = SP<<(l); \ - buf[l-1]=0; \ - for(int i=0; i<16; i++){ASSERT_EQ(SP_after_Lshift.u.s8[i],buf[i]);} \ - } +#define TEST_LSHIFT128(buf, vec, v, l) { \ + auto v_shifted = v << (l); \ + for (int i=15; i>= l; --i) { \ + buf[i] = vec[i-l]; \ + } \ + for (int i=0; i::loadu(vec); - char buf[16]; - for (int i=0; i<16; i++){buf[i]=0xff;} - m128_t SP_after_Lshift = SP<<(0); - TEST_LSHIFT128(1) - TEST_LSHIFT128(2) - TEST_LSHIFT128(3) - TEST_LSHIFT128(4) - TEST_LSHIFT128(5) - TEST_LSHIFT128(6) - TEST_LSHIFT128(7) - TEST_LSHIFT128(8) - TEST_LSHIFT128(9) - TEST_LSHIFT128(10) - TEST_LSHIFT128(11) - TEST_LSHIFT128(12) - TEST_LSHIFT128(13) - TEST_LSHIFT128(14) - TEST_LSHIFT128(15) - TEST_LSHIFT128(16) + u8 vec[16] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 }; + auto SP = SuperVector<16>::loadu(vec); + u8 buf[16]; + TEST_LSHIFT128(buf, vec, SP, 0); + TEST_LSHIFT128(buf, vec, SP, 1); + TEST_LSHIFT128(buf, vec, SP, 2); + TEST_LSHIFT128(buf, vec, SP, 3); + TEST_LSHIFT128(buf, vec, SP, 4); + TEST_LSHIFT128(buf, vec, SP, 5); + TEST_LSHIFT128(buf, vec, SP, 6); + TEST_LSHIFT128(buf, vec, SP, 7); + TEST_LSHIFT128(buf, vec, SP, 8); + TEST_LSHIFT128(buf, vec, SP, 9); + TEST_LSHIFT128(buf, vec, SP, 10); + TEST_LSHIFT128(buf, vec, SP, 11); + TEST_LSHIFT128(buf, vec, SP, 12); + TEST_LSHIFT128(buf, vec, SP, 13); + TEST_LSHIFT128(buf, vec, SP, 14); + TEST_LSHIFT128(buf, vec, SP, 15); + TEST_LSHIFT128(buf, vec, SP, 16); } TEST(SuperVectorUtilsTest,LShift64_128c){ - u_int64_t vec[2] = {128, 512}; - m128_t SP = SuperVector<16>::loadu(vec); - for(int s = 0; s<16; s++){ - m128_t SP_after_shift = SP.lshift64(s); - for (int i=0; i<2; i++){ASSERT_EQ(SP_after_shift.u.u64[i],vec[i]<::loadu(vec); + for(int s = 0; s<16; s++) { + auto SP_after_shift = SP.lshift64(s); + for (int i=0; i<2; i++) { + ASSERT_EQ(SP_after_shift.u.u64[i], vec[i] << s); + } } } TEST(SuperVectorUtilsTest,RShift64_128c){ - u_int64_t vec[2] = {128, 512}; - m128_t SP = SuperVector<16>::loadu(vec); - for(int s = 0; s<16; s++){ - m128_t SP_after_shift = SP.rshift64(s); - for (int i=0; i<2; i++){ASSERT_EQ(SP_after_shift.u.u64[i],vec[i]>>s);} + u64a vec[2] = {128, 512}; + auto SP = SuperVector<16>::loadu(vec); + for(int s = 0; s<16; s++) { + auto SP_after_shift = SP.rshift64(s); + for (int i=0; i<2; i++) { + ASSERT_EQ(SP_after_shift.u.u64[i], vec[i] >> s); + } } } - /*Define RSHIFT128 macro*/ -#define TEST_RSHIFT128(l) { SP_after_Rshift = SP>>(l); \ - buf[16-l] = 0; \ - for(int i=0; i<16; i++) {ASSERT_EQ(SP_after_Rshift.u.u8[i],buf[i]);} \ - } +#define TEST_RSHIFT128(buf, vec, v, l) { \ + auto v_shifted = v >> (l); \ + for (int i=0; i<16-l; i++) { \ + buf[i] = vec[i+l]; \ + } \ + for (int i=16-l; i<16; i++) { \ + buf[i] = 0; \ + } \ + for(int i=0; i<16; i++) { \ + ASSERT_EQ(v_shifted.u.u8[i], buf[i]); \ + } \ + } TEST(SuperVectorUtilsTest,RShift128c){ - char vec[16]; - for (int i=0; i<16; i++) {vec[i]=0xff;} - m128_t SP = SuperVector<16>::loadu(vec); - uint8_t buf[16]; - for (int i=0; i<16; i++){buf[i]=0xff;} - m128_t SP_after_Rshift = SP>>(0); - TEST_RSHIFT128(1) - TEST_RSHIFT128(2) - TEST_RSHIFT128(3) - TEST_RSHIFT128(4) - TEST_RSHIFT128(5) - TEST_RSHIFT128(6) - TEST_RSHIFT128(7) - TEST_RSHIFT128(8) - TEST_RSHIFT128(9) - TEST_RSHIFT128(10) - TEST_RSHIFT128(11) - TEST_RSHIFT128(12) - TEST_RSHIFT128(13) - TEST_RSHIFT128(14) - TEST_RSHIFT128(15) - TEST_RSHIFT128(16) + u8 vec[16] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 }; + auto SP = SuperVector<16>::loadu(vec); + u8 buf[16]; + TEST_RSHIFT128(buf, vec, SP, 0); + TEST_RSHIFT128(buf, vec, SP, 1); + TEST_RSHIFT128(buf, vec, SP, 2); + TEST_RSHIFT128(buf, vec, SP, 3); + TEST_RSHIFT128(buf, vec, SP, 4); + TEST_RSHIFT128(buf, vec, SP, 5); + TEST_RSHIFT128(buf, vec, SP, 6); + TEST_RSHIFT128(buf, vec, SP, 7); + TEST_RSHIFT128(buf, vec, SP, 8); + TEST_RSHIFT128(buf, vec, SP, 9); + TEST_RSHIFT128(buf, vec, SP, 10); + TEST_RSHIFT128(buf, vec, SP, 11); + TEST_RSHIFT128(buf, vec, SP, 12); + TEST_RSHIFT128(buf, vec, SP, 13); + TEST_RSHIFT128(buf, vec, SP, 14); + TEST_RSHIFT128(buf, vec, SP, 15); + TEST_RSHIFT128(buf, vec, SP, 16); } - -TEST(SuperVectorUtilsTest,pshufbc){ +TEST(SuperVectorUtilsTest,pshufbc) { srand (time(NULL)); - uint8_t vec[16]; - for (int i=0; i<16; i++){vec[i]=rand() % 100 + 1;;}; - uint8_t vec2[16]; - for (int i=0; i<16; i++){vec2[i]=i;}; - m128_t SP1 = SuperVector<16>::loadu(vec); - m128_t SP2 = SuperVector<16>::loadu(vec2); - m128_t SResult = SP1.pshufb(SP2); - for (int i=0; i<16; i++){ASSERT_EQ(vec[vec2[i]],SResult.u.u8[i]);} + u8 vec[16]; + for (int i=0; i<16; i++) { + vec[i] = rand() % 100 + 1; + } + u8 vec2[16]; + for (int i=0; i<16; i++) { + vec2[i]=i; + } + auto SP1 = SuperVector<16>::loadu(vec); + auto SP2 = SuperVector<16>::loadu(vec2); + auto SResult = SP1.pshufb(SP2); + for (int i=0; i<16; i++) { + ASSERT_EQ(vec[vec2[i]],SResult.u.u8[i]); + } } - /*Define ALIGNR128 macro*/ -#define TEST_ALIGNR128(l) { SP_test = SP1.alignr(SP,l); \ - for (int i=0; i<16; i++){ASSERT_EQ(SP_test.u.u8[i],vec[i+l]);} \ - } +#define TEST_ALIGNR128(v1, v2, buf, l) { \ + auto v_aligned = v2.alignr(v1,l); \ + printv_u8("v1", v1); \ + printv_u8("v2", v2); \ + printv_u8("v_aligned", v_aligned); \ + for (size_t i=0; i<16; i++) { \ + ASSERT_EQ(v_aligned.u.u8[i], vec[16 -l +i]); \ + } \ + } TEST(SuperVectorUtilsTest,Alignr128c){ - uint8_t vec[32]; - for (int i=0; i<32; i++) {vec[i]=i;} - m128_t SP = SuperVector<16>::loadu(vec); - m128_t SP1 = SuperVector<16>::loadu(vec+16); - m128_t SP_test = SP1.alignr(SP,0); - TEST_ALIGNR128(1) - TEST_ALIGNR128(2) - TEST_ALIGNR128(3) - TEST_ALIGNR128(4) - TEST_ALIGNR128(5) - TEST_ALIGNR128(6) - TEST_ALIGNR128(7) - TEST_ALIGNR128(8) - TEST_ALIGNR128(9) - TEST_ALIGNR128(10) - TEST_ALIGNR128(11) - TEST_ALIGNR128(12) - TEST_ALIGNR128(13) - TEST_ALIGNR128(14) - TEST_ALIGNR128(15) - TEST_ALIGNR128(16) - + u8 vec[32]; + for (int i=0; i<32; i++) { + vec[i]=i; + } + auto SP1 = SuperVector<16>::loadu(vec); + auto SP2 = SuperVector<16>::loadu(vec+16); + TEST_ALIGNR128(SP1, SP2, vec, 0); + TEST_ALIGNR128(SP1, SP2, vec, 1); + TEST_ALIGNR128(SP1, SP2, vec, 2); + TEST_ALIGNR128(SP1, SP2, vec, 3); + TEST_ALIGNR128(SP1, SP2, vec, 4); + TEST_ALIGNR128(SP1, SP2, vec, 5); + TEST_ALIGNR128(SP1, SP2, vec, 6); + TEST_ALIGNR128(SP1, SP2, vec, 7); + TEST_ALIGNR128(SP1, SP2, vec, 8); + TEST_ALIGNR128(SP1, SP2, vec, 9); + TEST_ALIGNR128(SP1, SP2, vec, 10); + TEST_ALIGNR128(SP1, SP2, vec, 11); + TEST_ALIGNR128(SP1, SP2, vec, 12); + TEST_ALIGNR128(SP1, SP2, vec, 13); + TEST_ALIGNR128(SP1, SP2, vec, 14); + TEST_ALIGNR128(SP1, SP2, vec, 15); + TEST_ALIGNR128(SP1, SP2, vec, 16); }