Fix the unit tests and, correspondingly, the ARM SuperVector methods those tests exercise; add print functions for SuperVector

This commit is contained in:
Konstantinos Margaritis 2021-06-23 22:16:24 +03:00
parent 1e434a9b3d
commit e49fa3a97a
3 changed files with 282 additions and 213 deletions

View File

@ -148,7 +148,7 @@ really_inline SuperVector<16> SuperVector<16>::operator&(SuperVector<16> const b
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator|(SuperVector<16> const b) const really_inline SuperVector<16> SuperVector<16>::operator|(SuperVector<16> const b) const
{ {
return {vandq_s8(u.v128[0], b.u.v128[0])}; return {vorrq_s8(u.v128[0], b.u.v128[0])};
} }
template <> template <>
@ -193,31 +193,31 @@ really_inline typename SuperVector<16>::movemask_type SuperVector<16>::eqmask(Su
#ifndef HS_OPTIMIZE #ifndef HS_OPTIMIZE
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const
{ {
return {vshlq_n_s32(u.v128[0], N)}; return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), N)};
} }
#else #else
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const
{ {
switch(N) { switch(N) {
case 0: return *this; break; case 0: return *this; break;
case 1: return {vshlq_n_s32((int16x8_t) u.v128[0], 1)}; break; case 1: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 1)}; break;
case 2: return {vshlq_n_s32((int16x8_t) u.v128[0], 2)}; break; case 2: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 2)}; break;
case 3: return {vshlq_n_s32((int16x8_t) u.v128[0], 3)}; break; case 3: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 3)}; break;
case 4: return {vshlq_n_s32((int16x8_t) u.v128[0], 4)}; break; case 4: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 4)}; break;
case 5: return {vshlq_n_s32((int16x8_t) u.v128[0], 5)}; break; case 5: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 5)}; break;
case 6: return {vshlq_n_s32((int16x8_t) u.v128[0], 6)}; break; case 6: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 6)}; break;
case 7: return {vshlq_n_s32((int16x8_t) u.v128[0], 7)}; break; case 7: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 7)}; break;
case 8: return {vshlq_n_s32((int16x8_t) u.v128[0], 8)}; break; case 8: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 8)}; break;
case 9: return {vshlq_n_s32((int16x8_t) u.v128[0], 9)}; break; case 9: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 9)}; break;
case 10: return {vshlq_n_s32((int16x8_t) u.v128[0], 10)}; break; case 10: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 10)}; break;
case 11: return {vshlq_n_s32((int16x8_t) u.v128[0], 11)}; break; case 11: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 11)}; break;
case 12: return {vshlq_n_s32((int16x8_t) u.v128[0], 12)}; break; case 12: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 12)}; break;
case 13: return {vshlq_n_s32((int16x8_t) u.v128[0], 13)}; break; case 13: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 13)}; break;
case 14: return {vshlq_n_s32((int16x8_t) u.v128[0], 14)}; break; case 14: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 14)}; break;
case 15: return {vshlq_n_s32((int16x8_t) u.v128[0], 15)}; break; case 15: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 15)}; break;
case 16: return Zeroes(); break; case 16: return Zeroes(); break;
default: break; default: break;
} }
@ -225,33 +225,34 @@ really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const
} }
#endif #endif
#ifdef HS_OPTIMIZE #ifndef HS_OPTIMIZE
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const
{ {
return {vshrq_n_s32(u.v128[0], N)}; return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 16 - N)};
} }
#else #else
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const
{ {
switch(N) { switch(N) {
case 0: return {vshrq_n_s32(u.v128[0], 0)}; break; case 0: return *this; break;
case 1: return {vshrq_n_s32(u.v128[0], 1)}; break; case 1: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 15)}; break;
case 2: return {vshrq_n_s32(u.v128[0], 2)}; break; case 2: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 14)}; break;
case 3: return {vshrq_n_s32(u.v128[0], 3)}; break; case 3: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 13)}; break;
case 4: return {vshrq_n_s32(u.v128[0], 4)}; break; case 4: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 12)}; break;
case 5: return {vshrq_n_s32(u.v128[0], 5)}; break; case 5: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 11)}; break;
case 6: return {vshrq_n_s32(u.v128[0], 6)}; break; case 6: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 10)}; break;
case 7: return {vshrq_n_s32(u.v128[0], 7)}; break; case 7: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 9)}; break;
case 8: return {vshrq_n_s32(u.v128[0], 8)}; break; case 8: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 8)}; break;
case 9: return {vshrq_n_s32(u.v128[0], 9)}; break; case 9: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 7)}; break;
case 10: return {vshrq_n_s32(u.v128[0], 10)}; break; case 10: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 6)}; break;
case 11: return {vshrq_n_s32(u.v128[0], 11)}; break; case 11: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 5)}; break;
case 12: return {vshrq_n_s32(u.v128[0], 12)}; break; case 12: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 4)}; break;
case 13: return {vshrq_n_s32(u.v128[0], 13)}; break; case 13: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 3)}; break;
case 14: return {vshrq_n_s32(u.v128[0], 14)}; break; case 14: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 2)}; break;
case 15: return {vshrq_n_s32(u.v128[0], 15)}; break; case 15: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 1)}; break;
case 16: return Zeroes(); break;
default: break; default: break;
} }
return *this; return *this;
@ -286,30 +287,30 @@ really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint
template<> template<>
really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> r, int8_t offset) really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> r, int8_t offset)
{ {
return {vextq_s8((int16x8_t)u.v128[0], (int16x8_t)r.u.v128[0], offset)}; return {vextq_s8((int16x8_t)r.u.v128[0], (int16x8_t)u.v128[0], 16 - offset)};
} }
#else #else
template<> template<>
really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> l, int8_t offset) really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> r, int8_t offset)
{ {
switch(offset) { switch(offset) {
case 0: return *this; break; case 0: return *this; break;
case 1: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 1)}; break; case 1: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 15)}; break;
case 2: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 2)}; break; case 2: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 14)}; break;
case 3: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 3)}; break; case 3: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 13)}; break;
case 4: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 4)}; break; case 4: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 12)}; break;
case 5: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 5)}; break; case 5: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 11)}; break;
case 6: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 6)}; break; case 6: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 10)}; break;
case 7: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 7)}; break; case 7: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 9)}; break;
case 8: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 8)}; break; case 8: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 8)}; break;
case 9: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 9)}; break; case 9: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 7)}; break;
case 10: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 10)}; break; case 10: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 6)}; break;
case 11: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 11)}; break; case 11: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 5)}; break;
case 12: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 12)}; break; case 12: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 4)}; break;
case 13: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 13)}; break; case 13: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 3)}; break;
case 14: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 14)}; break; case 14: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 2)}; break;
case 15: return {vextq_s8((int16x8_t) u.v128[0], (int16x8_t) l.u.v128[0], 15)}; break; case 15: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 1)}; break;
case 16: return l; break; case 16: return r; break;
default: break; default: break;
} }
return *this; return *this;

View File

@ -31,6 +31,7 @@
#define SIMD_TYPES_HPP #define SIMD_TYPES_HPP
#include <cstdint> #include <cstdint>
#include <cstdio>
#if defined(ARCH_IA32) || defined(ARCH_X86_64) #if defined(ARCH_IA32) || defined(ARCH_X86_64)
#include "util/simd/arch/x86/types.hpp" #include "util/simd/arch/x86/types.hpp"
@ -213,5 +214,38 @@ public:
#endif #endif
template <uint16_t S>
static void printv_u8(const char *label, SuperVector<S> &v) {
printf("%s: ", label);
for(int i=0; i < S; i++)
printf("%02x ", v.u.u8[i]);
printf("\n");
}
template <uint16_t S>
static void printv_u16(const char *label, SuperVector<S> &v) {
printf("%s: ", label);
for(int i=0; i < S/sizeof(u16); i++)
printf("%04x ", v.u.u16[i]);
printf("\n");
}
template <uint16_t S>
static void printv_u32(const char *label, SuperVector<S> &v) {
printf("%s: ", label);
for(int i=0; i < S/sizeof(u32); i++)
printf("%08x ", v.u.u32[i]);
printf("\n");
}
template <uint16_t S>
static inline void printv_u64(const char *label, SuperVector<S> &v) {
printf("%s: ", label);
for(int i=0; i < S/sizeof(u64a); i++)
printf("%016lx ", v.u.u64[i]);
printf("\n");
}
#endif /* SIMD_TYPES_H */ #endif /* SIMD_TYPES_H */

View File

@ -38,232 +38,266 @@
TEST(SuperVectorUtilsTest, Zero128c) { TEST(SuperVectorUtilsTest, Zero128c) {
m128_t zeroes = SuperVector<16>::Zeroes(); auto zeroes = SuperVector<16>::Zeroes();
char buf[16]{0}; u8 buf[16]{0};
for(int i=0; i<16; i++){ASSERT_EQ(zeroes.u.s8[i],buf[i]);} for(int i=0; i<16; i++) {
ASSERT_EQ(zeroes.u.u8[i],buf[i]);
}
} }
TEST(SuperVectorUtilsTest, Ones128c) { TEST(SuperVectorUtilsTest, Ones128c) {
m128_t ones = SuperVector<16>::Ones(); auto ones = SuperVector<16>::Ones();
char buf[16]; u8 buf[16];
for (int i=0; i<16; i++){buf[i]=0xff;} for (int i=0; i<16; i++) { buf[i]=0xff; }
for(int i=0; i<16; i++){ASSERT_EQ(ones.u.s8[i],buf[i]);} for(int i=0; i<16; i++) {
ASSERT_EQ(ones.u.u8[i],buf[i]);
}
} }
TEST(SuperVectorUtilsTest, Loadu128c) { TEST(SuperVectorUtilsTest, Loadu128c) {
char vec[32]; u8 vec[32];
for(int i=0; i<32;i++){vec[i]=i;} for(int i=0; i<32;i++) { vec[i]=i; }
for(int i=0; i<=16;i++){ for(int i=0; i<=16;i++) {
m128_t SP = SuperVector<16>::loadu(vec+i); auto SP = SuperVector<16>::loadu(vec+i);
for(int j=0; j<16; j++){ for(int j=0; j<16; j++) {
ASSERT_EQ(SP.u.s8[j],vec[j+i]); ASSERT_EQ(SP.u.u8[j],vec[j+i]);
} }
} }
} }
TEST(SuperVectorUtilsTest, Load128c) { TEST(SuperVectorUtilsTest, Load128c) {
char vec[128] __attribute__((aligned(16))); u8 ALIGN_ATTR(16) vec[32];
for(int i=0; i<128;i++){vec[i]=i;} for(int i=0; i<32;i++) { vec[i]=i; }
for(int i=0;i<=16;i+=16){ for(int i=0;i<=16;i+=16) {
m128_t SP = SuperVector<16>::loadu(vec+i); auto SP = SuperVector<16>::loadu(vec+i);
for(int j=0; j<16; j++){ for(int j=0; j<16; j++){
ASSERT_EQ(SP.u.s8[j],vec[j+i]); ASSERT_EQ(SP.u.u8[j],vec[j+i]);
} }
} }
} }
TEST(SuperVectorUtilsTest,Equal128c){ TEST(SuperVectorUtilsTest,Equal128c){
char vec[32]; u8 vec[32];
for (int i=0; i<32; i++) {vec[i]=i;}; for (int i=0; i<32; i++) {vec[i]=i;};
m128_t SP1 = SuperVector<16>::loadu(vec); auto SP1 = SuperVector<16>::loadu(vec);
m128_t SP2 = SuperVector<16>::loadu(vec+16); auto SP2 = SuperVector<16>::loadu(vec+16);
char buf[16]={0}; u8 buf[16]={0};
/*check for equality byte by byte*/ /*check for equality byte by byte*/
for (int s=0; s<16; s++){ for (int s=0; s<16; s++){
if(vec[s]==vec[s+16]){ if(vec[s]==vec[s+16]){
buf[s]=1; buf[s]=1;
} }
} }
m128_t SPResult = SP1.eq(SP2); auto SPResult = SP1.eq(SP2);
for (int i=0; i<16; i++){ASSERT_EQ(SPResult.u.s8[i],buf[i]);} for (int i=0; i<16; i++) {
ASSERT_EQ(SPResult.u.s8[i],buf[i]);
}
} }
TEST(SuperVectorUtilsTest,And128c){ TEST(SuperVectorUtilsTest,And128c){
m128_t SPResult = SuperVector<16>::Zeroes() & SuperVector<16>::Ones(); auto SPResult = SuperVector<16>::Zeroes() & SuperVector<16>::Ones();
for (int i=0; i<16; i++){ASSERT_EQ(SPResult.u.s8[i],0);} for (int i=0; i<16; i++) {
ASSERT_EQ(SPResult.u.u8[i],0);
}
} }
TEST(SuperVectorUtilsTest,OPAnd128c){ TEST(SuperVectorUtilsTest,OPAnd128c){
m128_t SP1 = SuperVector<16>::Zeroes(); auto SP1 = SuperVector<16>::Zeroes();
m128_t SP2 = SuperVector<16>::Ones(); auto SP2 = SuperVector<16>::Ones();
SP2 = SP2.opand(SP1); SP2 = SP2.opand(SP1);
for (int i=0; i<16; i++){ASSERT_EQ(SP2.u.s8[i],0);} for (int i=0; i<16; i++) {
ASSERT_EQ(SP2.u.u8[i],0);
}
} }
TEST(SuperVectorUtilsTest,OR128c){ TEST(SuperVectorUtilsTest,OR128c){
m128_t SPResult = SuperVector<16>::Zeroes() | SuperVector<16>::Ones(); auto SPResult = SuperVector<16>::Zeroes() | SuperVector<16>::Ones();
for (int i=0; i<16; i++){ASSERT_EQ(SPResult.u.s8[i],-1);} for (int i=0; i<16; i++) {
ASSERT_EQ(SPResult.u.u8[i],0xff);
}
} }
TEST(SuperVectorUtilsTest,OPANDNOT128c){ TEST(SuperVectorUtilsTest,OPANDNOT128c){
m128_t SP1 = SuperVector<16>::Zeroes(); auto SP1 = SuperVector<16>::Zeroes();
m128_t SP2 = SuperVector<16>::Ones(); auto SP2 = SuperVector<16>::Ones();
SP2 = SP2.opandnot(SP1); SP2 = SP2.opandnot(SP1);
for (int i=0; i<16; i++){ASSERT_EQ(SP2.u.s8[i],0);} for (int i=0; i<16; i++) {
ASSERT_EQ(SP2.u.s8[i],0);
}
} }
TEST(SuperVectorUtilsTest,Movemask128c){ TEST(SuperVectorUtilsTest,Movemask128c){
uint8_t vec[16] = {0,0xff,0xff,3,4,5,6,7,8,9,0xff,11,12,13,14,0xff}; u8 vec[16] = { 0, 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0, 0, 0, 0xff };
/*according to the array above the movemask outcome must be the following: /*according to the array above the movemask outcome must be the following:
10000100000000110 or 0x8406*/ 1000110000000110 or 0x8c06*/
m128_t SP = SuperVector<16>::loadu(vec); auto SP = SuperVector<16>::loadu(vec);
int SP_Mask = SP.movemask(); int mask = SP.movemask();
ASSERT_EQ(SP_Mask,0x8406); ASSERT_EQ(mask, 0x8c06);
} }
TEST(SuperVectorUtilsTest,Eqmask128c){ TEST(SuperVectorUtilsTest,Eqmask128c){
uint8_t vec[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; u8 vec[16] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 };
uint8_t vec2[16] = {16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; u8 vec2[16] = { 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 };
uint8_t vec3[16] = {16,17,3,4,5,6,7,8,1,2,11,12,13,14,15,16}; u8 vec3[16] = { 16,17, 3, 4, 5, 6, 7, 8, 1, 2,11,12,13,14,15,16 };
m128_t SP = SuperVector<16>::loadu(vec); auto SP = SuperVector<16>::loadu(vec);
m128_t SP1 = SuperVector<16>::loadu(vec); auto SP1 = SuperVector<16>::loadu(vec2);
int SP_Mask = SP.eqmask(SP1); auto SP2 = SuperVector<16>::loadu(vec3);
/*if masks are equal the outcome is 1111111111111111 or 0xffff*/ int mask = SP.eqmask(SP);
ASSERT_EQ(SP_Mask,0xffff); /*if vectors are equal the mask is 1111111111111111 or 0xffff*/
SP = SuperVector<16>::loadu(vec); ASSERT_EQ(mask,0xffff);
SP1 = SuperVector<16>::loadu(vec2); mask = SP.eqmask(SP2);
SP_Mask = SP.eqmask(SP1); ASSERT_EQ(mask,0);
ASSERT_EQ(SP_Mask,0); mask = SP1.eqmask(SP2);
SP = SuperVector<16>::loadu(vec2); ASSERT_EQ(mask,3);
SP1 = SuperVector<16>::loadu(vec3);
SP_Mask = SP.eqmask(SP1);
ASSERT_EQ(SP_Mask,3);
} }
/*Define LSHIFT128 macro*/ /*Define LSHIFT128 macro*/
#define TEST_LSHIFT128(l) { SP_after_Lshift = SP<<(l); \ #define TEST_LSHIFT128(buf, vec, v, l) { \
buf[l-1]=0; \ auto v_shifted = v << (l); \
for(int i=0; i<16; i++){ASSERT_EQ(SP_after_Lshift.u.s8[i],buf[i]);} \ for (int i=15; i>= l; --i) { \
buf[i] = vec[i-l]; \
} \
for (int i=0; i<l; i++) { \
buf[i] = 0; \
} \
for(int i=0; i<16; i++) { \
ASSERT_EQ(v_shifted.u.u8[i], buf[i]); \
} \
} }
TEST(SuperVectorUtilsTest,LShift128c){ TEST(SuperVectorUtilsTest,LShift128c){
char vec[16]; u8 vec[16] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 };
for (int i=0; i<16; i++) {vec[i]=0xff;} auto SP = SuperVector<16>::loadu(vec);
m128_t SP = SuperVector<16>::loadu(vec); u8 buf[16];
char buf[16]; TEST_LSHIFT128(buf, vec, SP, 0);
for (int i=0; i<16; i++){buf[i]=0xff;} TEST_LSHIFT128(buf, vec, SP, 1);
m128_t SP_after_Lshift = SP<<(0); TEST_LSHIFT128(buf, vec, SP, 2);
TEST_LSHIFT128(1) TEST_LSHIFT128(buf, vec, SP, 3);
TEST_LSHIFT128(2) TEST_LSHIFT128(buf, vec, SP, 4);
TEST_LSHIFT128(3) TEST_LSHIFT128(buf, vec, SP, 5);
TEST_LSHIFT128(4) TEST_LSHIFT128(buf, vec, SP, 6);
TEST_LSHIFT128(5) TEST_LSHIFT128(buf, vec, SP, 7);
TEST_LSHIFT128(6) TEST_LSHIFT128(buf, vec, SP, 8);
TEST_LSHIFT128(7) TEST_LSHIFT128(buf, vec, SP, 9);
TEST_LSHIFT128(8) TEST_LSHIFT128(buf, vec, SP, 10);
TEST_LSHIFT128(9) TEST_LSHIFT128(buf, vec, SP, 11);
TEST_LSHIFT128(10) TEST_LSHIFT128(buf, vec, SP, 12);
TEST_LSHIFT128(11) TEST_LSHIFT128(buf, vec, SP, 13);
TEST_LSHIFT128(12) TEST_LSHIFT128(buf, vec, SP, 14);
TEST_LSHIFT128(13) TEST_LSHIFT128(buf, vec, SP, 15);
TEST_LSHIFT128(14) TEST_LSHIFT128(buf, vec, SP, 16);
TEST_LSHIFT128(15)
TEST_LSHIFT128(16)
} }
TEST(SuperVectorUtilsTest,LShift64_128c){ TEST(SuperVectorUtilsTest,LShift64_128c){
u_int64_t vec[2] = {128, 512}; u64a vec[2] = {128, 512};
m128_t SP = SuperVector<16>::loadu(vec); auto SP = SuperVector<16>::loadu(vec);
for(int s = 0; s<16; s++){ for(int s = 0; s<16; s++) {
m128_t SP_after_shift = SP.lshift64(s); auto SP_after_shift = SP.lshift64(s);
for (int i=0; i<2; i++){ASSERT_EQ(SP_after_shift.u.u64[i],vec[i]<<s);} for (int i=0; i<2; i++) {
ASSERT_EQ(SP_after_shift.u.u64[i], vec[i] << s);
}
} }
} }
TEST(SuperVectorUtilsTest,RShift64_128c){ TEST(SuperVectorUtilsTest,RShift64_128c){
u_int64_t vec[2] = {128, 512}; u64a vec[2] = {128, 512};
m128_t SP = SuperVector<16>::loadu(vec); auto SP = SuperVector<16>::loadu(vec);
for(int s = 0; s<16; s++){ for(int s = 0; s<16; s++) {
m128_t SP_after_shift = SP.rshift64(s); auto SP_after_shift = SP.rshift64(s);
for (int i=0; i<2; i++){ASSERT_EQ(SP_after_shift.u.u64[i],vec[i]>>s);} for (int i=0; i<2; i++) {
ASSERT_EQ(SP_after_shift.u.u64[i], vec[i] >> s);
}
} }
} }
/*Define RSHIFT128 macro*/ /*Define RSHIFT128 macro*/
#define TEST_RSHIFT128(l) { SP_after_Rshift = SP>>(l); \ #define TEST_RSHIFT128(buf, vec, v, l) { \
buf[16-l] = 0; \ auto v_shifted = v >> (l); \
for(int i=0; i<16; i++) {ASSERT_EQ(SP_after_Rshift.u.u8[i],buf[i]);} \ for (int i=0; i<16-l; i++) { \
buf[i] = vec[i+l]; \
} \
for (int i=16-l; i<16; i++) { \
buf[i] = 0; \
} \
for(int i=0; i<16; i++) { \
ASSERT_EQ(v_shifted.u.u8[i], buf[i]); \
} \
} }
TEST(SuperVectorUtilsTest,RShift128c){ TEST(SuperVectorUtilsTest,RShift128c){
char vec[16]; u8 vec[16] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 };
for (int i=0; i<16; i++) {vec[i]=0xff;} auto SP = SuperVector<16>::loadu(vec);
m128_t SP = SuperVector<16>::loadu(vec); u8 buf[16];
uint8_t buf[16]; TEST_RSHIFT128(buf, vec, SP, 0);
for (int i=0; i<16; i++){buf[i]=0xff;} TEST_RSHIFT128(buf, vec, SP, 1);
m128_t SP_after_Rshift = SP>>(0); TEST_RSHIFT128(buf, vec, SP, 2);
TEST_RSHIFT128(1) TEST_RSHIFT128(buf, vec, SP, 3);
TEST_RSHIFT128(2) TEST_RSHIFT128(buf, vec, SP, 4);
TEST_RSHIFT128(3) TEST_RSHIFT128(buf, vec, SP, 5);
TEST_RSHIFT128(4) TEST_RSHIFT128(buf, vec, SP, 6);
TEST_RSHIFT128(5) TEST_RSHIFT128(buf, vec, SP, 7);
TEST_RSHIFT128(6) TEST_RSHIFT128(buf, vec, SP, 8);
TEST_RSHIFT128(7) TEST_RSHIFT128(buf, vec, SP, 9);
TEST_RSHIFT128(8) TEST_RSHIFT128(buf, vec, SP, 10);
TEST_RSHIFT128(9) TEST_RSHIFT128(buf, vec, SP, 11);
TEST_RSHIFT128(10) TEST_RSHIFT128(buf, vec, SP, 12);
TEST_RSHIFT128(11) TEST_RSHIFT128(buf, vec, SP, 13);
TEST_RSHIFT128(12) TEST_RSHIFT128(buf, vec, SP, 14);
TEST_RSHIFT128(13) TEST_RSHIFT128(buf, vec, SP, 15);
TEST_RSHIFT128(14) TEST_RSHIFT128(buf, vec, SP, 16);
TEST_RSHIFT128(15)
TEST_RSHIFT128(16)
} }
TEST(SuperVectorUtilsTest,pshufbc) {
TEST(SuperVectorUtilsTest,pshufbc){
srand (time(NULL)); srand (time(NULL));
uint8_t vec[16]; u8 vec[16];
for (int i=0; i<16; i++){vec[i]=rand() % 100 + 1;;}; for (int i=0; i<16; i++) {
uint8_t vec2[16]; vec[i] = rand() % 100 + 1;
for (int i=0; i<16; i++){vec2[i]=i;}; }
m128_t SP1 = SuperVector<16>::loadu(vec); u8 vec2[16];
m128_t SP2 = SuperVector<16>::loadu(vec2); for (int i=0; i<16; i++) {
m128_t SResult = SP1.pshufb(SP2); vec2[i]=i;
for (int i=0; i<16; i++){ASSERT_EQ(vec[vec2[i]],SResult.u.u8[i]);} }
auto SP1 = SuperVector<16>::loadu(vec);
auto SP2 = SuperVector<16>::loadu(vec2);
auto SResult = SP1.pshufb(SP2);
for (int i=0; i<16; i++) {
ASSERT_EQ(vec[vec2[i]],SResult.u.u8[i]);
}
} }
/*Define ALIGNR128 macro*/ /*Define ALIGNR128 macro*/
#define TEST_ALIGNR128(l) { SP_test = SP1.alignr(SP,l); \ #define TEST_ALIGNR128(v1, v2, buf, l) { \
for (int i=0; i<16; i++){ASSERT_EQ(SP_test.u.u8[i],vec[i+l]);} \ auto v_aligned = v2.alignr(v1,l); \
printv_u8("v1", v1); \
printv_u8("v2", v2); \
printv_u8("v_aligned", v_aligned); \
for (size_t i=0; i<16; i++) { \
ASSERT_EQ(v_aligned.u.u8[i], vec[16 -l +i]); \
} \
} }
TEST(SuperVectorUtilsTest,Alignr128c){ TEST(SuperVectorUtilsTest,Alignr128c){
uint8_t vec[32]; u8 vec[32];
for (int i=0; i<32; i++) {vec[i]=i;} for (int i=0; i<32; i++) {
m128_t SP = SuperVector<16>::loadu(vec); vec[i]=i;
m128_t SP1 = SuperVector<16>::loadu(vec+16); }
m128_t SP_test = SP1.alignr(SP,0); auto SP1 = SuperVector<16>::loadu(vec);
TEST_ALIGNR128(1) auto SP2 = SuperVector<16>::loadu(vec+16);
TEST_ALIGNR128(2) TEST_ALIGNR128(SP1, SP2, vec, 0);
TEST_ALIGNR128(3) TEST_ALIGNR128(SP1, SP2, vec, 1);
TEST_ALIGNR128(4) TEST_ALIGNR128(SP1, SP2, vec, 2);
TEST_ALIGNR128(5) TEST_ALIGNR128(SP1, SP2, vec, 3);
TEST_ALIGNR128(6) TEST_ALIGNR128(SP1, SP2, vec, 4);
TEST_ALIGNR128(7) TEST_ALIGNR128(SP1, SP2, vec, 5);
TEST_ALIGNR128(8) TEST_ALIGNR128(SP1, SP2, vec, 6);
TEST_ALIGNR128(9) TEST_ALIGNR128(SP1, SP2, vec, 7);
TEST_ALIGNR128(10) TEST_ALIGNR128(SP1, SP2, vec, 8);
TEST_ALIGNR128(11) TEST_ALIGNR128(SP1, SP2, vec, 9);
TEST_ALIGNR128(12) TEST_ALIGNR128(SP1, SP2, vec, 10);
TEST_ALIGNR128(13) TEST_ALIGNR128(SP1, SP2, vec, 11);
TEST_ALIGNR128(14) TEST_ALIGNR128(SP1, SP2, vec, 12);
TEST_ALIGNR128(15) TEST_ALIGNR128(SP1, SP2, vec, 13);
TEST_ALIGNR128(16) TEST_ALIGNR128(SP1, SP2, vec, 14);
TEST_ALIGNR128(SP1, SP2, vec, 15);
TEST_ALIGNR128(SP1, SP2, vec, 16);
} }