add {l,r}shift128()+tests, rename printv_u64() to print64()

This commit is contained in:
Konstantinos Margaritis 2021-07-20 14:32:40 +03:00 committed by Konstantinos Margaritis
parent 051ceed0f9
commit 6c51f7f591
3 changed files with 212 additions and 2 deletions

View File

@ -366,6 +366,18 @@ really_inline SuperVector<16> SuperVector<16>::rshift64(uint8_t const N)
}
#endif
template<>
// Left-shift the 128-bit vector by N; semantics are those of operator<<
// for SuperVector<16> (defined elsewhere in this file), to which we delegate.
really_inline SuperVector<16> SuperVector<16>::lshift128(uint8_t const N)
{
    SuperVector<16> shifted = this->operator<<(N);
    return shifted;
}
template<>
// Right-shift the 128-bit vector by N; semantics are those of operator>>
// for SuperVector<16> (defined elsewhere in this file), to which we delegate.
really_inline SuperVector<16> SuperVector<16>::rshift128(uint8_t const N)
{
    SuperVector<16> shifted = this->operator>>(N);
    return shifted;
}
// 256-bit AVX2 implementation
#if defined(HAVE_AVX2)
template<>
@ -667,6 +679,22 @@ really_inline SuperVector<32> SuperVector<32>::alignr(SuperVector<32> &other, in
// NOTE(review): _mm256_alignr_epi8 concatenates and shifts within each
// 128-bit lane independently, not across the full 256-bit vector — confirm
// that per-lane alignr is the intended semantics for SuperVector<32>::alignr.
case 13: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 13)}; break;
case 14: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 14)}; break;
case 15: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 15)}; break;
case 16: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 16)}; break;
case 17: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 17)}; break;
case 18: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 18)}; break;
case 19: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 19)}; break;
case 20: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 20)}; break;
case 21: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 21)}; break;
case 22: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 22)}; break;
case 23: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 23)}; break;
case 24: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 24)}; break;
case 25: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 25)}; break;
case 26: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 26)}; break;
case 27: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 27)}; break;
case 28: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 28)}; break;
// BUG FIX: this case previously passed an immediate of 39 (a typo) instead of 29.
case 29: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 29)}; break;
case 30: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 30)}; break;
case 31: return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], 31)}; break;
default: break;
}
return *this;
@ -706,6 +734,22 @@ really_inline SuperVector<32> SuperVector<32>::lshift64(uint8_t const N)
case 13: return {_mm256_slli_epi64(u.v256[0], 13)}; break;
case 14: return {_mm256_slli_epi64(u.v256[0], 14)}; break;
case 15: return {_mm256_slli_epi64(u.v256[0], 15)}; break;
case 16: return {_mm256_slli_epi64(u.v256[0], 16)}; break;
case 17: return {_mm256_slli_epi64(u.v256[0], 17)}; break;
case 18: return {_mm256_slli_epi64(u.v256[0], 18)}; break;
case 19: return {_mm256_slli_epi64(u.v256[0], 19)}; break;
case 20: return {_mm256_slli_epi64(u.v256[0], 20)}; break;
case 21: return {_mm256_slli_epi64(u.v256[0], 21)}; break;
case 22: return {_mm256_slli_epi64(u.v256[0], 22)}; break;
case 23: return {_mm256_slli_epi64(u.v256[0], 23)}; break;
case 24: return {_mm256_slli_epi64(u.v256[0], 24)}; break;
case 25: return {_mm256_slli_epi64(u.v256[0], 25)}; break;
case 26: return {_mm256_slli_epi64(u.v256[0], 26)}; break;
case 27: return {_mm256_slli_epi64(u.v256[0], 27)}; break;
case 28: return {_mm256_slli_epi64(u.v256[0], 28)}; break;
case 29: return {_mm256_slli_epi64(u.v256[0], 29)}; break;
case 30: return {_mm256_slli_epi64(u.v256[0], 30)}; break;
case 31: return {_mm256_slli_epi64(u.v256[0], 31)}; break;
default: break;
}
return *this;
@ -739,6 +783,120 @@ really_inline SuperVector<32> SuperVector<32>::rshift64(uint8_t const N)
case 13: return {_mm256_srli_epi64(u.v256[0], 13)}; break;
case 14: return {_mm256_srli_epi64(u.v256[0], 14)}; break;
case 15: return {_mm256_srli_epi64(u.v256[0], 15)}; break;
case 16: return {_mm256_srli_epi64(u.v256[0], 16)}; break;
case 17: return {_mm256_srli_epi64(u.v256[0], 17)}; break;
case 18: return {_mm256_srli_epi64(u.v256[0], 18)}; break;
case 19: return {_mm256_srli_epi64(u.v256[0], 19)}; break;
case 20: return {_mm256_srli_epi64(u.v256[0], 20)}; break;
case 21: return {_mm256_srli_epi64(u.v256[0], 21)}; break;
case 22: return {_mm256_srli_epi64(u.v256[0], 22)}; break;
case 23: return {_mm256_srli_epi64(u.v256[0], 23)}; break;
case 24: return {_mm256_srli_epi64(u.v256[0], 24)}; break;
case 25: return {_mm256_srli_epi64(u.v256[0], 25)}; break;
case 26: return {_mm256_srli_epi64(u.v256[0], 26)}; break;
case 27: return {_mm256_srli_epi64(u.v256[0], 27)}; break;
case 28: return {_mm256_srli_epi64(u.v256[0], 28)}; break;
case 29: return {_mm256_srli_epi64(u.v256[0], 29)}; break;
case 30: return {_mm256_srli_epi64(u.v256[0], 30)}; break;
case 31: return {_mm256_srli_epi64(u.v256[0], 31)}; break;
default: break;
}
return *this;
}
#endif
#ifdef HS_OPTIMIZE
template<>
// Byte-shift each 128-bit lane of the 256-bit vector left by N bytes.
// NOTE(review): _mm256_slli_si256 requires an immediate (compile-time
// constant) shift count; this HS_OPTIMIZE variant only compiles/behaves
// correctly when the call is inlined with a constant N — confirm all call
// sites pass literals when HS_OPTIMIZE is defined.
really_inline SuperVector<32> SuperVector<32>::lshift128(uint8_t const N)
{
    // Per the intrinsic's definition this shifts the two 128-bit lanes
    // independently (it is not a full 256-bit shift).
    return {_mm256_slli_si256(u.v256[0], N)};
}
#else
template<>
// Byte-shift each 128-bit lane of the 256-bit vector left by N bytes.
// _mm256_slli_si256 demands an immediate shift count, so a runtime N has to
// be dispatched through one literal case per possible value; the local macro
// keeps the 31 cases uniform. Counts of 16..31 zero each lane, because the
// intrinsic clamps counts above 15.
really_inline SuperVector<32> SuperVector<32>::lshift128(uint8_t const N)
{
#define SV32_LSH128_CASE(n) case (n): return {_mm256_slli_si256(u.v256[0], (n))};
    switch (N) {
    case 0:
        return *this;
    SV32_LSH128_CASE(1)  SV32_LSH128_CASE(2)  SV32_LSH128_CASE(3)
    SV32_LSH128_CASE(4)  SV32_LSH128_CASE(5)  SV32_LSH128_CASE(6)
    SV32_LSH128_CASE(7)  SV32_LSH128_CASE(8)  SV32_LSH128_CASE(9)
    SV32_LSH128_CASE(10) SV32_LSH128_CASE(11) SV32_LSH128_CASE(12)
    SV32_LSH128_CASE(13) SV32_LSH128_CASE(14) SV32_LSH128_CASE(15)
    SV32_LSH128_CASE(16) SV32_LSH128_CASE(17) SV32_LSH128_CASE(18)
    SV32_LSH128_CASE(19) SV32_LSH128_CASE(20) SV32_LSH128_CASE(21)
    SV32_LSH128_CASE(22) SV32_LSH128_CASE(23) SV32_LSH128_CASE(24)
    SV32_LSH128_CASE(25) SV32_LSH128_CASE(26) SV32_LSH128_CASE(27)
    SV32_LSH128_CASE(28) SV32_LSH128_CASE(29) SV32_LSH128_CASE(30)
    SV32_LSH128_CASE(31)
    default:
        break;
    }
#undef SV32_LSH128_CASE
    // Unreachable for N in [0,31]; kept as a safe fallthrough for other N.
    return *this;
}
#endif
#ifdef HS_OPTIMIZE
template<>
// Byte-shift each 128-bit lane of the 256-bit vector right by N bytes.
// NOTE(review): _mm256_srli_si256 requires an immediate (compile-time
// constant) shift count; this HS_OPTIMIZE variant only compiles/behaves
// correctly when the call is inlined with a constant N — confirm all call
// sites pass literals when HS_OPTIMIZE is defined.
really_inline SuperVector<32> SuperVector<32>::rshift128(uint8_t const N)
{
    // Per the intrinsic's definition this shifts the two 128-bit lanes
    // independently (it is not a full 256-bit shift).
    return {_mm256_srli_si256(u.v256[0], N)};
}
#else
template<>
// Byte-shift each 128-bit lane of the 256-bit vector right by N bytes.
// The intrinsic needs an immediate count, so a runtime N is dispatched
// through one literal case per possible value.
// NOTE(review): _mm256_srli_si256 shifts the two 128-bit lanes independently
// — presumably that per-lane behaviour is the intended "128" semantics here;
// confirm against the SuperVector<16> implementation.
really_inline SuperVector<32> SuperVector<32>::rshift128(uint8_t const N)
{
    switch(N) {
    case 0: return *this; break;
    case 1: return {_mm256_srli_si256(u.v256[0], 1)}; break;
    case 2: return {_mm256_srli_si256(u.v256[0], 2)}; break;
    case 3: return {_mm256_srli_si256(u.v256[0], 3)}; break;
    case 4: return {_mm256_srli_si256(u.v256[0], 4)}; break;
    case 5: return {_mm256_srli_si256(u.v256[0], 5)}; break;
    case 6: return {_mm256_srli_si256(u.v256[0], 6)}; break;
    case 7: return {_mm256_srli_si256(u.v256[0], 7)}; break;
    case 8: return {_mm256_srli_si256(u.v256[0], 8)}; break;
    case 9: return {_mm256_srli_si256(u.v256[0], 9)}; break;
    case 10: return {_mm256_srli_si256(u.v256[0], 10)}; break;
    case 11: return {_mm256_srli_si256(u.v256[0], 11)}; break;
    case 12: return {_mm256_srli_si256(u.v256[0], 12)}; break;
    case 13: return {_mm256_srli_si256(u.v256[0], 13)}; break;
    case 14: return {_mm256_srli_si256(u.v256[0], 14)}; break;
    case 15: return {_mm256_srli_si256(u.v256[0], 15)}; break;
    // Counts 16..31 zero each lane: the intrinsic clamps counts above 15.
    case 16: return {_mm256_srli_si256(u.v256[0], 16)}; break;
    case 17: return {_mm256_srli_si256(u.v256[0], 17)}; break;
    case 18: return {_mm256_srli_si256(u.v256[0], 18)}; break;
    case 19: return {_mm256_srli_si256(u.v256[0], 19)}; break;
    case 20: return {_mm256_srli_si256(u.v256[0], 20)}; break;
    case 21: return {_mm256_srli_si256(u.v256[0], 21)}; break;
    case 22: return {_mm256_srli_si256(u.v256[0], 22)}; break;
    case 23: return {_mm256_srli_si256(u.v256[0], 23)}; break;
    case 24: return {_mm256_srli_si256(u.v256[0], 24)}; break;
    case 25: return {_mm256_srli_si256(u.v256[0], 25)}; break;
    case 26: return {_mm256_srli_si256(u.v256[0], 26)}; break;
    case 27: return {_mm256_srli_si256(u.v256[0], 27)}; break;
    case 28: return {_mm256_srli_si256(u.v256[0], 28)}; break;
    case 29: return {_mm256_srli_si256(u.v256[0], 29)}; break;
    case 30: return {_mm256_srli_si256(u.v256[0], 30)}; break;
    case 31: return {_mm256_srli_si256(u.v256[0], 31)}; break;
    default: break;
    }
    return *this;

View File

@ -198,6 +198,8 @@ public:
SuperVector pshufb(SuperVector b);
SuperVector lshift64(uint8_t const N);
SuperVector rshift64(uint8_t const N);
SuperVector lshift128(uint8_t const N);
SuperVector rshift128(uint8_t const N);
// Constants
static SuperVector Ones();
@ -225,7 +227,7 @@ public:
printf("\n");
}
void printv_u64(const char *label) {
void print64(const char *label) {
printf("%12s: ", label);
for(s16 i=SIZE/sizeof(u64a)-1; i >= 0; i--)
printf("%016lx ", u.u64[i]);
@ -235,7 +237,7 @@ public:
void print8(const char *label UNUSED) {};
void print16(const char *label UNUSED) {};
void print32(const char *label UNUSED) {};
void printv_u64(const char *label UNUSED) {};
void print64(const char *label UNUSED) {};
#endif
};

View File

@ -578,6 +578,22 @@ TEST(SuperVectorUtilsTest,LShift256c){
TEST_LSHIFT256(buf, vec, SP, 14);
TEST_LSHIFT256(buf, vec, SP, 15);
TEST_LSHIFT256(buf, vec, SP, 16);
TEST_LSHIFT256(buf, vec, SP, 17);
TEST_LSHIFT256(buf, vec, SP, 18);
TEST_LSHIFT256(buf, vec, SP, 19);
TEST_LSHIFT256(buf, vec, SP, 20);
TEST_LSHIFT256(buf, vec, SP, 21);
TEST_LSHIFT256(buf, vec, SP, 22);
TEST_LSHIFT256(buf, vec, SP, 23);
TEST_LSHIFT256(buf, vec, SP, 24);
TEST_LSHIFT256(buf, vec, SP, 25);
TEST_LSHIFT256(buf, vec, SP, 26);
TEST_LSHIFT256(buf, vec, SP, 27);
TEST_LSHIFT256(buf, vec, SP, 28);
TEST_LSHIFT256(buf, vec, SP, 29);
TEST_LSHIFT256(buf, vec, SP, 30);
TEST_LSHIFT256(buf, vec, SP, 31);
TEST_LSHIFT256(buf, vec, SP, 32);
}
/*
@ -640,6 +656,22 @@ TEST(SuperVectorUtilsTest,RShift256c){
TEST_RSHIFT256(buf, vec, SP, 14);
TEST_RSHIFT256(buf, vec, SP, 15);
TEST_RSHIFT256(buf, vec, SP, 16);
TEST_RSHIFT256(buf, vec, SP, 17);
TEST_RSHIFT256(buf, vec, SP, 18);
TEST_RSHIFT256(buf, vec, SP, 19);
TEST_RSHIFT256(buf, vec, SP, 20);
TEST_RSHIFT256(buf, vec, SP, 21);
TEST_RSHIFT256(buf, vec, SP, 22);
TEST_RSHIFT256(buf, vec, SP, 23);
TEST_RSHIFT256(buf, vec, SP, 24);
TEST_RSHIFT256(buf, vec, SP, 25);
TEST_RSHIFT256(buf, vec, SP, 26);
TEST_RSHIFT256(buf, vec, SP, 27);
TEST_RSHIFT256(buf, vec, SP, 28);
TEST_RSHIFT256(buf, vec, SP, 29);
TEST_RSHIFT256(buf, vec, SP, 30);
TEST_RSHIFT256(buf, vec, SP, 31);
TEST_RSHIFT256(buf, vec, SP, 32);
}
@ -647,7 +679,9 @@ TEST(SuperVectorUtilsTest,RShift256c){
/*
#define TEST_ALIGNR256(v1, v2, buf, l) { \
auto v_aligned = v2.alignr(v1, l); \
v_aligned.print8("v_aligned");\
for (size_t i=0; i<32; i++) { \
printf("vec[%ld] = %02x\n", i+l, vec[i+l]);\
ASSERT_EQ(v_aligned.u.u8[i], vec[i + l]); \
} \
}
@ -676,6 +710,22 @@ TEST(SuperVectorUtilsTest,Alignr256c){
TEST_ALIGNR256(SP1, SP2, vec, 14);
TEST_ALIGNR256(SP1, SP2, vec, 15);
TEST_ALIGNR256(SP1, SP2, vec, 16);
TEST_ALIGNR256(SP1, SP2, vec, 17);
TEST_ALIGNR256(SP1, SP2, vec, 18);
TEST_ALIGNR256(SP1, SP2, vec, 19);
TEST_ALIGNR256(SP1, SP2, vec, 20);
TEST_ALIGNR256(SP1, SP2, vec, 21);
TEST_ALIGNR256(SP1, SP2, vec, 22);
TEST_ALIGNR256(SP1, SP2, vec, 23);
TEST_ALIGNR256(SP1, SP2, vec, 24);
TEST_ALIGNR256(SP1, SP2, vec, 25);
TEST_ALIGNR256(SP1, SP2, vec, 26);
TEST_ALIGNR256(SP1, SP2, vec, 27);
TEST_ALIGNR256(SP1, SP2, vec, 28);
TEST_ALIGNR256(SP1, SP2, vec, 29);
TEST_ALIGNR256(SP1, SP2, vec, 30);
TEST_ALIGNR256(SP1, SP2, vec, 31);
TEST_ALIGNR256(SP1, SP2, vec, 32);
}
*/