Provide {l,r}shift128_var() to fix the immediate-value build failure in loadu_maskz

This commit is contained in:
Konstantinos Margaritis 2021-07-21 10:20:40 +00:00
parent 825460856f
commit ebb1b84ae3
2 changed files with 105 additions and 99 deletions

View File

@ -198,19 +198,8 @@ really_inline typename SuperVector<16>::movemask_type SuperVector<16>::eqmask(Su
return eq(b).movemask(); return eq(b).movemask();
} }
#ifdef HS_OPTIMIZE
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const really_inline SuperVector<16> SuperVector<16>::rshift128_var(uint8_t const N) const
{
if (N >= 16) {
return Zeroes();
} else {
return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), N)};
}
}
#else
template <>
really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const
{ {
switch(N) { switch(N) {
case 1: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 1)}; break; case 1: return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), 1)}; break;
@ -233,21 +222,23 @@ really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const
} }
return *this; return *this;
} }
#endif
#ifdef HS_OPTIMIZE #ifdef HS_OPTIMIZE
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const
{ {
if (N == 0) { return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), N)};
return *this;
} else {
return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 16 - N)};
}
} }
#else #else
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const
{
return rshift128_var(N);
}
#endif
template <>
really_inline SuperVector<16> SuperVector<16>::lshift128_var(uint8_t const N) const
{ {
switch(N) { switch(N) {
case 1: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 15)}; break; case 1: return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 15)}; break;
@ -270,6 +261,19 @@ really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const
} }
return *this; return *this;
} }
#ifdef HS_OPTIMIZE
/**
 * Left-shift operator, HS_OPTIMIZE variant.
 *
 * Shifts the 128-bit vector towards the higher byte lanes by N bytes with a
 * single vextq_s8: the result is taken from the concatenation of an all-zero
 * vector and this vector, starting at byte (16 - N), so the N lowest lanes
 * become zero.
 *
 * The third argument of vextq_s8 is an immediate that must lie in 0..15.
 * N == 0 would turn it into 16, so that case is answered up front by
 * returning the vector unchanged (a shift by zero is the identity).
 *
 * NOTE(review): N > 16 would still produce an out-of-range immediate; it is
 * not handled here - confirm no caller relies on it.
 *
 * @param N number of bytes to shift by.
 * @return the shifted vector.
 */
template <>
really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const
{
    if (N == 0) {
        // 16 - 0 is not a valid vextq_s8 immediate; nothing to shift anyway.
        return *this;
    }
    return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 16 - N)};
}
#else
/**
 * Left-shift operator for builds where HS_OPTIMIZE is not defined.
 *
 * Forwards directly to lshift128_var(), handing N over untouched.
 *
 * @param N number of bytes to shift by.
 * @return whatever lshift128_var(N) returns for this vector.
 */
template <>
really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const
{
    return this->lshift128_var(N);
}
#endif #endif
template <> template <>
@ -289,7 +293,7 @@ really_inline SuperVector<16> SuperVector<16>::load(void const *ptr)
template <> template <>
really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint8_t const len) really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint8_t const len)
{ {
SuperVector<16> mask = Ones() >> (16 -len); SuperVector<16> mask = Ones().rshift128_var(16 -len);
mask.print8("mask"); mask.print8("mask");
SuperVector<16> v = loadu(ptr); SuperVector<16> v = loadu(ptr);
v.print8("v"); v.print8("v");

View File

@ -200,6 +200,8 @@ public:
SuperVector rshift64(uint8_t const N); SuperVector rshift64(uint8_t const N);
SuperVector lshift128(uint8_t const N); SuperVector lshift128(uint8_t const N);
SuperVector rshift128(uint8_t const N); SuperVector rshift128(uint8_t const N);
// 128-bit shifts whose count N may be a run-time value. The SuperVector<16>
// NEON specialisation dispatches on N with a switch so that every vextq_s8
// call receives a literal immediate; loadu_maskz uses rshift128_var() for its
// length-dependent mask.
// NOTE(review): behaviour of other architecture specialisations is not
// visible here - confirm they provide both functions.
SuperVector lshift128_var(uint8_t const N) const;
SuperVector rshift128_var(uint8_t const N) const;
// Constants // Constants
static SuperVector Ones(); static SuperVector Ones();