ARM SuperVector: use explicit constructor calls instead of brace-initialized returns

This commit is contained in:
gtsoul-tech 2024-05-14 09:53:08 +03:00
parent ee8bc59ee0
commit 3d60d4f3be
2 changed files with 38 additions and 38 deletions

View File

@@ -159,13 +159,13 @@ really_inline SuperVector<16>::SuperVector(uint64_t const other)
template<> template<>
really_inline SuperVector<16> SuperVector<16>::Ones(void) really_inline SuperVector<16> SuperVector<16>::Ones(void)
{ {
return {vdupq_n_u8(0xFF)}; return SuperVector<16>(vdupq_n_u8(0xFF));
} }
template<> template<>
really_inline SuperVector<16> SuperVector<16>::Zeroes(void) really_inline SuperVector<16> SuperVector<16>::Zeroes(void)
{ {
return {vdupq_n_u8(0)}; return SuperVector<16>(vdupq_n_u8(0));
} }
// Methods // Methods
@@ -179,37 +179,37 @@ really_inline void SuperVector<16>::operator=(SuperVector<16> const &other)
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator&(SuperVector<16> const &b) const really_inline SuperVector<16> SuperVector<16>::operator&(SuperVector<16> const &b) const
{ {
return {vandq_u8(u.u8x16[0], b.u.u8x16[0])}; return SuperVector<16>(vandq_u8(u.u8x16[0], b.u.u8x16[0]));
} }
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator|(SuperVector<16> const &b) const really_inline SuperVector<16> SuperVector<16>::operator|(SuperVector<16> const &b) const
{ {
return {vorrq_u8(u.u8x16[0], b.u.u8x16[0])}; return SuperVector<16>(vorrq_u8(u.u8x16[0], b.u.u8x16[0]));
} }
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator^(SuperVector<16> const &b) const really_inline SuperVector<16> SuperVector<16>::operator^(SuperVector<16> const &b) const
{ {
return {veorq_u8(u.u8x16[0], b.u.u8x16[0])}; return SuperVector<16>(veorq_u8(u.u8x16[0], b.u.u8x16[0]));
} }
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator!() const really_inline SuperVector<16> SuperVector<16>::operator!() const
{ {
return {vmvnq_u8(u.u8x16[0])}; return SuperVector<16>(vmvnq_u8(u.u8x16[0]));
} }
template <> template <>
really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const &b) const really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const &b) const
{ {
return {vandq_u8(vmvnq_u8(u.u8x16[0]), b.u.u8x16[0])}; return SuperVector<16>(vandq_u8(vmvnq_u8(u.u8x16[0]), b.u.u8x16[0]));
} }
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator==(SuperVector<16> const &b) const really_inline SuperVector<16> SuperVector<16>::operator==(SuperVector<16> const &b) const
{ {
return {vceqq_u8(u.u8x16[0], b.u.u8x16[0])}; return SuperVector<16>(vceqq_u8(u.u8x16[0], b.u.u8x16[0]));
} }
template <> template <>
@@ -221,25 +221,25 @@ really_inline SuperVector<16> SuperVector<16>::operator!=(SuperVector<16> const
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator>(SuperVector<16> const &b) const really_inline SuperVector<16> SuperVector<16>::operator>(SuperVector<16> const &b) const
{ {
return {vcgtq_s8(u.s8x16[0], b.u.s8x16[0])}; return SuperVector<16>(vcgtq_s8(u.s8x16[0], b.u.s8x16[0]));
} }
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator>=(SuperVector<16> const &b) const really_inline SuperVector<16> SuperVector<16>::operator>=(SuperVector<16> const &b) const
{ {
return {vcgeq_u8(u.u8x16[0], b.u.u8x16[0])}; return SuperVector<16>(vcgeq_u8(u.u8x16[0], b.u.u8x16[0]));
} }
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator<(SuperVector<16> const &b) const really_inline SuperVector<16> SuperVector<16>::operator<(SuperVector<16> const &b) const
{ {
return {vcltq_s8(u.s8x16[0], b.u.s8x16[0])}; return SuperVector<16>(vcltq_s8(u.s8x16[0], b.u.s8x16[0]));
} }
template <> template <>
really_inline SuperVector<16> SuperVector<16>::operator<=(SuperVector<16> const &b) const really_inline SuperVector<16> SuperVector<16>::operator<=(SuperVector<16> const &b) const
{ {
return {vcgeq_s8(u.s8x16[0], b.u.s8x16[0])}; return SuperVector<16>(vcgeq_s8(u.s8x16[0], b.u.s8x16[0]));
} }
template <> template <>
@@ -274,35 +274,35 @@ template <>
template<uint8_t N> template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_8_imm() const really_inline SuperVector<16> SuperVector<16>::vshl_8_imm() const
{ {
return {vshlq_n_u8(u.u8x16[0], N)}; return SuperVector<16>(vshlq_n_u8(u.u8x16[0], N));
} }
template <> template <>
template<uint8_t N> template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_16_imm() const really_inline SuperVector<16> SuperVector<16>::vshl_16_imm() const
{ {
return {vshlq_n_u16(u.u16x8[0], N)}; return SuperVector<16>(vshlq_n_u16(u.u16x8[0], N));
} }
template <> template <>
template<uint8_t N> template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_32_imm() const really_inline SuperVector<16> SuperVector<16>::vshl_32_imm() const
{ {
return {vshlq_n_u32(u.u32x4[0], N)}; return SuperVector<16>(vshlq_n_u32(u.u32x4[0], N));
} }
template <> template <>
template<uint8_t N> template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_64_imm() const really_inline SuperVector<16> SuperVector<16>::vshl_64_imm() const
{ {
return {vshlq_n_u64(u.u64x2[0], N)}; return SuperVector<16>(vshlq_n_u64(u.u64x2[0], N));
} }
template <> template <>
template<uint8_t N> template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshl_128_imm() const really_inline SuperVector<16> SuperVector<16>::vshl_128_imm() const
{ {
return {vextq_u8(vdupq_n_u8(0), u.u8x16[0], 16 - N)}; return SuperVector<16>(vextq_u8(vdupq_n_u8(0), u.u8x16[0], 16 - N));
} }
template <> template <>
@@ -316,35 +316,35 @@ template <>
template<uint8_t N> template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_8_imm() const really_inline SuperVector<16> SuperVector<16>::vshr_8_imm() const
{ {
return {vshrq_n_u8(u.u8x16[0], N)}; return SuperVector<16>(vshrq_n_u8(u.u8x16[0], N));
} }
template <> template <>
template<uint8_t N> template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_16_imm() const really_inline SuperVector<16> SuperVector<16>::vshr_16_imm() const
{ {
return {vshrq_n_u16(u.u16x8[0], N)}; return SuperVector<16>(vshrq_n_u16(u.u16x8[0], N));
} }
template <> template <>
template<uint8_t N> template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_32_imm() const really_inline SuperVector<16> SuperVector<16>::vshr_32_imm() const
{ {
return {vshrq_n_u32(u.u32x4[0], N)}; return SuperVector<16>(vshrq_n_u32(u.u32x4[0], N));
} }
template <> template <>
template<uint8_t N> template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const
{ {
return {vshrq_n_u64(u.u64x2[0], N)}; return SuperVector<16>(vshrq_n_u64(u.u64x2[0], N));
} }
template <> template <>
template<uint8_t N> template<uint8_t N>
really_inline SuperVector<16> SuperVector<16>::vshr_128_imm() const really_inline SuperVector<16> SuperVector<16>::vshr_128_imm() const
{ {
return {vextq_u8(u.u8x16[0], vdupq_n_u8(0), N)}; return SuperVector<16>(vextq_u8(u.u8x16[0], vdupq_n_u8(0), N));
} }
template <> template <>
@@ -376,7 +376,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_8 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 8) return Zeroes(); if (N == 8) return Zeroes();
int8x16_t shift_indices = vdupq_n_s8(N); int8x16_t shift_indices = vdupq_n_s8(N);
return { vshlq_s8(u.s8x16[0], shift_indices) }; return SuperVector<16>(vshlq_s8(u.s8x16[0], shift_indices));
} }
template <> template <>
@@ -385,7 +385,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_16 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
int16x8_t shift_indices = vdupq_n_s16(N); int16x8_t shift_indices = vdupq_n_s16(N);
return { vshlq_s16(u.s16x8[0], shift_indices) }; return SuperVector<16>(vshlq_s16(u.s16x8[0], shift_indices));
} }
template <> template <>
@@ -394,7 +394,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_32 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 32) return Zeroes(); if (N == 32) return Zeroes();
int32x4_t shift_indices = vdupq_n_s32(N); int32x4_t shift_indices = vdupq_n_s32(N);
return { vshlq_s32(u.s32x4[0], shift_indices) }; return SuperVector<16>(vshlq_s32(u.s32x4[0], shift_indices));
} }
template <> template <>
@@ -403,7 +403,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 64) return Zeroes(); if (N == 64) return Zeroes();
int64x2_t shift_indices = vdupq_n_s64(N); int64x2_t shift_indices = vdupq_n_s64(N);
return { vshlq_s64(u.s64x2[0], shift_indices) }; return SuperVector<16>(vshlq_s64(u.s64x2[0], shift_indices));
} }
template <> template <>
@@ -413,11 +413,11 @@ really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const N) const
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
#if defined(HAVE__BUILTIN_CONSTANT_P) #if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(N)) { if (__builtin_constant_p(N)) {
return {vextq_u8(vdupq_n_u8(0), u.u8x16[0], 16 - N)}; return SuperVector<16>(vextq_u8(vdupq_n_u8(0), u.u8x16[0], 16 - N));
} }
#endif #endif
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(vdupq_n_u8(0), v->u.u8x16[0], 16 - n)}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = SuperVector<16>(vextq_u8(vdupq_n_u8(0), v->u.u8x16[0], 16 - n)); });
return result; return result;
} }
@@ -433,7 +433,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 8) return Zeroes(); if (N == 8) return Zeroes();
int8x16_t shift_indices = vdupq_n_s8(-N); int8x16_t shift_indices = vdupq_n_s8(-N);
return { vshlq_s8(u.s8x16[0], shift_indices) }; return SuperVector<16>(vshlq_s8(u.s8x16[0], shift_indices));
} }
template <> template <>
@@ -442,7 +442,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
int16x8_t shift_indices = vdupq_n_s16(-N); int16x8_t shift_indices = vdupq_n_s16(-N);
return { vshlq_s16(u.s16x8[0], shift_indices) }; return SuperVector<16>(vshlq_s16(u.s16x8[0], shift_indices));
} }
template <> template <>
@@ -451,7 +451,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 32) return Zeroes(); if (N == 32) return Zeroes();
int32x4_t shift_indices = vdupq_n_s32(-N); int32x4_t shift_indices = vdupq_n_s32(-N);
return { vshlq_s32(u.s32x4[0], shift_indices) }; return SuperVector<16>(vshlq_s32(u.s32x4[0], shift_indices));
} }
template <> template <>
@@ -460,7 +460,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const
if (N == 0) return *this; if (N == 0) return *this;
if (N == 64) return Zeroes(); if (N == 64) return Zeroes();
int64x2_t shift_indices = vdupq_n_s64(-N); int64x2_t shift_indices = vdupq_n_s64(-N);
return { vshlq_s64(u.s64x2[0], shift_indices) }; return SuperVector<16>(vshlq_s64(u.s64x2[0], shift_indices));
} }
template <> template <>
@@ -470,11 +470,11 @@ really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const N) const
if (N == 16) return Zeroes(); if (N == 16) return Zeroes();
#if defined(HAVE__BUILTIN_CONSTANT_P) #if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(N)) { if (__builtin_constant_p(N)) {
return {vextq_u8(u.u8x16[0], vdupq_n_u8(0), N)}; return SuperVector<16>(vextq_u8(u.u8x16[0], vdupq_n_u8(0), N));
} }
#endif #endif
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(v->u.u8x16[0], vdupq_n_u8(0), n)}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = SuperVector<16>(vextq_u8(v->u.u8x16[0], vdupq_n_u8(0), n)); });
return result; return result;
} }
@@ -537,11 +537,11 @@ really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, in
if (offset == 16) return *this; if (offset == 16) return *this;
#if defined(HAVE__BUILTIN_CONSTANT_P) #if defined(HAVE__BUILTIN_CONSTANT_P)
if (__builtin_constant_p(offset)) { if (__builtin_constant_p(offset)) {
return {vextq_u8(other.u.u8x16[0], u.u8x16[0], offset)}; return SuperVector<16>(vextq_u8(other.u.u8x16[0], u.u8x16[0], offset));
} }
#endif #endif
SuperVector result; SuperVector result;
Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (offset == n) result = {vextq_u8(other.u.u8x16[0], v->u.u8x16[0], n)}; }); Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (offset == n) result = SuperVector<16>(vextq_u8(other.u.u8x16[0], v->u.u8x16[0], n)); });
return result; return result;
} }
@@ -549,7 +549,7 @@ template<>
template<> template<>
really_inline SuperVector<16> SuperVector<16>::pshufb<false>(SuperVector<16> b) really_inline SuperVector<16> SuperVector<16>::pshufb<false>(SuperVector<16> b)
{ {
return {vqtbl1q_u8(u.u8x16[0], b.u.u8x16[0])}; return SuperVector<16>(vqtbl1q_u8(u.u8x16[0], b.u.u8x16[0]));
} }
template<> template<>

View File

@@ -214,7 +214,7 @@ public:
SuperVector(previous_type const lo, previous_type const hi); SuperVector(previous_type const lo, previous_type const hi);
static SuperVector dup_u8 (uint8_t other) { return {SuperVector(other)}; }; static SuperVector dup_u8 (uint8_t other) { return {SuperVector(other)}; };
static SuperVector dup_s8 (int8_t other) { return {other}; }; static SuperVector dup_s8 (int8_t other) { return {SuperVector(other)}; };
static SuperVector dup_u16(uint16_t other) { return {other}; }; static SuperVector dup_u16(uint16_t other) { return {other}; };
static SuperVector dup_s16(int16_t other) { return {other}; }; static SuperVector dup_s16(int16_t other) { return {other}; };
static SuperVector dup_u32(uint32_t other) { return {other}; }; static SuperVector dup_u32(uint32_t other) { return {other}; };