diff --git a/src/util/supervector/arch/arm/impl.cpp b/src/util/supervector/arch/arm/impl.cpp index 55f6c55c..845ccea0 100644 --- a/src/util/supervector/arch/arm/impl.cpp +++ b/src/util/supervector/arch/arm/impl.cpp @@ -159,13 +159,13 @@ really_inline SuperVector<16>::SuperVector(uint64_t const other) template<> really_inline SuperVector<16> SuperVector<16>::Ones(void) { - return {vdupq_n_u8(0xFF)}; + return SuperVector<16>(vdupq_n_u8(0xFF)); } template<> really_inline SuperVector<16> SuperVector<16>::Zeroes(void) { - return {vdupq_n_u8(0)}; + return SuperVector<16>(vdupq_n_u8(0)); } // Methods @@ -179,37 +179,37 @@ really_inline void SuperVector<16>::operator=(SuperVector<16> const &other) template <> really_inline SuperVector<16> SuperVector<16>::operator&(SuperVector<16> const &b) const { - return {vandq_u8(u.u8x16[0], b.u.u8x16[0])}; + return SuperVector<16>(vandq_u8(u.u8x16[0], b.u.u8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator|(SuperVector<16> const &b) const { - return {vorrq_u8(u.u8x16[0], b.u.u8x16[0])}; + return SuperVector<16>(vorrq_u8(u.u8x16[0], b.u.u8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator^(SuperVector<16> const &b) const { - return {veorq_u8(u.u8x16[0], b.u.u8x16[0])}; + return SuperVector<16>(veorq_u8(u.u8x16[0], b.u.u8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator!() const { - return {vmvnq_u8(u.u8x16[0])}; + return SuperVector<16>(vmvnq_u8(u.u8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const &b) const { - return {vandq_u8(vmvnq_u8(u.u8x16[0]), b.u.u8x16[0])}; + return SuperVector<16>(vandq_u8(vmvnq_u8(u.u8x16[0]), b.u.u8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator==(SuperVector<16> const &b) const { - return {vceqq_u8(u.u8x16[0], b.u.u8x16[0])}; + return SuperVector<16>(vceqq_u8(u.u8x16[0], b.u.u8x16[0])); } template <> @@ -221,25 +221,25 @@ really_inline 
SuperVector<16> SuperVector<16>::operator!=(SuperVector<16> const template <> really_inline SuperVector<16> SuperVector<16>::operator>(SuperVector<16> const &b) const { - return {vcgtq_s8(u.s8x16[0], b.u.s8x16[0])}; + return SuperVector<16>(vcgtq_s8(u.s8x16[0], b.u.s8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator>=(SuperVector<16> const &b) const { - return {vcgeq_u8(u.u8x16[0], b.u.u8x16[0])}; + return SuperVector<16>(vcgeq_u8(u.u8x16[0], b.u.u8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator<(SuperVector<16> const &b) const { - return {vcltq_s8(u.s8x16[0], b.u.s8x16[0])}; + return SuperVector<16>(vcltq_s8(u.s8x16[0], b.u.s8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator<=(SuperVector<16> const &b) const { - return {vcgeq_s8(u.s8x16[0], b.u.s8x16[0])}; + return SuperVector<16>(vcgeq_s8(u.s8x16[0], b.u.s8x16[0])); } template <> @@ -274,35 +274,35 @@ template <> template really_inline SuperVector<16> SuperVector<16>::vshl_8_imm() const { - return {vshlq_n_u8(u.u8x16[0], N)}; + return SuperVector<16>(vshlq_n_u8(u.u8x16[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshl_16_imm() const { - return {vshlq_n_u16(u.u16x8[0], N)}; + return SuperVector<16>(vshlq_n_u16(u.u16x8[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshl_32_imm() const { - return {vshlq_n_u32(u.u32x4[0], N)}; + return SuperVector<16>(vshlq_n_u32(u.u32x4[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshl_64_imm() const { - return {vshlq_n_u64(u.u64x2[0], N)}; + return SuperVector<16>(vshlq_n_u64(u.u64x2[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshl_128_imm() const { - return {vextq_u8(vdupq_n_u8(0), u.u8x16[0], 16 - N)}; + return SuperVector<16>(vextq_u8(vdupq_n_u8(0), u.u8x16[0], 16 - N)); } template <> @@ -316,35 +316,35 @@ template <> template really_inline SuperVector<16> 
SuperVector<16>::vshr_8_imm() const { - return {vshrq_n_u8(u.u8x16[0], N)}; + return SuperVector<16>(vshrq_n_u8(u.u8x16[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshr_16_imm() const { - return {vshrq_n_u16(u.u16x8[0], N)}; + return SuperVector<16>(vshrq_n_u16(u.u16x8[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshr_32_imm() const { - return {vshrq_n_u32(u.u32x4[0], N)}; + return SuperVector<16>(vshrq_n_u32(u.u32x4[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const { - return {vshrq_n_u64(u.u64x2[0], N)}; + return SuperVector<16>(vshrq_n_u64(u.u64x2[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshr_128_imm() const { - return {vextq_u8(u.u8x16[0], vdupq_n_u8(0), N)}; + return SuperVector<16>(vextq_u8(u.u8x16[0], vdupq_n_u8(0), N)); } template <> @@ -376,7 +376,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_8 (uint8_t const N) const if (N == 0) return *this; if (N == 8) return Zeroes(); int8x16_t shift_indices = vdupq_n_s8(N); - return { vshlq_s8(u.s8x16[0], shift_indices) }; + return SuperVector<16>(vshlq_s8(u.s8x16[0], shift_indices)); } template <> @@ -385,7 +385,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_16 (uint8_t const N) const if (N == 0) return *this; if (N == 16) return Zeroes(); int16x8_t shift_indices = vdupq_n_s16(N); - return { vshlq_s16(u.s16x8[0], shift_indices) }; + return SuperVector<16>(vshlq_s16(u.s16x8[0], shift_indices)); } template <> @@ -394,7 +394,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_32 (uint8_t const N) const if (N == 0) return *this; if (N == 32) return Zeroes(); int32x4_t shift_indices = vdupq_n_s32(N); - return { vshlq_s32(u.s32x4[0], shift_indices) }; + return SuperVector<16>(vshlq_s32(u.s32x4[0], shift_indices)); } template <> @@ -403,7 +403,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const if (N == 0) return 
*this; if (N == 64) return Zeroes(); int64x2_t shift_indices = vdupq_n_s64(N); - return { vshlq_s64(u.s64x2[0], shift_indices) }; + return SuperVector<16>(vshlq_s64(u.s64x2[0], shift_indices)); } template <> @@ -413,11 +413,11 @@ really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const N) const if (N == 16) return Zeroes(); #if defined(HAVE__BUILTIN_CONSTANT_P) if (__builtin_constant_p(N)) { - return {vextq_u8(vdupq_n_u8(0), u.u8x16[0], 16 - N)}; + return SuperVector<16>(vextq_u8(vdupq_n_u8(0), u.u8x16[0], 16 - N)); } #endif SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(vdupq_n_u8(0), v->u.u8x16[0], 16 - n)}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = SuperVector<16>(vextq_u8(vdupq_n_u8(0), v->u.u8x16[0], 16 - n)); }); return result; } @@ -433,7 +433,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const N) const if (N == 0) return *this; if (N == 8) return Zeroes(); int8x16_t shift_indices = vdupq_n_s8(-N); - return { vshlq_s8(u.s8x16[0], shift_indices) }; + return SuperVector<16>(vshlq_s8(u.s8x16[0], shift_indices)); } template <> @@ -442,7 +442,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const if (N == 0) return *this; if (N == 16) return Zeroes(); int16x8_t shift_indices = vdupq_n_s16(-N); - return { vshlq_s16(u.s16x8[0], shift_indices) }; + return SuperVector<16>(vshlq_s16(u.s16x8[0], shift_indices)); } template <> @@ -451,7 +451,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const if (N == 0) return *this; if (N == 32) return Zeroes(); int32x4_t shift_indices = vdupq_n_s32(-N); - return { vshlq_s32(u.s32x4[0], shift_indices) }; + return SuperVector<16>(vshlq_s32(u.s32x4[0], shift_indices)); } template <> @@ -460,7 +460,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const if (N 
== 0) return *this; if (N == 64) return Zeroes(); int64x2_t shift_indices = vdupq_n_s64(-N); - return { vshlq_s64(u.s64x2[0], shift_indices) }; + return SuperVector<16>(vshlq_s64(u.s64x2[0], shift_indices)); } template <> @@ -470,11 +470,11 @@ really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const N) const if (N == 16) return Zeroes(); #if defined(HAVE__BUILTIN_CONSTANT_P) if (__builtin_constant_p(N)) { - return {vextq_u8(u.u8x16[0], vdupq_n_u8(0), N)}; + return SuperVector<16>(vextq_u8(u.u8x16[0], vdupq_n_u8(0), N)); } #endif SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_u8(v->u.u8x16[0], vdupq_n_u8(0), n)}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = SuperVector<16>(vextq_u8(v->u.u8x16[0], vdupq_n_u8(0), n)); }); return result; } @@ -511,7 +511,7 @@ really_inline SuperVector<16> SuperVector<16>::Ones_vshl(uint8_t const N) template <> really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr) { - return {vld1q_s32((const int32_t *)ptr)}; + return SuperVector<16>(vld1q_s32((const int32_t *)ptr)); } template <> @@ -519,7 +519,7 @@ really_inline SuperVector<16> SuperVector<16>::load(void const *ptr) { assert(ISALIGNED_N(ptr, alignof(SuperVector::size))); ptr = vectorscan_assume_aligned(ptr, SuperVector::size); - return {vld1q_s32((const int32_t *)ptr)}; + return SuperVector<16>(vld1q_s32((const int32_t *)ptr)); } template <> @@ -537,11 +537,11 @@ really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, in if (offset == 16) return *this; #if defined(HAVE__BUILTIN_CONSTANT_P) if (__builtin_constant_p(offset)) { - return {vextq_u8(other.u.u8x16[0], u.u8x16[0], offset)}; + return SuperVector<16>(vextq_u8(other.u.u8x16[0], u.u8x16[0], offset)); } #endif SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (offset 
== n) result = {vextq_u8(other.u.u8x16[0], v->u.u8x16[0], n)}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (offset == n) result = SuperVector<16>(vextq_u8(other.u.u8x16[0], v->u.u8x16[0], n)); }); return result; } @@ -549,7 +549,7 @@ template<> template<> really_inline SuperVector<16> SuperVector<16>::pshufb(SuperVector<16> b) { - return {vqtbl1q_u8(u.u8x16[0], b.u.u8x16[0])}; + return SuperVector<16>(vqtbl1q_u8(u.u8x16[0], b.u.u8x16[0])); } template<> diff --git a/src/util/supervector/arch/ppc64el/impl.cpp b/src/util/supervector/arch/ppc64el/impl.cpp index de7c73fa..3b2cfa6b 100644 --- a/src/util/supervector/arch/ppc64el/impl.cpp +++ b/src/util/supervector/arch/ppc64el/impl.cpp @@ -183,13 +183,13 @@ really_inline SuperVector<16>::SuperVector(uint64_t const other) template<> really_inline SuperVector<16> SuperVector<16>::Ones(void) { - return { vec_splat_s8(-1)}; + return SuperVector<16>(vec_splat_s8(-1)); } template<> really_inline SuperVector<16> SuperVector<16>::Zeroes(void) { - return { vec_splat_s8(0) }; + return SuperVector<16>(vec_splat_s8(0)); } // Methods @@ -203,38 +203,38 @@ really_inline void SuperVector<16>::operator=(SuperVector<16> const &other) template <> really_inline SuperVector<16> SuperVector<16>::operator&(SuperVector<16> const &b) const { - return { vec_and(u.v128[0], b.u.v128[0]) }; + return SuperVector<16>(vec_and(u.v128[0], b.u.v128[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator|(SuperVector<16> const &b) const { - return { vec_or(u.v128[0], b.u.v128[0]) }; + return SuperVector<16>(vec_or(u.v128[0], b.u.v128[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator^(SuperVector<16> const &b) const { - return { vec_xor(u.v128[0], b.u.v128[0]) }; + return SuperVector<16>(vec_xor(u.v128[0], b.u.v128[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator!() const { - return { vec_xor(u.v128[0], u.v128[0]) }; + return 
SuperVector<16>(vec_xor(u.v128[0], u.v128[0])); } template <> really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const &b) const { int8x16_t not_res = vec_xor(u.s8x16[0], vec_splat_s8(-1)); - return { vec_and(not_res, b.u.s8x16[0]) }; + return SuperVector<16>(vec_and(not_res, b.u.s8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator==(SuperVector<16> const &b) const { - return { vec_cmpeq(u.s8x16[0], b.u.s8x16[0])}; + return SuperVector<16>(vec_cmpeq(u.s8x16[0], b.u.s8x16[0])); } template <> @@ -246,25 +246,25 @@ really_inline SuperVector<16> SuperVector<16>::operator!=(SuperVector<16> const template <> really_inline SuperVector<16> SuperVector<16>::operator>(SuperVector<16> const &b) const { - return { vec_cmpgt(u.s8x16[0], b.u.s8x16[0])}; + return SuperVector<16>(vec_cmpgt(u.s8x16[0], b.u.s8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator>=(SuperVector<16> const &b) const { - return { vec_cmpge(u.s8x16[0], b.u.s8x16[0])}; + return SuperVector<16>(vec_cmpge(u.s8x16[0], b.u.s8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator<(SuperVector<16> const &b) const { - return { vec_cmpgt(b.u.s8x16[0], u.s8x16[0])}; + return SuperVector<16>(vec_cmpgt(b.u.s8x16[0], u.s8x16[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator<=(SuperVector<16> const &b) const { - return { vec_cmpge(b.u.s8x16[0], u.s8x16[0])}; + return SuperVector<16>(vec_cmpge(b.u.s8x16[0], u.s8x16[0])); } template <> @@ -310,35 +310,35 @@ template <> template really_inline SuperVector<16> SuperVector<16>::vshl_8_imm() const { - return { vec_sl(u.s8x16[0], vec_splat_u8(N)) }; + return SuperVector<16>(vec_sl(u.s8x16[0], vec_splat_u8(N))); } template <> template really_inline SuperVector<16> SuperVector<16>::vshl_16_imm() const { - return { vec_sl(u.s16x8[0], vec_splat_u16(N)) }; + return SuperVector<16>(vec_sl(u.s16x8[0], vec_splat_u16(N))); } template <> template really_inline 
SuperVector<16> SuperVector<16>::vshl_32_imm() const { - return { vec_sl(u.s32x4[0], vec_splat_u32(N)) }; + return SuperVector<16>(vec_sl(u.s32x4[0], vec_splat_u32(N))); } template <> template really_inline SuperVector<16> SuperVector<16>::vshl_64_imm() const { - return { vec_sl(u.s64x2[0], vec_splats((ulong64_t) N)) }; + return SuperVector<16>(vec_sl(u.s64x2[0], vec_splats((ulong64_t) N))); } template <> template really_inline SuperVector<16> SuperVector<16>::vshl_128_imm() const { - return { vec_sld(u.s8x16[0], vec_splat_s8(0), N)}; + return SuperVector<16>(vec_sld(u.s8x16[0], vec_splat_s8(0), N)); } template <> @@ -352,35 +352,35 @@ template <> template really_inline SuperVector<16> SuperVector<16>::vshr_8_imm() const { - return { vec_sr(u.s8x16[0], vec_splat_u8(N)) }; + return SuperVector<16>(vec_sr(u.s8x16[0], vec_splat_u8(N))); } template <> template really_inline SuperVector<16> SuperVector<16>::vshr_16_imm() const { - return { vec_sr(u.s16x8[0], vec_splat_u16(N)) }; + return SuperVector<16>(vec_sr(u.s16x8[0], vec_splat_u16(N))); } template <> template really_inline SuperVector<16> SuperVector<16>::vshr_32_imm() const { - return { vec_sr(u.s32x4[0], vec_splat_u32(N)) }; + return SuperVector<16>(vec_sr(u.s32x4[0], vec_splat_u32(N))); } template <> template really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const { - return { vec_sr(u.s64x2[0], vec_splats((ulong64_t)N)) }; + return SuperVector<16>(vec_sr(u.s64x2[0], vec_splats((ulong64_t)N))); } template <> template really_inline SuperVector<16> SuperVector<16>::vshr_128_imm() const { - return { vec_sld(vec_splat_s8(0), u.s8x16[0], 16 - N) }; + return SuperVector<16>(vec_sld(vec_splat_s8(0), u.s8x16[0], 16 - N)); } template <> @@ -411,7 +411,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_8 (uint8_t const N) const { if (N == 0) return *this; uint8x16_t shift_indices = vec_splats((uint8_t) N); - return { vec_sl(u.u8x16[0], shift_indices) }; + return SuperVector<16>(vec_sl(u.u8x16[0], 
shift_indices)); } template <> @@ -419,7 +419,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_16 (uint8_t const UNUSED N) { if (N == 0) return *this; uint16x8_t shift_indices = vec_splats((uint16_t) N); - return { vec_sl(u.u16x8[0], shift_indices) }; + return SuperVector<16>(vec_sl(u.u16x8[0], shift_indices)); } template <> @@ -427,7 +427,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_32 (uint8_t const N) const { if (N == 0) return *this; uint32x4_t shift_indices = vec_splats((uint32_t) N); - return { vec_sl(u.u32x4[0], shift_indices) }; + return SuperVector<16>(vec_sl(u.u32x4[0], shift_indices)); } template <> @@ -435,7 +435,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const { if (N == 0) return *this; uint64x2_t shift_indices = vec_splats((ulong64_t) N); - return { vec_sl(u.u64x2[0], shift_indices) }; + return SuperVector<16>(vec_sl(u.u64x2[0], shift_indices)); } template <> @@ -443,7 +443,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const N) const { if (N == 0) return *this; SuperVector sl{N << 3}; - return { vec_slo(u.u8x16[0], sl.u.u8x16[0]) }; + return SuperVector<16>(vec_slo(u.u8x16[0], sl.u.u8x16[0])); } template <> @@ -457,7 +457,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const N) const { if (N == 0) return *this; uint8x16_t shift_indices = vec_splats((uint8_t) N); - return { vec_sr(u.u8x16[0], shift_indices) }; + return SuperVector<16>(vec_sr(u.u8x16[0], shift_indices)); } template <> @@ -465,7 +465,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const { if (N == 0) return *this; uint16x8_t shift_indices = vec_splats((uint16_t) N); - return { vec_sr(u.u16x8[0], shift_indices) }; + return SuperVector<16>(vec_sr(u.u16x8[0], shift_indices)); } template <> @@ -473,7 +473,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const { if (N == 0) return *this; uint32x4_t shift_indices = vec_splats((uint32_t) 
N); - return { vec_sr(u.u32x4[0], shift_indices) }; + return SuperVector<16>(vec_sr(u.u32x4[0], shift_indices)); } template <> @@ -481,7 +481,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const { if (N == 0) return *this; uint64x2_t shift_indices = vec_splats((ulong64_t) N); - return { vec_sr(u.u64x2[0], shift_indices) }; + return SuperVector<16>(vec_sr(u.u64x2[0], shift_indices)); } template <> @@ -489,7 +489,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const N) const { if (N == 0) return *this; SuperVector sr{N << 3}; - return { vec_sro(u.u8x16[0], sr.u.u8x16[0]) }; + return SuperVector<16>(vec_sro(u.u8x16[0], sr.u.u8x16[0])); } template <> @@ -504,7 +504,7 @@ really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const #if defined(HAVE__BUILTIN_CONSTANT_P) if (N == 0) return *this; if (__builtin_constant_p(N)) { - return { vec_sld(vec_splat_s8(0), u.s8x16[0], 16 - N) }; + return SuperVector<16>(vec_sld(vec_splat_s8(0), u.s8x16[0], 16 - N)); } #endif return vshr_128(N); @@ -516,7 +516,7 @@ really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const #if defined(HAVE__BUILTIN_CONSTANT_P) if (N == 0) return *this; if (__builtin_constant_p(N)) { - return { vec_sld(u.s8x16[0], vec_splat_s8(0), N)}; + return SuperVector<16>(vec_sld(u.s8x16[0], vec_splat_s8(0), N)); } #endif return vshl_128(N); @@ -537,14 +537,14 @@ really_inline SuperVector<16> SuperVector<16>::Ones_vshl(uint8_t const N) template <> really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr) { - return { vec_xl(0, (const long64_t*)ptr) }; + return SuperVector<16>(vec_xl(0, (const long64_t*)ptr)); } template <> really_inline SuperVector<16> SuperVector<16>::load(void const *ptr) { assert(ISALIGNED_N(ptr, alignof(SuperVector::size))); - return { vec_xl(0, (const long64_t*)ptr) }; + return SuperVector<16>(vec_xl(0, (const long64_t*)ptr)); } template <> @@ -562,14 +562,14 @@ really_inline 
SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, in if (offset == 16) return *this; #if defined(HAVE__BUILTIN_CONSTANT_P) if (__builtin_constant_p(offset)) { - return { vec_sld(u.s8x16[0], other.u.s8x16[0], offset) }; + return SuperVector<16>(vec_sld(u.s8x16[0], other.u.s8x16[0], offset)); } #endif uint8x16_t sl = vec_splats((uint8_t) (offset << 3)); uint8x16_t sr = vec_splats((uint8_t) ((16 - offset) << 3)); uint8x16_t rhs = vec_slo(u.u8x16[0], sr); uint8x16_t lhs = vec_sro(other.u.u8x16[0], sl); - return { vec_or(lhs, rhs) }; + return SuperVector<16>(vec_or(lhs, rhs)); } template<> @@ -581,7 +581,7 @@ really_inline SuperVector<16> SuperVector<16>::pshufb(SuperVector<16> b) below is the version that is converted from Intel to PPC. */ uint8x16_t mask =(uint8x16_t)vec_cmpge(b.u.u8x16[0], vec_splats((uint8_t)0x80)); uint8x16_t res = vec_perm (u.u8x16[0], u.u8x16[0], b.u.u8x16[0]); - return { vec_sel(res, vec_splat_u8(0), mask) }; + return SuperVector<16>(vec_sel(res, vec_splat_u8(0), mask)); } template<> diff --git a/src/util/supervector/arch/x86/impl.cpp b/src/util/supervector/arch/x86/impl.cpp index d83f6792..6a7dfa3d 100644 --- a/src/util/supervector/arch/x86/impl.cpp +++ b/src/util/supervector/arch/x86/impl.cpp @@ -113,13 +113,13 @@ really_inline SuperVector<16>::SuperVector(uint64_t const other) template<> really_inline SuperVector<16> SuperVector<16>::Ones() { - return {_mm_set1_epi8(0xFF)}; + return SuperVector<16>(_mm_set1_epi8(0xFF)); } template<> really_inline SuperVector<16> SuperVector<16>::Zeroes(void) { - return {_mm_set1_epi8(0)}; + return SuperVector<16>(_mm_set1_epi8(0)); } // Methods @@ -133,37 +133,37 @@ really_inline void SuperVector<16>::operator=(SuperVector<16> const &other) template <> really_inline SuperVector<16> SuperVector<16>::operator&(SuperVector<16> const &b) const { - return {_mm_and_si128(u.v128[0], b.u.v128[0])}; + return SuperVector<16>(_mm_and_si128(u.v128[0], b.u.v128[0])); } template <> really_inline 
SuperVector<16> SuperVector<16>::operator|(SuperVector<16> const &b) const { - return {_mm_or_si128(u.v128[0], b.u.v128[0])}; + return SuperVector<16>(_mm_or_si128(u.v128[0], b.u.v128[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator^(SuperVector<16> const &b) const { - return {_mm_xor_si128(u.v128[0], b.u.v128[0])}; + return SuperVector<16>(_mm_xor_si128(u.v128[0], b.u.v128[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator!() const { - return {_mm_xor_si128(u.v128[0], u.v128[0])}; + return SuperVector<16>(_mm_xor_si128(u.v128[0], u.v128[0])); } template <> really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const &b) const { - return {_mm_andnot_si128(u.v128[0], b.u.v128[0])}; + return SuperVector<16>(_mm_andnot_si128(u.v128[0], b.u.v128[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator==(SuperVector<16> const &b) const { - return {_mm_cmpeq_epi8(u.v128[0], b.u.v128[0])}; + return SuperVector<16>(_mm_cmpeq_epi8(u.v128[0], b.u.v128[0])); } template <> @@ -175,13 +175,13 @@ really_inline SuperVector<16> SuperVector<16>::operator!=(SuperVector<16> const template <> really_inline SuperVector<16> SuperVector<16>::operator>(SuperVector<16> const &b) const { - return {_mm_cmpgt_epi8(u.v128[0], b.u.v128[0])}; + return SuperVector<16>(_mm_cmpgt_epi8(u.v128[0], b.u.v128[0])); } template <> really_inline SuperVector<16> SuperVector<16>::operator<(SuperVector<16> const &b) const { - return {_mm_cmplt_epi8(u.v128[0], b.u.v128[0])}; + return SuperVector<16>(_mm_cmplt_epi8(u.v128[0], b.u.v128[0])); } template <> @@ -235,28 +235,28 @@ template <> template really_inline SuperVector<16> SuperVector<16>::vshl_16_imm() const { - return {_mm_slli_epi16(u.v128[0], N)}; + return SuperVector<16>(_mm_slli_epi16(u.v128[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshl_32_imm() const { - return {_mm_slli_epi32(u.v128[0], N)}; + return 
SuperVector<16>(_mm_slli_epi32(u.v128[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshl_64_imm() const { - return {_mm_slli_epi64(u.v128[0], N)}; + return SuperVector<16>(_mm_slli_epi64(u.v128[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshl_128_imm() const { - return {_mm_slli_si128(u.v128[0], N)}; + return SuperVector<16>(_mm_slli_si128(u.v128[0], N)); } template <> @@ -277,28 +277,28 @@ template <> template really_inline SuperVector<16> SuperVector<16>::vshr_16_imm() const { - return {_mm_srli_epi16(u.v128[0], N)}; + return SuperVector<16>(_mm_srli_epi16(u.v128[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshr_32_imm() const { - return {_mm_srli_epi32(u.v128[0], N)}; + return SuperVector<16>(_mm_srli_epi32(u.v128[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const { - return {_mm_srli_epi64(u.v128[0], N)}; + return SuperVector<16>(_mm_srli_epi64(u.v128[0], N)); } template <> template really_inline SuperVector<16> SuperVector<16>::vshr_128_imm() const { - return {_mm_srli_si128(u.v128[0], N)}; + return SuperVector<16>(_mm_srli_si128(u.v128[0], N)); } template <> @@ -333,13 +333,13 @@ really_inline SuperVector<16> SuperVector<16>::vshl_16 (uint8_t const N) const { #if defined(HAVE__BUILTIN_CONSTANT_P) if (__builtin_constant_p(N)) { - return {_mm_slli_epi16(u.v128[0], N)}; + return SuperVector<16>(_mm_slli_epi16(u.v128[0], N)); } #endif if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_slli_epi16(v->u.v128[0], n)}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = SuperVector<16>(_mm_slli_epi16(v->u.v128[0], n)); }); return result; } @@ -348,13 +348,13 @@ really_inline SuperVector<16> SuperVector<16>::vshl_32 (uint8_t 
const N) const { #if defined(HAVE__BUILTIN_CONSTANT_P) if (__builtin_constant_p(N)) { - return {_mm_slli_epi32(u.v128[0], N)}; + return SuperVector<16>(_mm_slli_epi32(u.v128[0], N)); } #endif if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_slli_epi32(v->u.v128[0], n)}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = SuperVector<16>(_mm_slli_epi32(v->u.v128[0], n)); }); return result; } @@ -363,13 +363,13 @@ really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const { #if defined(HAVE__BUILTIN_CONSTANT_P) if (__builtin_constant_p(N)) { - return {_mm_slli_epi64(u.v128[0], N)}; + return SuperVector<16>(_mm_slli_epi64(u.v128[0], N)); } #endif if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_slli_epi64(v->u.v128[0], n)}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = SuperVector<16>(_mm_slli_epi64(v->u.v128[0], n)); }); return result; } @@ -378,13 +378,13 @@ really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const N) const { #if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND) if (__builtin_constant_p(N)) { - return {_mm_slli_si128(u.v128[0], N)}; + return SuperVector<16>(_mm_slli_si128(u.v128[0], N)); } #endif if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_slli_si128(v->u.v128[0], n)}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = SuperVector<16>(_mm_slli_si128(v->u.v128[0], n)); }); return result; } @@ -408,13 
+408,13 @@ really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const { #if defined(HAVE__BUILTIN_CONSTANT_P) if (__builtin_constant_p(N)) { - return {_mm_srli_epi16(u.v128[0], N)}; + return SuperVector<16>(_mm_srli_epi16(u.v128[0], N)); } #endif if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_srli_epi16(v->u.v128[0], n)}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = SuperVector<16>(_mm_srli_epi16(v->u.v128[0], n)); }); return result; } @@ -423,13 +423,13 @@ really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const { #if defined(HAVE__BUILTIN_CONSTANT_P) if (__builtin_constant_p(N)) { - return {_mm_srli_epi32(u.v128[0], N)}; + return SuperVector<16>(_mm_srli_epi32(u.v128[0], N)); } #endif if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_srli_epi32(v->u.v128[0], n)}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = SuperVector<16>(_mm_srli_epi32(v->u.v128[0], n)); }); return result; } @@ -438,13 +438,13 @@ really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const { #if defined(HAVE__BUILTIN_CONSTANT_P) if (__builtin_constant_p(N)) { - return {_mm_srli_epi64(u.v128[0], N)}; + return SuperVector<16>(_mm_srli_epi64(u.v128[0], N)); } #endif if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_srli_epi64(v->u.v128[0], n)}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = 
SuperVector<16>(_mm_srli_epi64(v->u.v128[0], n)); }); return result; } @@ -453,13 +453,13 @@ really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const N) const { #if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND) if (__builtin_constant_p(N)) { - return {_mm_srli_si128(u.v128[0], N)}; + return SuperVector<16>(_mm_srli_si128(u.v128[0], N)); } #endif if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm_srli_si128(v->u.v128[0], n)}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = SuperVector<16>(_mm_srli_si128(v->u.v128[0], n)); }); return result; } @@ -474,7 +474,7 @@ really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const { #if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND) if (__builtin_constant_p(N)) { - return {_mm_srli_si128(u.v128[0], N)}; + return SuperVector<16>(_mm_srli_si128(u.v128[0], N)); } #endif return vshr_128(N); @@ -485,7 +485,7 @@ really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const { #if defined(HAVE__BUILTIN_CONSTANT_P) && !defined(VS_SIMDE_BACKEND) if (__builtin_constant_p(N)) { - return {_mm_slli_si128(u.v128[0], N)}; + return SuperVector<16>(_mm_slli_si128(u.v128[0], N)); } #endif return vshl_128(N); @@ -508,7 +508,7 @@ really_inline SuperVector<16> SuperVector<16>::Ones_vshl(uint8_t const N) template <> really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr) { - return _mm_loadu_si128((const m128 *)ptr); + return SuperVector<16>(_mm_loadu_si128((const m128 *)ptr)); } template <> @@ -516,14 +516,14 @@ really_inline SuperVector<16> SuperVector<16>::load(void const *ptr) { assert(ISALIGNED_N(ptr, alignof(SuperVector::size))); ptr = vectorscan_assume_aligned(ptr, SuperVector::size); - return 
SuperVector<16>(_mm_load_si128((const m128 *)ptr)); } template <> really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint8_t const len) { SuperVector mask = Ones_vshr(16 -len); - SuperVector v = _mm_loadu_si128((const m128 *)ptr); + SuperVector v = SuperVector<16>(_mm_loadu_si128((const m128 *)ptr)); return mask & v; } @@ -535,27 +535,27 @@ really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, in if (offset == 16) { return *this; } else { - return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], offset)}; + return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], offset)); } } #endif switch(offset) { case 0: return other; break; - case 1: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 1)}; break; - case 2: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 2)}; break; - case 3: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 3)}; break; - case 4: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 4)}; break; - case 5: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 5)}; break; - case 6: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 6)}; break; - case 7: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 7)}; break; - case 8: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 8)}; break; - case 9: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 9)}; break; - case 10: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 10)}; break; - case 11: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 11)}; break; - case 12: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 12)}; break; - case 13: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 13)}; break; - case 14: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 14)}; break; - case 15: return {_mm_alignr_epi8(u.v128[0], other.u.v128[0], 15)}; break; + case 1: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 1)); break; + case 2: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 2)); break; + case 
3: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 3)); break; + case 4: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 4)); break; + case 5: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 5)); break; + case 6: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 6)); break; + case 7: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 7)); break; + case 8: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 8)); break; + case 9: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 9)); break; + case 10: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 10)); break; + case 11: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 11)); break; + case 12: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 12)); break; + case 13: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 13)); break; + case 14: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 14)); break; + case 15: return SuperVector<16>(_mm_alignr_epi8(u.v128[0], other.u.v128[0], 15)); break; default: break; } return *this; @@ -565,7 +565,7 @@ template<> template<> really_inline SuperVector<16> SuperVector<16>::pshufb(SuperVector<16> b) { - return {_mm_shuffle_epi8(u.v128[0], b.u.v128[0])}; + return SuperVector<16>(_mm_shuffle_epi8(u.v128[0], b.u.v128[0])); } template<> @@ -673,13 +673,13 @@ really_inline SuperVector<32>::SuperVector(uint64_t const other) template<> really_inline SuperVector<32> SuperVector<32>::Ones(void) { - return {_mm256_set1_epi8(0xFF)}; + return SuperVector<32>(_mm256_set1_epi8(0xFF)); } template<> really_inline SuperVector<32> SuperVector<32>::Zeroes(void) { - return {_mm256_set1_epi8(0)}; + return SuperVector<32>(_mm256_set1_epi8(0)); } template <> @@ -691,37 +691,37 @@ really_inline void SuperVector<32>::operator=(SuperVector<32> const &other) template <> really_inline SuperVector<32> 
SuperVector<32>::operator&(SuperVector<32> const &b) const { - return {_mm256_and_si256(u.v256[0], b.u.v256[0])}; + return SuperVector<32>(_mm256_and_si256(u.v256[0], b.u.v256[0])); } template <> really_inline SuperVector<32> SuperVector<32>::operator|(SuperVector<32> const &b) const { - return {_mm256_or_si256(u.v256[0], b.u.v256[0])}; + return SuperVector<32>(_mm256_or_si256(u.v256[0], b.u.v256[0])); } template <> really_inline SuperVector<32> SuperVector<32>::operator^(SuperVector<32> const &b) const { - return {_mm256_xor_si256(u.v256[0], b.u.v256[0])}; + return SuperVector<32>(_mm256_xor_si256(u.v256[0], b.u.v256[0])); } template <> really_inline SuperVector<32> SuperVector<32>::operator!() const { - return {_mm256_xor_si256(u.v256[0], u.v256[0])}; + return SuperVector<32>(_mm256_xor_si256(u.v256[0], u.v256[0])); } template <> really_inline SuperVector<32> SuperVector<32>::opandnot(SuperVector<32> const &b) const { - return {_mm256_andnot_si256(u.v256[0], b.u.v256[0])}; + return SuperVector<32>(_mm256_andnot_si256(u.v256[0], b.u.v256[0])); } template <> really_inline SuperVector<32> SuperVector<32>::operator==(SuperVector<32> const &b) const { - return {_mm256_cmpeq_epi8(u.v256[0], b.u.v256[0])}; + return SuperVector<32>(_mm256_cmpeq_epi8(u.v256[0], b.u.v256[0])); } template <> @@ -733,7 +733,7 @@ really_inline SuperVector<32> SuperVector<32>::operator!=(SuperVector<32> const template <> really_inline SuperVector<32> SuperVector<32>::operator>(SuperVector<32> const &b) const { - return {_mm256_cmpgt_epi8(u.v256[0], b.u.v256[0])}; + return SuperVector<32>(_mm256_cmpgt_epi8(u.v256[0], b.u.v256[0])); } template <> @@ -793,28 +793,28 @@ template <> template really_inline SuperVector<32> SuperVector<32>::vshl_16_imm() const { - return {_mm256_slli_epi16(u.v256[0], N)}; + return {SuperVector<32>(_mm256_slli_epi16(u.v256[0], N))}; } template <> template really_inline SuperVector<32> SuperVector<32>::vshl_32_imm() const { - return {_mm256_slli_epi32(u.v256[0], N)}; + 
return {SuperVector<32>(_mm256_slli_epi32(u.v256[0], N))}; } template <> template really_inline SuperVector<32> SuperVector<32>::vshl_64_imm() const { - return {_mm256_slli_epi64(u.v256[0], N)}; + return {SuperVector<32>(_mm256_slli_epi64(u.v256[0], N))}; } template <> template really_inline SuperVector<32> SuperVector<32>::vshl_128_imm() const { - return {_mm256_slli_si256(u.v256[0], N)}; + return {SuperVector<32>(_mm256_slli_si256(u.v256[0], N))}; } template <> @@ -822,12 +822,12 @@ template really_inline SuperVector<32> SuperVector<32>::vshl_256_imm() const { if (N == 0) return *this; - if (N == 16) return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0))}; + if (N == 16) return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)))}; if (N == 32) return Zeroes(); if (N < 16) { - return {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N)}; + return {SuperVector<32>(_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N))}; } else { - return {_mm256_slli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), N - 16)}; + return {SuperVector<32>(_mm256_slli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), N - 16))}; } } @@ -849,28 +849,28 @@ template <> template really_inline SuperVector<32> SuperVector<32>::vshr_16_imm() const { - return {_mm256_srli_epi16(u.v256[0], N)}; + return {SuperVector<32>(_mm256_srli_epi16(u.v256[0], N))}; } template <> template really_inline SuperVector<32> SuperVector<32>::vshr_32_imm() const { - return {_mm256_srli_epi32(u.v256[0], N)}; + return {SuperVector<32>(_mm256_srli_epi32(u.v256[0], N))}; } template <> template really_inline SuperVector<32> SuperVector<32>::vshr_64_imm() const { - return {_mm256_srli_epi64(u.v256[0], N)}; + return {SuperVector<32>(_mm256_srli_epi64(u.v256[0], N))}; } template <> 
template really_inline SuperVector<32> SuperVector<32>::vshr_128_imm() const { - return {_mm256_srli_si256(u.v256[0], N)}; + return {SuperVector<32>(_mm256_srli_si256(u.v256[0], N))}; } template <> @@ -878,12 +878,12 @@ template really_inline SuperVector<32> SuperVector<32>::vshr_256_imm() const { if (N == 0) return *this; - if (N == 16) return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1))}; + if (N == 16) return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)))}; if (N == 32) return Zeroes(); if (N < 16) { - return {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N)}; + return {SuperVector<32>(_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N))}; } else { - return {_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16)}; + return {SuperVector<32>(_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16))}; } } @@ -922,7 +922,7 @@ really_inline SuperVector<32> SuperVector<32>::vshl_16 (uint8_t const N) const if (N == 0) return *this; if (N == 32) return Zeroes(); SuperVector result; - Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_slli_epi16(v->u.v256[0], n)}; }); + Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_slli_epi16(v->u.v256[0], n))}; }); return result; } @@ -932,7 +932,7 @@ really_inline SuperVector<32> SuperVector<32>::vshl_32 (uint8_t const N) const if (N == 0) return *this; if (N == 32) return Zeroes(); SuperVector result; - Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_slli_epi32(v->u.v256[0], n)}; }); + Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr 
uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_slli_epi32(v->u.v256[0], n))}; }); return result; } @@ -942,7 +942,7 @@ really_inline SuperVector<32> SuperVector<32>::vshl_64 (uint8_t const N) const if (N == 0) return *this; if (N == 32) return Zeroes(); SuperVector result; - Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_slli_epi64(v->u.v256[0], n)}; }); + Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_slli_epi64(v->u.v256[0], n))}; }); return result; } @@ -952,7 +952,7 @@ really_inline SuperVector<32> SuperVector<32>::vshl_128(uint8_t const N) const if (N == 0) return *this; if (N == 32) return Zeroes(); SuperVector result; - Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_slli_si256(v->u.v256[0], n)}; }); + Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_slli_si256(v->u.v256[0], n))}; }); return result; } @@ -960,16 +960,16 @@ template <> really_inline SuperVector<32> SuperVector<32>::vshl_256(uint8_t const N) const { if (N == 0) return *this; - if (N == 16) return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0))}; + if (N == 16) return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)))}; if (N == 32) return Zeroes(); SuperVector result; Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; - if (N == n) result = {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - n)};; + if (N == n) result = {SuperVector<32>(_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - n))};; }); Unroller<17, 32>::iterator([&,v=this](auto const i) { constexpr 
uint8_t n = i.value; - if (N == n) result = {_mm256_slli_si256(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), n - 16)}; + if (N == n) result = {SuperVector<32>(_mm256_slli_si256(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), n - 16))}; }); return result; } @@ -995,7 +995,7 @@ really_inline SuperVector<32> SuperVector<32>::vshr_16 (uint8_t const N) const if (N == 0) return *this; if (N == 32) return Zeroes(); SuperVector result; - Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_srli_epi16(v->u.v256[0], n)}; }); + Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_srli_epi16(v->u.v256[0], n))}; }); return result; } @@ -1005,7 +1005,7 @@ really_inline SuperVector<32> SuperVector<32>::vshr_32 (uint8_t const N) const if (N == 0) return *this; if (N == 32) return Zeroes(); SuperVector result; - Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_srli_epi32(v->u.v256[0], n)}; }); + Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_srli_epi32(v->u.v256[0], n))}; }); return result; } @@ -1015,7 +1015,7 @@ really_inline SuperVector<32> SuperVector<32>::vshr_64 (uint8_t const N) const if (N == 0) return *this; if (N == 32) return Zeroes(); SuperVector result; - Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_srli_epi64(v->u.v256[0], n)}; }); + Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_srli_epi64(v->u.v256[0], n))}; }); return result; } @@ -1025,7 +1025,7 @@ really_inline SuperVector<32> SuperVector<32>::vshr_128(uint8_t const N) const if (N == 0) return *this; if (N == 32) return 
Zeroes(); SuperVector result; - Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm256_srli_si256(v->u.v256[0], n)}; }); + Unroller<1, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<32>(_mm256_srli_si256(v->u.v256[0], n))}; }); return result; } @@ -1033,16 +1033,16 @@ template <> really_inline SuperVector<32> SuperVector<32>::vshr_256(uint8_t const N) const { if (N == 0) return *this; - if (N == 16) return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1))}; + if (N == 16) return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)))}; if (N == 32) return Zeroes(); SuperVector result; Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; - if (N == n) result = {_mm256_alignr_epi8(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), v->u.v256[0], n)}; + if (N == n) result = {SuperVector<32>(_mm256_alignr_epi8(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), v->u.v256[0], n))}; }); Unroller<17, 32>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; - if (N == n) result = {_mm256_srli_si256(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), n - 16)}; + if (N == n) result = {SuperVector<32>(_mm256_srli_si256(_mm256_permute2x128_si256(v->u.v256[0], v->u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), n - 16))}; }); return result; } @@ -1060,11 +1060,11 @@ really_inline SuperVector<32> SuperVector<32>::operator>>(uint8_t const N) const if (__builtin_constant_p(N)) { // As found here: https://stackoverflow.com/questions/25248766/emulating-shifts-on-32-bytes-with-avx if (N < 16) { - return {_mm256_alignr_epi8(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), u.v256[0], N)}; + return {SuperVector<32>(_mm256_alignr_epi8(_mm256_permute2x128_si256(u.v256[0], 
u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), u.v256[0], N))}; } else if (N == 16) { - return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1))}; + return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)))}; } else { - return {_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16)}; + return {SuperVector<32>(_mm256_srli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(2, 0, 0, 1)), N - 16))}; } } #endif @@ -1078,11 +1078,11 @@ really_inline SuperVector<32> SuperVector<32>::operator<<(uint8_t const N) const if (__builtin_constant_p(N)) { // As found here: https://stackoverflow.com/questions/25248766/emulating-shifts-on-32-bytes-with-avx if (N < 16) { - return {_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N)}; + return {SuperVector<32>(_mm256_alignr_epi8(u.v256[0], _mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), 16 - N))}; } else if (N == 16) { - return {_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0))}; + return {SuperVector<32>(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)))}; } else { - return {_mm256_slli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), N - 16)}; + return {SuperVector<32>(_mm256_slli_si256(_mm256_permute2x128_si256(u.v256[0], u.v256[0], _MM_SHUFFLE(0, 0, 2, 0)), N - 16))}; } } #endif @@ -1112,7 +1112,7 @@ really_inline SuperVector<32> SuperVector<32>::Ones_vshl(uint8_t const N) template <> really_inline SuperVector<32> SuperVector<32>::loadu(void const *ptr) { - return {_mm256_loadu_si256((const m256 *)ptr)}; + return {SuperVector<32>(_mm256_loadu_si256((const m256 *)ptr))}; } template <> @@ -1120,7 +1120,7 @@ really_inline SuperVector<32> SuperVector<32>::load(void const *ptr) { assert(ISALIGNED_N(ptr, alignof(SuperVector::size))); ptr = 
vectorscan_assume_aligned(ptr, SuperVector::size); - return {_mm256_load_si256((const m256 *)ptr)}; + return {SuperVector<32>(_mm256_load_si256((const m256 *)ptr))}; } template <> @@ -1128,7 +1128,7 @@ really_inline SuperVector<32> SuperVector<32>::loadu_maskz(void const *ptr, uint { #ifdef HAVE_AVX512 u32 mask = (~0ULL) >> (32 - len); - SuperVector<32> v = _mm256_mask_loadu_epi8(Zeroes().u.v256[0], mask, (const m256 *)ptr); + SuperVector<32> v = SuperVector<32>(_mm256_mask_loadu_epi8(Zeroes().u.v256[0], mask, (const m256 *)ptr)); v.print8("v"); return v; #else @@ -1136,7 +1136,7 @@ really_inline SuperVector<32> SuperVector<32>::loadu_maskz(void const *ptr, uint SuperVector<32> mask = Ones_vshr(32 -len); mask.print8("mask"); (Ones() >> (32 - len)).print8("mask"); - SuperVector<32> v = _mm256_loadu_si256((const m256 *)ptr); + SuperVector<32> v = SuperVector<32>(_mm256_loadu_si256((const m256 *)ptr)); v.print8("v"); return mask & v; #endif @@ -1150,44 +1150,44 @@ really_inline SuperVector<32> SuperVector<32>::alignr(SuperVector<32> &other, in if (offset == 16) { return *this; } else { - return {_mm256_alignr_epi8(u.v256[0], other.u.v256[0], offset)}; + return {SuperVector<32>(_mm256_alignr_epi8(u.v256[0], other.u.v256[0], offset))}; } } #endif // As found here: https://stackoverflow.com/questions/8517970/mm-alignr-epi8-palignr-equivalent-in-avx2#8637458 switch (offset){ - case 0 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 0), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 0)); break; - case 1 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 1), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 1)); break; - case 2 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 2), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 2)); break; - case 3 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 3), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 3)); break; - case 4 : return 
_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 4), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 4)); break; - case 5 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 5), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 5)); break; - case 6 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 6), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 6)); break; - case 7 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 7), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 7)); break; - case 8 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 8), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 8)); break; - case 9 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 9), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 9)); break; - case 10 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 10), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 10)); break; - case 11 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 11), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 11)); break; - case 12 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 12), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 12)); break; - case 13 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 13), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 13)); break; - case 14 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 14), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 14)); break; - case 15 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 15), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 15)); break; - case 16 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 0), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 0)); break; - case 17 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 1), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 
1)); break; - case 18 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 2), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 2)); break; - case 19 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 3), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 3)); break; - case 20 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 4), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 4)); break; - case 21 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 5), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 5)); break; - case 22 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 6), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 6)); break; - case 23 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 7), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 7)); break; - case 24 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 8), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 8)); break; - case 25 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 9), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 9)); break; - case 26 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 10), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 10)); break; - case 27 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 11), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 11)); break; - case 28 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 12), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 12)); break; - case 29 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 13), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 13)); break; - case 30 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 14), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 14)); break; - case 31 : return _mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 15), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 15)); break; + case 0 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 0), 
_mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 0))); break; + case 1 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 1), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 1))); break; + case 2 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 2), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 2))); break; + case 3 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 3), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 3))); break; + case 4 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 4), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 4))); break; + case 5 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 5), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 5))); break; + case 6 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 6), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 6))); break; + case 7 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 7), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 7))); break; + case 8 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 8), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 8))); break; + case 9 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 9), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 9))); break; + case 10 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 10), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 10))); break; + case 11 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 11), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 11))); break; + case 12 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 12), _mm_alignr_epi8(other.u.v128[1], 
other.u.v128[0], 12))); break; + case 13 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 13), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 13))); break; + case 14 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 14), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 14))); break; + case 15 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[0], other.u.v128[1], 15), _mm_alignr_epi8(other.u.v128[1], other.u.v128[0], 15))); break; + case 16 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 0), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 0))); break; + case 17 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 1), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 1))); break; + case 18 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 2), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 2))); break; + case 19 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 3), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 3))); break; + case 20 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 4), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 4))); break; + case 21 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 5), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 5))); break; + case 22 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 6), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 6))); break; + case 23 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 7), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 7))); break; + case 24 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 8), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 8))); break; + case 25 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 9), 
_mm_alignr_epi8(u.v128[0], other.u.v128[1], 9))); break; + case 26 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 10), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 10))); break; + case 27 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 11), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 11))); break; + case 28 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 12), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 12))); break; + case 29 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 13), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 13))); break; + case 30 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 14), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 14))); break; + case 31 : return SuperVector<32>(_mm256_set_m128i(_mm_alignr_epi8(u.v128[1], u.v128[0], 15), _mm_alignr_epi8(u.v128[0], other.u.v128[1], 15))); break; default: break; } return *this; @@ -1197,7 +1197,7 @@ template<> template<> really_inline SuperVector<32> SuperVector<32>::pshufb(SuperVector<32> b) { - return {_mm256_shuffle_epi8(u.v256[0], b.u.v256[0])}; + return {SuperVector<32>(_mm256_shuffle_epi8(u.v256[0], b.u.v256[0]))}; } template<> @@ -1313,13 +1313,13 @@ really_inline SuperVector<64>::SuperVector(uint64_t const o) template<> really_inline SuperVector<64> SuperVector<64>::Ones(void) { - return {_mm512_set1_epi8(0xFF)}; + return {SuperVector<64>(_mm512_set1_epi8(0xFF))}; } template<> really_inline SuperVector<64> SuperVector<64>::Zeroes(void) { - return {_mm512_set1_epi8(0)}; + return {SuperVector<64>(_mm512_set1_epi8(0))}; } // Methods @@ -1332,31 +1332,31 @@ really_inline void SuperVector<64>::operator=(SuperVector<64> const &o) template <> really_inline SuperVector<64> SuperVector<64>::operator&(SuperVector<64> const &b) const { - return {_mm512_and_si512(u.v512[0], b.u.v512[0])}; + return {SuperVector<64>(_mm512_and_si512(u.v512[0], 
b.u.v512[0]))}; } template <> really_inline SuperVector<64> SuperVector<64>::operator|(SuperVector<64> const &b) const { - return {_mm512_or_si512(u.v512[0], b.u.v512[0])}; + return {SuperVector<64>(_mm512_or_si512(u.v512[0], b.u.v512[0]))}; } template <> really_inline SuperVector<64> SuperVector<64>::operator^(SuperVector<64> const &b) const { - return {_mm512_xor_si512(u.v512[0], b.u.v512[0])}; + return {SuperVector<64>(_mm512_xor_si512(u.v512[0], b.u.v512[0]))}; } template <> really_inline SuperVector<64> SuperVector<64>::operator!() const { - return {_mm512_xor_si512(u.v512[0], u.v512[0])}; + return {SuperVector<64>(_mm512_xor_si512(u.v512[0], u.v512[0]))}; } template <> really_inline SuperVector<64> SuperVector<64>::opandnot(SuperVector<64> const &b) const { - return {_mm512_andnot_si512(u.v512[0], b.u.v512[0])}; + return {SuperVector<64>(_mm512_andnot_si512(u.v512[0], b.u.v512[0]))}; } template <> @@ -1364,7 +1364,7 @@ really_inline SuperVector<64> SuperVector<64>::operator==(SuperVector<64> const { SuperVector<64>::comparemask_type mask = _mm512_cmpeq_epi8_mask(u.v512[0], b.u.v512[0]); - return {_mm512_movm_epi8(mask)}; + return {SuperVector<64>(_mm512_movm_epi8(mask))}; } template <> @@ -1372,7 +1372,7 @@ really_inline SuperVector<64> SuperVector<64>::operator!=(SuperVector<64> const { SuperVector<64>::comparemask_type mask = _mm512_cmpneq_epi8_mask(u.v512[0], b.u.v512[0]); - return {_mm512_movm_epi8(mask)}; + return {SuperVector<64>(_mm512_movm_epi8(mask))}; } template <> @@ -1380,7 +1380,7 @@ really_inline SuperVector<64> SuperVector<64>::operator>(SuperVector<64> const & { SuperVector<64>::comparemask_type mask = _mm512_cmpgt_epi8_mask(u.v512[0], b.u.v512[0]); - return {_mm512_movm_epi8(mask)}; + return {SuperVector<64>(_mm512_movm_epi8(mask))}; } template <> @@ -1388,7 +1388,7 @@ really_inline SuperVector<64> SuperVector<64>::operator<(SuperVector<64> const & { SuperVector<64>::comparemask_type mask = _mm512_cmplt_epi8_mask(u.v512[0], b.u.v512[0]); - 
return {_mm512_movm_epi8(mask)}; + return {SuperVector<64>(_mm512_movm_epi8(mask))}; } template <> @@ -1396,7 +1396,7 @@ really_inline SuperVector<64> SuperVector<64>::operator>=(SuperVector<64> const { SuperVector<64>::comparemask_type mask = _mm512_cmpge_epi8_mask(u.v512[0], b.u.v512[0]); - return {_mm512_movm_epi8(mask)}; + return {SuperVector<64>(_mm512_movm_epi8(mask))}; } template <> @@ -1404,7 +1404,7 @@ really_inline SuperVector<64> SuperVector<64>::operator<=(SuperVector<64> const { SuperVector<64>::comparemask_type mask = _mm512_cmple_epi8_mask(u.v512[0], b.u.v512[0]); - return {_mm512_movm_epi8(mask)}; + return {SuperVector<64>(_mm512_movm_epi8(mask))}; } template <> @@ -1448,28 +1448,28 @@ template <> template really_inline SuperVector<64> SuperVector<64>::vshl_16_imm() const { - return {_mm512_slli_epi16(u.v512[0], N)}; + return {SuperVector<64>(_mm512_slli_epi16(u.v512[0], N))}; } template <> template really_inline SuperVector<64> SuperVector<64>::vshl_32_imm() const { - return {_mm512_slli_epi32(u.v512[0], N)}; + return {SuperVector<64>(_mm512_slli_epi32(u.v512[0], N))}; } template <> template really_inline SuperVector<64> SuperVector<64>::vshl_64_imm() const { - return {_mm512_slli_epi64(u.v512[0], N)}; + return {SuperVector<64>(_mm512_slli_epi64(u.v512[0], N))}; } template <> template really_inline SuperVector<64> SuperVector<64>::vshl_128_imm() const { - return {_mm512_bslli_epi128(u.v512[0], N)}; + return {SuperVector<64>(_mm512_bslli_epi128(u.v512[0], N))}; } template <> @@ -1504,28 +1504,28 @@ template <> template really_inline SuperVector<64> SuperVector<64>::vshr_16_imm() const { - return {_mm512_srli_epi16(u.v512[0], N)}; + return {SuperVector<64>(_mm512_srli_epi16(u.v512[0], N))}; } template <> template really_inline SuperVector<64> SuperVector<64>::vshr_32_imm() const { - return {_mm512_srli_epi32(u.v512[0], N)}; + return {SuperVector<64>(_mm512_srli_epi32(u.v512[0], N))}; } template <> template really_inline SuperVector<64> 
SuperVector<64>::vshr_64_imm() const { - return {_mm512_srli_epi64(u.v512[0], N)}; + return {SuperVector<64>(_mm512_srli_epi64(u.v512[0], N))}; } template <> template really_inline SuperVector<64> SuperVector<64>::vshr_128_imm() const { - return {_mm512_bsrli_epi128(u.v512[0], N)}; + return {SuperVector<64>(_mm512_bsrli_epi128(u.v512[0], N))}; } template <> @@ -1574,7 +1574,7 @@ really_inline SuperVector<64> SuperVector<64>::vshl_16 (uint8_t const N) const if (N == 0) return *this; if (N == 64) return Zeroes(); SuperVector result; - Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_slli_epi16(v->u.v512[0], n)}; }); + Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_slli_epi16(v->u.v512[0], n))}; }); return result; } @@ -1584,7 +1584,7 @@ really_inline SuperVector<64> SuperVector<64>::vshl_32 (uint8_t const N) const if (N == 0) return *this; if (N == 64) return Zeroes(); SuperVector result; - Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_slli_epi32(v->u.v512[0], n)}; }); + Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_slli_epi32(v->u.v512[0], n))}; }); return result; } @@ -1594,7 +1594,7 @@ really_inline SuperVector<64> SuperVector<64>::vshl_64 (uint8_t const N) const if (N == 0) return *this; if (N == 64) return Zeroes(); SuperVector result; - Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_slli_epi64(v->u.v512[0], n)}; }); + Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_slli_epi64(v->u.v512[0], n))}; }); return result; } @@ -1604,7 +1604,7 @@ really_inline SuperVector<64> SuperVector<64>::vshl_128(uint8_t const N) const if (N == 0) 
return *this; if (N == 64) return Zeroes(); SuperVector result; - Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_bslli_epi128(v->u.v512[0], n)}; }); + Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_bslli_epi128(v->u.v512[0], n))}; }); return result; } @@ -1641,7 +1641,7 @@ really_inline SuperVector<64> SuperVector<64>::vshr_16 (uint8_t const N) const if (N == 0) return *this; if (N == 64) return Zeroes(); SuperVector result; - Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_srli_epi16(v->u.v512[0], n)}; }); + Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_srli_epi16(v->u.v512[0], n))}; }); return result; } @@ -1651,7 +1651,7 @@ really_inline SuperVector<64> SuperVector<64>::vshr_32 (uint8_t const N) const if (N == 0) return *this; if (N == 64) return Zeroes(); SuperVector result; - Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_srli_epi32(v->u.v512[0], n)}; }); + Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_srli_epi32(v->u.v512[0], n))}; }); return result; } @@ -1661,7 +1661,7 @@ really_inline SuperVector<64> SuperVector<64>::vshr_64 (uint8_t const N) const if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_srli_epi64(v->u.v512[0], n)}; }); + Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_srli_epi64(v->u.v512[0], n))}; }); return result; } @@ -1671,7 +1671,7 @@ really_inline SuperVector<64> 
SuperVector<64>::vshr_128(uint8_t const N) const if (N == 0) return *this; if (N == 64) return Zeroes(); SuperVector result; - Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {_mm512_bsrli_epi128(v->u.v512[0], n)}; }); + Unroller<1, 64>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {SuperVector<64>(_mm512_bsrli_epi128(v->u.v512[0], n))}; }); return result; } @@ -1719,18 +1719,18 @@ really_inline SuperVector<64> SuperVector<64>::operator>>(uint8_t const N) const if (N == 0) { return *this; } else if (N < 32) { - SuperVector<32> lo256 = u.v256[0]; - SuperVector<32> hi256 = u.v256[1]; + SuperVector<32> lo256 = SuperVector<32>(u.v256[0]); + SuperVector<32> hi256 = SuperVector<32>(u.v256[1]); SuperVector<32> carry = hi256 << (32 - N); hi256 = hi256 >> N; lo256 = (lo256 >> N) | carry; - return SuperVector(lo256, hi256); + return SuperVector<64>(lo256, hi256); } else if (N == 32) { - SuperVector<32> hi256 = u.v256[1]; - return SuperVector(hi256, SuperVector<32>::Zeroes()); + SuperVector<32> hi256 = SuperVector<32>(u.v256[1]); + return SuperVector<64>(hi256, SuperVector<32>::Zeroes()); } else if (N < 64) { - SuperVector<32> hi256 = u.v256[1]; - return SuperVector(hi256 >> (N - 32), SuperVector<32>::Zeroes()); + SuperVector<32> hi256 = SuperVector<32>(u.v256[1]); + return SuperVector<64>(hi256 >> (N - 32), SuperVector<32>::Zeroes()); } else { return Zeroes(); } @@ -1742,18 +1742,18 @@ really_inline SuperVector<64> SuperVector<64>::operator<<(uint8_t const N) const if (N == 0) { return *this; } else if (N < 32) { - SuperVector<32> lo256 = u.v256[0]; - SuperVector<32> hi256 = u.v256[1]; + SuperVector<32> lo256 = SuperVector<32>(u.v256[0]); + SuperVector<32> hi256 = SuperVector<32>(u.v256[1]); SuperVector<32> carry = lo256 >> (32 - N); hi256 = (hi256 << N) | carry; lo256 = lo256 << N; - return SuperVector(lo256, hi256); + return SuperVector<64>(lo256, hi256); } else if (N == 32) 
{ - SuperVector<32> lo256 = u.v256[0]; - return SuperVector(SuperVector<32>::Zeroes(), lo256); + SuperVector<32> lo256 = SuperVector<32>(u.v256[0]); + return SuperVector<64>(SuperVector<32>::Zeroes(), lo256); } else if (N < 64) { - SuperVector<32> lo256 = u.v256[0]; - return SuperVector(SuperVector<32>::Zeroes(), lo256 << (N - 32)); + SuperVector<32> lo256 = SuperVector<32>(u.v256[0]); + return SuperVector<64>(SuperVector<32>::Zeroes(), lo256 << (N - 32)); } else { return Zeroes(); } @@ -1762,7 +1762,7 @@ really_inline SuperVector<64> SuperVector<64>::operator<<(uint8_t const N) const template <> really_inline SuperVector<64> SuperVector<64>::loadu(void const *ptr) { - return {_mm512_loadu_si512((const m512 *)ptr)}; + return {SuperVector<64>(_mm512_loadu_si512((const m512 *)ptr))}; } template <> @@ -1770,7 +1770,7 @@ really_inline SuperVector<64> SuperVector<64>::load(void const *ptr) { assert(ISALIGNED_N(ptr, alignof(SuperVector::size))); ptr = vectorscan_assume_aligned(ptr, SuperVector::size); - return {_mm512_load_si512((const m512 *)ptr)}; + return {SuperVector<64>(_mm512_load_si512((const m512 *)ptr))}; } template <> @@ -1778,7 +1778,7 @@ really_inline SuperVector<64> SuperVector<64>::loadu_maskz(void const *ptr, uint { u64a mask = (~0ULL) >> (64 - len); DEBUG_PRINTF("mask = %016llx\n", mask); - SuperVector<64> v = _mm512_mask_loadu_epi8(Zeroes().u.v512[0], mask, (const m512 *)ptr); + SuperVector<64> v = SuperVector<64>(_mm512_mask_loadu_epi8(Zeroes().u.v512[0], mask, (const m512 *)ptr)); v.print8("v"); return v; } @@ -1787,7 +1787,7 @@ template<> template<> really_inline SuperVector<64> SuperVector<64>::pshufb(SuperVector<64> b) { - return {_mm512_shuffle_epi8(u.v512[0], b.u.v512[0])}; + return {SuperVector<64>(_mm512_shuffle_epi8(u.v512[0], b.u.v512[0]))}; } template<> @@ -1795,7 +1795,7 @@ really_inline SuperVector<64> SuperVector<64>::pshufb_maskz(SuperVector<64> b, u { u64a mask = (~0ULL) >> (64 - len); DEBUG_PRINTF("mask = %016llx\n", mask); - return 
{_mm512_maskz_shuffle_epi8(mask, u.v512[0], b.u.v512[0])}; + return {SuperVector<64>(_mm512_maskz_shuffle_epi8(mask, u.v512[0], b.u.v512[0]))}; } template<> @@ -1806,26 +1806,26 @@ really_inline SuperVector<64> SuperVector<64>::alignr(SuperVector<64> &l, int8_t if (offset == 16) { return *this; } else { - return {_mm512_alignr_epi8(u.v512[0], l.u.v512[0], offset)}; + return {SuperVector<64>(_mm512_alignr_epi8(u.v512[0], l.u.v512[0], offset))}; } } #endif if(offset == 0) { return *this; } else if (offset < 32){ - SuperVector<32> lo256 = u.v256[0]; - SuperVector<32> hi256 = u.v256[1]; - SuperVector<32> o_lo256 = l.u.v256[0]; - SuperVector<32> carry1 = hi256.alignr(lo256,offset); - SuperVector<32> carry2 = o_lo256.alignr(hi256,offset); - return SuperVector(carry1, carry2); + SuperVector<32> lo256 = SuperVector<32>(u.v256[0]); + SuperVector<32> hi256 = SuperVector<32>(u.v256[1]); + SuperVector<32> o_lo256 = SuperVector<32>(l.u.v256[0]); + SuperVector<32> carry1 = SuperVector<32>(hi256.alignr(lo256,offset)); + SuperVector<32> carry2 = SuperVector<32>(o_lo256.alignr(hi256,offset)); + return SuperVector<64>(carry1, carry2); } else if (offset <= 64){ - SuperVector<32> hi256 = u.v256[1]; - SuperVector<32> o_lo256 = l.u.v256[0]; - SuperVector<32> o_hi256 = l.u.v256[1]; - SuperVector<32> carry1 = o_lo256.alignr(hi256, offset - 32); - SuperVector<32> carry2 = o_hi256.alignr(o_lo256,offset -32); - return SuperVector(carry1, carry2); + SuperVector<32> hi256 = SuperVector<32>(u.v256[1]); + SuperVector<32> o_lo256 = SuperVector<32>(l.u.v256[0]); + SuperVector<32> o_hi256 = SuperVector<32>(l.u.v256[1]); + SuperVector<32> carry1 = SuperVector<32>(o_lo256.alignr(hi256, offset - 32)); + SuperVector<32> carry2 = SuperVector<32>(o_hi256.alignr(o_lo256,offset -32)); + return SuperVector<64>(carry1, carry2); } else { return *this; } diff --git a/src/util/supervector/supervector.hpp b/src/util/supervector/supervector.hpp index 5e2de235..0601b937 100644 --- 
a/src/util/supervector/supervector.hpp +++ b/src/util/supervector/supervector.hpp @@ -205,21 +205,21 @@ public: constexpr SuperVector() {}; SuperVector(SuperVector const &other) :u(other.u) {}; - SuperVector(typename base_type::type const v); + explicit SuperVector(typename base_type::type const v); template - SuperVector(T const other); + explicit SuperVector(T const other); SuperVector(SuperVector const lo, SuperVector const hi); SuperVector(previous_type const lo, previous_type const hi); - static SuperVector dup_u8 (uint8_t other) { return {other}; }; - static SuperVector dup_s8 (int8_t other) { return {other}; }; + static SuperVector dup_u8 (uint8_t other) { return {SuperVector(other)}; }; + static SuperVector dup_s8 (int8_t other) { return {SuperVector(other)}; }; static SuperVector dup_u16(uint16_t other) { return {other}; }; static SuperVector dup_s16(int16_t other) { return {other}; }; static SuperVector dup_u32(uint32_t other) { return {other}; }; static SuperVector dup_s32(int32_t other) { return {other}; }; - static SuperVector dup_u64(uint64_t other) { return {other}; }; + static SuperVector dup_u64(uint64_t other) { return {SuperVector(other)}; }; static SuperVector dup_s64(int64_t other) { return {other}; }; void operator=(SuperVector const &other); diff --git a/unit/internal/shuffle.cpp b/unit/internal/shuffle.cpp index deb85e9f..a812071a 100644 --- a/unit/internal/shuffle.cpp +++ b/unit/internal/shuffle.cpp @@ -210,14 +210,14 @@ TEST(Shuffle, PackedExtract_templatized_128_1) { SuperVector<16> permute = SuperVector<16>::Zeroes(); SuperVector<16> compare = SuperVector<16>::Zeroes(); build_pshufb_masks_onebit(i, &permute.u.v128[0], &compare.u.v128[0]); - EXPECT_EQ(1U, packedExtract<16>(setbit(i), permute, compare)); + EXPECT_EQ(1U, packedExtract<16>(SuperVector<16>(setbit(i)), permute, compare)); EXPECT_EQ(1U, packedExtract<16>(SuperVector<16>::Ones(), permute, compare)); // we should get zero out of these cases EXPECT_EQ(0U, 
packedExtract<16>(SuperVector<16>::Zeroes(), permute, compare)); - EXPECT_EQ(0U, packedExtract<16>(not128(setbit(i)), permute, compare)); + EXPECT_EQ(0U, packedExtract<16>(SuperVector<16>(not128(setbit(i))), permute, compare)); // we should get zero out of all the other bit positions for (unsigned int j = 0; (j != i && j < 128); j++) { - EXPECT_EQ(0U, packedExtract<16>(setbit(j), permute, compare)); + EXPECT_EQ(0U, packedExtract<16>(SuperVector<16>(setbit(j)), permute, compare)); } } } @@ -251,14 +251,14 @@ TEST(Shuffle, PackedExtract_templatized_256_1) { SuperVector<32> permute = SuperVector<32>::Zeroes(); SuperVector<32> compare = SuperVector<32>::Zeroes(); build_pshufb_masks_onebit(i, &permute.u.v256[0], &compare.u.v256[0]); - EXPECT_EQ(1U, packedExtract<32>(setbit(i), permute, compare)); + EXPECT_EQ(1U, packedExtract<32>(SuperVector<32>(setbit(i)), permute, compare)); EXPECT_EQ(1U, packedExtract<32>(SuperVector<32>::Ones(), permute, compare)); // we should get zero out of these cases EXPECT_EQ(0U, packedExtract<32>(SuperVector<32>::Zeroes(), permute, compare)); - EXPECT_EQ(0U, packedExtract<32>(not256(setbit(i)), permute, compare)); + EXPECT_EQ(0U, packedExtract<32>(SuperVector<32>(not256(setbit(i))), permute, compare)); // we should get zero out of all the other bit positions for (unsigned int j = 0; (j != i && j < 256); j++) { - EXPECT_EQ(0U, packedExtract<32>(setbit(j), permute, compare)); + EXPECT_EQ(0U, packedExtract<32>(SuperVector<32>(setbit(j)), permute, compare)); } } } @@ -291,14 +291,14 @@ TEST(Shuffle, PackedExtract_templatized_512_1) { SuperVector<64> permute = SuperVector<64>::Zeroes(); SuperVector<64> compare = SuperVector<64>::Zeroes(); build_pshufb_masks_onebit(i, &permute.u.v512[0], &compare.u.v512[0]); - EXPECT_EQ(1U, packedExtract<64>(setbit(i), permute, compare)); + EXPECT_EQ(1U, packedExtract<64>(SuperVector<64>(setbit(i)), permute, compare)); EXPECT_EQ(1U, packedExtract<64>(SuperVector<64>::Ones(), permute, compare)); // we should get 
zero out of these cases EXPECT_EQ(0U, packedExtract<64>(SuperVector<64>::Zeroes(), permute, compare)); - EXPECT_EQ(0U, packedExtract<64>(not512(setbit(i)), permute, compare)); + EXPECT_EQ(0U, packedExtract<64>(SuperVector<64>(not512(setbit(i))), permute, compare)); // we should get zero out of all the other bit positions for (unsigned int j = 0; (j != i && j < 512); j++) { - EXPECT_EQ(0U, packedExtract<64>(setbit(j), permute, compare)); + EXPECT_EQ(0U, packedExtract<64>(SuperVector<64>(setbit(j)), permute, compare)); } } }