diff --git a/src/util/arch/ppc64el/simd_types.h b/src/util/arch/ppc64el/simd_types.h index 21dae5cb..8a5b0e25 100644 --- a/src/util/arch/ppc64el/simd_types.h +++ b/src/util/arch/ppc64el/simd_types.h @@ -30,7 +30,7 @@ #define ARCH_PPC64EL_SIMD_TYPES_H #if !defined(m128) && defined(HAVE_VSX) -typedef __vector int32_t m128; +typedef __vector int m128; #endif #endif /* ARCH_PPC64EL_SIMD_TYPES_H */ diff --git a/src/util/arch/ppc64el/simd_utils.h b/src/util/arch/ppc64el/simd_utils.h index 137fc94f..d046ed47 100644 --- a/src/util/arch/ppc64el/simd_utils.h +++ b/src/util/arch/ppc64el/simd_utils.h @@ -43,6 +43,18 @@ #include // for memcpy +typedef __vector unsigned long long int uint64x2_t; +typedef __vector signed long long int int64x2_t; +typedef __vector unsigned int uint32x4_t; +typedef __vector signed int int32x4_t; +typedef __vector unsigned short int uint16x8_t; +typedef __vector signed short int int16x8_t; +typedef __vector unsigned char uint8x16_t; +typedef __vector signed char int8x16_t; + +typedef unsigned long long int ulong64_t; +typedef signed long long int long64_t; +/* typedef __vector uint64_t uint64x2_t; typedef __vector int64_t int64x2_t; typedef __vector uint32_t uint32x4_t; @@ -50,7 +62,7 @@ typedef __vector int32_t int32x4_t; typedef __vector uint16_t uint16x8_t; typedef __vector int16_t int16x8_t; typedef __vector uint8_t uint8x16_t; -typedef __vector int8_t int8x16_t; +typedef __vector int8_t int8x16_t;*/ #define ZEROES_8 0, 0, 0, 0, 0, 0, 0, 0 @@ -182,13 +194,13 @@ m128 rshift_m128(m128 a, unsigned b) { static really_really_inline m128 lshift64_m128(m128 a, unsigned b) { - uint64x2_t shift_indices = vec_splats((uint64_t)b); + uint64x2_t shift_indices = vec_splats((ulong64_t)b); return (m128) vec_sl((int64x2_t)a, shift_indices); } static really_really_inline m128 rshift64_m128(m128 a, unsigned b) { - uint64x2_t shift_indices = vec_splats((uint64_t)b); + uint64x2_t shift_indices = vec_splats((ulong64_t)b); return (m128) vec_sr((int64x2_t)a, shift_indices); } @@ -213,11 +225,11 @@ static really_inline u32 movemask128(m128 a) { uint32x4_t s3 = vec_or((uint32x4_t)ss2, res_and2); uint64x2_t ss3 = vec_sr((uint64x2_t)s3, (uint64x2_t)vec_splats(28)); - uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((uint64_t)0xff)); + uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((ulong64_t)0xff)); uint64x2_t s4 = vec_or((uint64x2_t)ss3, res_and3); uint64x2_t ss4 = vec_sld((uint64x2_t)vec_splats(0), s4, 9); - uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((uint64_t)0xff)); + uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((ulong64_t)0xff)); uint64x2_t s5 = vec_or((uint64x2_t)ss4, res_and4); return s5[0]; diff --git a/src/util/supervector/arch/ppc64el/impl.cpp b/src/util/supervector/arch/ppc64el/impl.cpp index e054e02e..109b8d5e 100644 --- a/src/util/supervector/arch/ppc64el/impl.cpp +++ b/src/util/supervector/arch/ppc64el/impl.cpp @@ -39,16 +39,6 @@ #include "util/supervector/supervector.hpp" #include - -typedef __vector uint64_t uint64x2_t; -typedef __vector int64_t int64x2_t; -typedef __vector uint32_t uint32x4_t; -typedef __vector int32_t int32x4_t; -typedef __vector uint16_t uint16x8_t; -typedef __vector int16_t int16x8_t; -typedef __vector uint8_t uint8x16_t; -typedef __vector int8_t int8x16_t; - // 128-bit Powerpc64le implementation template<> @@ -65,58 +55,58 @@ really_inline SuperVector<16>::SuperVector(typename base_type::type const v) template<> template<> -really_inline SuperVector<16>::SuperVector(int8_t const other) +really_inline SuperVector<16>::SuperVector(int8_t const other) { u.v128[0] = (m128) vec_splats(other); } template<> template<> -really_inline SuperVector<16>::SuperVector(uint8_t const other) +really_inline SuperVector<16>::SuperVector(uint8_t const other) { u.v128[0] = (m128) vec_splats(static_cast(other)); } template<> template<> -really_inline SuperVector<16>::SuperVector(int16_t const other) +really_inline SuperVector<16>::SuperVector(int16_t const other) { u.v128[0] = (m128) vec_splats(other); } template<> template<> -really_inline SuperVector<16>::SuperVector(uint16_t const other) +really_inline SuperVector<16>::SuperVector(uint16_t const other) { u.v128[0] = (m128) vec_splats(static_cast(other)); } template<> template<> -really_inline SuperVector<16>::SuperVector(int32_t const other) +really_inline SuperVector<16>::SuperVector(int32_t const other) { u.v128[0] = (m128) vec_splats(other); } template<> template<> -really_inline SuperVector<16>::SuperVector(uint32_t const other) +really_inline SuperVector<16>::SuperVector(uint32_t const other) { u.v128[0] = (m128) vec_splats(static_cast(other)); } template<> template<> -really_inline SuperVector<16>::SuperVector(int64_t const other) +really_inline SuperVector<16>::SuperVector(int64_t const other) { - u.v128[0] = (m128) vec_splats(other); + u.v128[0] = (m128) vec_splats(static_cast(other)); } template<> template<> -really_inline SuperVector<16>::SuperVector(uint64_t const other) +really_inline SuperVector<16>::SuperVector(uint64_t const other) { - u.v128[0] = (m128) vec_splats(static_cast(other)); + u.v128[0] = (m128) vec_splats(static_cast(other)); } // Constants @@ -229,11 +219,11 @@ really_inline typename SuperVector<16>::movemask_type SuperVector<16>::movemask( uint32x4_t s3 = vec_or((uint32x4_t)ss2, res_and2); uint64x2_t ss3 = vec_sr((uint64x2_t)s3, (uint64x2_t)vec_splats(28)); - uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((uint64_t)0xff)); + uint64x2_t res_and3 = vec_and((uint64x2_t)s3, vec_splats((ulong64_t)0xff)); uint64x2_t s4 = vec_or((uint64x2_t)ss3, res_and3); uint64x2_t ss4 = vec_sld((uint64x2_t) vec_splats(0), s4, 9); - uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((uint64_t)0xff)); + uint64x2_t res_and4 = vec_and((uint64x2_t)s4, vec_splats((ulong64_t)0xff)); uint64x2_t s5 = vec_or((uint64x2_t)ss4, res_and4); return s5[0]; @@ -271,7 +261,7 @@ template <> template really_inline SuperVector<16> SuperVector<16>::vshl_64_imm() const { - return { (m128) vec_sl(u.s64x2[0], vec_splats((uint64_t)N)) }; + return { (m128) vec_sl(u.s64x2[0], vec_splats((ulong64_t)N)) }; } template <> @@ -313,7 +303,7 @@ template <> template really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const { - return { (m128) vec_sr(u.s64x2[0], vec_splats((uint64_t)N)) }; + return { (m128) vec_sr(u.s64x2[0], vec_splats((ulong64_t)N)) }; } template <> @@ -352,7 +342,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_8 (uint8_t const N) const if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s8x16[0], vec_splats((uint8_t)n))}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s8x16[0], vec_splats((uint8_t)n))}; }); return result; } @@ -362,7 +352,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_16 (uint8_t const UNUSED N) if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s16x8[0], vec_splats((uint16_t)n))}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s16x8[0], vec_splats((uint16_t)n))}; }); return result; } @@ -372,7 +362,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_32 (uint8_t const N) const if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s32x4[0], vec_splats((uint32_t)n))}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s32x4[0], vec_splats((uint32_t)n))}; }); return result; } @@ -382,7 +372,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const N) const if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(u.s64x2[0], vec_splats((uint64_t)n))}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sl(v->u.s64x2[0], vec_splats((ulong64_t)n))}; }); return result; } @@ -392,7 +382,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const N) const if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld(u.s8x16[0], (int8x16_t)vec_splat_s8(0), n)}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld(v->u.s8x16[0], (int8x16_t)vec_splat_s8(0), n)}; }); return result; } @@ -408,7 +398,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const N) const if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s8x16[0], vec_splats((uint8_t)n))}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s8x16[0], vec_splats((uint8_t)n))}; }); return result; } @@ -418,7 +408,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s16x8[0], vec_splats((uint16_t)n))}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s16x8[0], vec_splats((uint16_t)n))}; }); return result; } @@ -428,7 +418,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s32x4[0], vec_splats((uint32_t)n))}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s32x4[0], vec_splats((uint32_t)n))}; }); return result; } @@ -438,7 +428,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(u.s64x2[0], vec_splats((uint64_t)n))}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sr(v->u.s64x2[0], vec_splats((ulong64_t)n))}; }); return result; } @@ -448,7 +438,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const UNUSED N) if (N == 0) return *this; if (N == 16) return Zeroes(); SuperVector result; - Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld((int8x16_t)vec_splat_u8(0), u.s8x16[0], 16 - n)}; }); + Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128) vec_sld((int8x16_t)vec_splat_u8(0), v->u.s8x16[0], 16 - n)}; }); return result; } @@ -523,14 +513,14 @@ really_inline SuperVector<16> SuperVector<16>::Ones_vshl(uint8_t const N) template <> really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr) { - return (m128) vec_xl(0, (const int64_t*)ptr); + return (m128) vec_xl(0, (const long64_t*)ptr); } template <> really_inline SuperVector<16> SuperVector<16>::load(void const *ptr) { assert(ISALIGNED_N(ptr, alignof(SuperVector::size))); - return (m128) vec_xl(0, (const int64_t*)ptr); + return (m128) vec_xl(0, (const long64_t*)ptr); } template <> diff --git a/src/util/supervector/arch/ppc64el/types.hpp b/src/util/supervector/arch/ppc64el/types.hpp index dbd863f4..bdc6608e 100644 --- a/src/util/supervector/arch/ppc64el/types.hpp +++ b/src/util/supervector/arch/ppc64el/types.hpp @@ -27,6 +27,18 @@ * POSSIBILITY OF SUCH DAMAGE. */ +typedef __vector unsigned long long int uint64x2_t; +typedef __vector signed long long int int64x2_t; +typedef __vector unsigned int uint32x4_t; +typedef __vector signed int int32x4_t; +typedef __vector unsigned short int uint16x8_t; +typedef __vector signed short int int16x8_t; +typedef __vector unsigned char uint8x16_t; +typedef __vector signed char int8x16_t; + +typedef unsigned long long int ulong64_t; +typedef signed long long int long64_t; + #if !defined(m128) && defined(HAVE_VSX) -typedef __vector int32_t m128; +typedef __vector int m128; #endif