From 4d2acd59e262931608d5746c0f600457e1a751f7 Mon Sep 17 00:00:00 2001
From: apostolos
Date: Thu, 14 Oct 2021 15:08:23 +0300
Subject: [PATCH] Supervector vsh* added

Add the vshl_*/vshr_* shift family (compile-time "_imm" templates and
run-time variants) and the comparison operators to the ppc64el
SuperVector<16> implementation.

- Most new functions are placeholders that return Zeroes(). Each one is
  tagged "#warning FIXME" and keeps, commented out, the reference code it
  is meant to mirror. Ones_vshr()/Ones_vshl() delegate to vshr_128()/
  vshl_128() and therefore also yield Zeroes() until those are filled in.
- operator>> and operator<< now contain the vec_sld switch directly;
  rshift128_var()/lshift128_var() are removed.
- The Tmp union is removed.
- operator!() is a bitwise NOT (vec_nor(x, x)); operator!=() relies on it
  to invert the operator==() mask.
- opandnot() uses vec_andc(b, a), i.e. b & ~a. NOTE(review): operand
  order assumed to match the other SuperVector back ends -- confirm.
- The unused run-time shift counts of vshr_* are marked UNUSED, matching
  vshl_*.

---
 src/util/supervector/arch/ppc64el/impl.cpp | 344 +++++++++++++++++++--
 1 file changed, 323 insertions(+), 21 deletions(-)

diff --git a/src/util/supervector/arch/ppc64el/impl.cpp b/src/util/supervector/arch/ppc64el/impl.cpp
index b3562f75..478a195f 100644
--- a/src/util/supervector/arch/ppc64el/impl.cpp
+++ b/src/util/supervector/arch/ppc64el/impl.cpp
@@ -51,12 +51,6 @@ typedef __vector int8_t int8x16_t;
 
 // 128-bit Powerpc64le implementation
 
-union Tmp
-{
-    uint32_t u32;
-    uint16_t u16[2];
-};
-
 template<>
 really_inline SuperVector<16>::SuperVector(SuperVector const &other)
 {
@@ -164,17 +158,71 @@ really_inline SuperVector<16> SuperVector<16>::operator^(SuperVector<16> const &
     return {(m128) vec_xor(u.v128[0], b.u.v128[0])};
 }
 
+template <>
+really_inline SuperVector<16> SuperVector<16>::operator!() const
+{
+    return {(m128) vec_nor(u.v128[0], u.v128[0])}; // bitwise NOT; x ^ x would always be zero
+}
+
 template <>
 really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const &b) const
 {
-    m128 and_res = vec_and(u.v128[0], b.u.v128[0]);
-    return vec_xor(and_res,and_res);
+    // The previous body computed (a & b) ^ (a & b), which is always zero.
+    // vec_andc(x, y) yields x & ~y, so this returns b & ~(*this).
+    return {(m128) vec_andc(b.u.v128[0], u.v128[0])};
 }
 
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::operator==(SuperVector<16> const &b) const
+{
+    return {(m128) vec_cmpeq((int8x16_t)u.v128[0], (int8x16_t)b.u.v128[0])};
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::operator!=(SuperVector<16> const &b) const
+{
+    return !(*this == b);
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::operator>(SuperVector<16> const UNUSED &b) const
+{
+    //return {vcgtq_s8((int16x8_t)u.v128[0], (int16x8_t)b.u.v128[0])};
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::operator>=(SuperVector<16> const UNUSED &b) const
+{
+    //return {vcgeq_s8((int16x8_t)u.v128[0], (int16x8_t)b.u.v128[0])};
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::operator<(SuperVector<16> const UNUSED &b) const
+{
+    //return {vcltq_s8((int16x8_t)u.v128[0], (int16x8_t)b.u.v128[0])};
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::operator<=(SuperVector<16> const UNUSED &b) const
+{
+    //return {vcleq_s8((int16x8_t)u.v128[0], (int16x8_t)b.u.v128[0])};
+    // #warning FIXME
+    return Zeroes();
+}
+
+
 template <>
 really_inline SuperVector<16> SuperVector<16>::eq(SuperVector<16> const &b) const
 {
-    return {(m128) vec_cmpeq((int8x16_t)u.v128[0], (int8x16_t)b.u.v128[0])};
+    return (*this == b);
+    //return {(m128) vec_cmpeq((int8x16_t)u.v128[0], (int8x16_t)b.u.v128[0])};
 }
 
 template <>
@@ -206,9 +254,264 @@ really_inline typename SuperVector<16>::movemask_type SuperVector<16>::eqmask(Su
     return eq(b).movemask();
 }
 
+
 template <>
-really_inline SuperVector<16> SuperVector<16>::rshift128_var(uint8_t const N) const
-{
+template<uint8_t N>
+really_inline SuperVector<16> SuperVector<16>::vshl_8_imm() const
+{
+    //return {(m128)vshlq_n_s8(u.v128[0], N)};
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+template<uint8_t N>
+really_inline SuperVector<16> SuperVector<16>::vshl_16_imm() const
+{
+    //return {(m128)vshlq_n_s16(u.v128[0], N)};
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+template<uint8_t N>
+really_inline SuperVector<16> SuperVector<16>::vshl_32_imm() const
+{
+    //return {(m128)vshlq_n_s32(u.v128[0], N)};
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+template<uint8_t N>
+really_inline SuperVector<16> SuperVector<16>::vshl_64_imm() const
+{
+    //return {(m128)vshlq_n_s64(u.v128[0], N)};
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+template<uint8_t N>
+really_inline SuperVector<16> SuperVector<16>::vshl_128_imm() const
+{
+    //return {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 16 - N)};
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+template<uint8_t N>
+really_inline SuperVector<16> SuperVector<16>::vshl_imm() const
+{
+    //return vshl_128_imm<N>();
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+template<uint8_t N>
+really_inline SuperVector<16> SuperVector<16>::vshr_8_imm() const
+{
+    //return {(m128)vshrq_n_s8(u.v128[0], N)};
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+template<uint8_t N>
+really_inline SuperVector<16> SuperVector<16>::vshr_16_imm() const
+{
+    //return {(m128)vshrq_n_s16(u.v128[0], N)};
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+template<uint8_t N>
+really_inline SuperVector<16> SuperVector<16>::vshr_32_imm() const
+{
+    //return {(m128)vshrq_n_s32(u.v128[0], N)};
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+template<uint8_t N>
+really_inline SuperVector<16> SuperVector<16>::vshr_64_imm() const
+{
+    //return {(m128)vshrq_n_s64(u.v128[0], N)};
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+template<uint8_t N>
+really_inline SuperVector<16> SuperVector<16>::vshr_128_imm() const
+{
+    //return {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), N)};
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+template<uint8_t N>
+really_inline SuperVector<16> SuperVector<16>::vshr_imm() const
+{
+    return vshr_128_imm<N>();
+}
+
+#if !defined(HS_OPTIMIZE)
+template SuperVector<16> SuperVector<16>::vshl_8_imm<4>() const;
+template SuperVector<16> SuperVector<16>::vshl_16_imm<1>() const;
+template SuperVector<16> SuperVector<16>::vshl_64_imm<1>() const;
+template SuperVector<16> SuperVector<16>::vshl_64_imm<4>() const;
+template SuperVector<16> SuperVector<16>::vshl_128_imm<1>() const;
+template SuperVector<16> SuperVector<16>::vshl_128_imm<4>() const;
+template SuperVector<16> SuperVector<16>::vshr_8_imm<1>() const;
+template SuperVector<16> SuperVector<16>::vshr_8_imm<4>() const;
+template SuperVector<16> SuperVector<16>::vshr_16_imm<1>() const;
+template SuperVector<16> SuperVector<16>::vshr_64_imm<1>() const;
+template SuperVector<16> SuperVector<16>::vshr_64_imm<4>() const;
+template SuperVector<16> SuperVector<16>::vshr_128_imm<1>() const;
+template SuperVector<16> SuperVector<16>::vshr_128_imm<4>() const;
+#endif
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::vshl_8 (uint8_t const UNUSED N) const
+{
+    //if (N == 0) return *this;
+    //if (N == 16) return Zeroes();
+    //SuperVector result;
+    //Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128)vshlq_n_s8(u.v128[0], n)}; });
+    //return result;
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::vshl_16 (uint8_t const UNUSED N) const
+{
+    //if (N == 0) return *this;
+    //if (N == 16) return Zeroes();
+    //SuperVector result;
+    //Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128)vshlq_n_s16(u.v128[0], n)}; });
+    //return result;
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::vshl_32 (uint8_t const UNUSED N) const
+{
+    //if (N == 0) return *this;
+    //if (N == 16) return Zeroes();
+    //SuperVector result;
+    //Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128)vshlq_n_s32(u.v128[0], n)}; });
+    //return result;
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::vshl_64 (uint8_t const UNUSED N) const
+{
+    //if (N == 0) return *this;
+    //if (N == 16) return Zeroes();
+    //SuperVector result;
+    //Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128)vshlq_n_s64(u.v128[0], n)}; });
+    //return result;
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::vshl_128(uint8_t const UNUSED N) const
+{
+    //if (N == 0) return *this;
+    //if (N == 16) return Zeroes();
+    //SuperVector result;
+    //Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_s8(vdupq_n_u8(0), (int16x8_t)u.v128[0], 16 - n)}; });
+    //return result;
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::vshl(uint8_t const N) const
+{
+    return vshl_128(N);
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const UNUSED N) const
+{
+    //if (N == 0) return *this;
+    //if (N == 16) return Zeroes();
+    //SuperVector result;
+    //Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128)vshrq_n_s8(u.v128[0], n)}; });
+    //return result;
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const UNUSED N) const
+{
+    //if (N == 0) return *this;
+    //if (N == 16) return Zeroes();
+    //SuperVector result;
+    //Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128)vshrq_n_s16(u.v128[0], n)}; });
+    //return result;
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const UNUSED N) const
+{
+    //if (N == 0) return *this;
+    //if (N == 16) return Zeroes();
+    //SuperVector result;
+    //Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128)vshrq_n_s32(u.v128[0], n)}; });
+    //return result;
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const UNUSED N) const
+{
+    //if (N == 0) return *this;
+    //if (N == 16) return Zeroes();
+    //SuperVector result;
+    //Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {(m128)vshrq_n_s64(u.v128[0], n)}; });
+    //return result;
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const UNUSED N) const
+{
+    //if (N == 0) return *this;
+    //if (N == 16) return Zeroes();
+    //SuperVector result;
+    //Unroller<1, 16>::iterator([&,v=this](auto const i) { constexpr uint8_t n = i.value; if (N == n) result = {vextq_s8((int16x8_t)u.v128[0], vdupq_n_u8(0), n)}; });
+    //return result;
+    // #warning FIXME
+    return Zeroes();
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::vshr(uint8_t const N) const
+{
+    return vshr_128(N);
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const
+{
     switch(N) {
     case 1: return {(m128) vec_sld((int16x8_t) vec_splat_s8(0), (int16x8_t) u.v128[0], 15)}; break;
     case 2: return {(m128) vec_sld((int16x8_t) vec_splat_s8(0), (int16x8_t) u.v128[0], 14)}; break;
@@ -232,14 +535,8 @@ really_inline SuperVector<16> SuperVector<16>::rshift128_var(uint8_t const N) co
 }
 
 template <>
-really_inline SuperVector<16> SuperVector<16>::operator>>(uint8_t const N) const
+really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const
 {
-    return rshift128_var(N);
-}
-
-template <>
-really_inline SuperVector<16> SuperVector<16>::lshift128_var(uint8_t const N) const
-{
     switch(N) {
     case 1: return {(m128) vec_sld((int16x8_t) u.v128[0], (int16x8_t) vec_splat_s8(0), 1)}; break;
     case 2: return {(m128) vec_sld((int16x8_t) u.v128[0], (int16x8_t) vec_splat_s8(0), 2)}; break;
@@ -262,12 +559,17 @@ really_inline SuperVector<16> SuperVector<16>::lshift128_var(uint8_t const N) co
     return *this;
 }
 
-template <>
-really_inline SuperVector<16> SuperVector<16>::operator<<(uint8_t const N) const
+template<>
+really_inline SuperVector<16> SuperVector<16>::Ones_vshr(uint8_t const N)
 {
-    return lshift128_var(N);
+    return Ones().vshr_128(N);
 }
 
+template<>
+really_inline SuperVector<16> SuperVector<16>::Ones_vshl(uint8_t const N)
+{
+    return Ones().vshl_128(N);
+}
 
 template <>
 really_inline SuperVector<16> SuperVector<16>::loadu(void const *ptr)