diff --git a/src/nfa/shufti_simd.hpp b/src/nfa/shufti_simd.hpp index e7f3f6c9..83ab428b 100644 --- a/src/nfa/shufti_simd.hpp +++ b/src/nfa/shufti_simd.hpp @@ -56,6 +56,8 @@ SuperVector blockDoubleMask(SuperVector mask1_lo, SuperVector mask1_hi, #include "x86/shufti.hpp" #elif defined(ARCH_ARM32) || defined(ARCH_AARCH64) #include "arm/shufti.hpp" +#elif defined(ARCH_PPC64EL) +#include "ppc64el/shufti.hpp" #endif template diff --git a/src/nfa/truffle_simd.hpp b/src/nfa/truffle_simd.hpp index 8d61722b..b3a82266 100644 --- a/src/nfa/truffle_simd.hpp +++ b/src/nfa/truffle_simd.hpp @@ -49,6 +49,8 @@ const SuperVector blockSingleMask(SuperVector shuf_mask_lo_highclear, Supe #include "x86/truffle.hpp" #elif defined(ARCH_ARM32) || defined(ARCH_AARCH64) #include "arm/truffle.hpp" +#elif defined(ARCH_PPC64EL) +#include "ppc64el/truffle.hpp" #endif template diff --git a/src/util/supervector/arch/ppc64el/impl.cpp b/src/util/supervector/arch/ppc64el/impl.cpp index 478a195f..89fe89c6 100644 --- a/src/util/supervector/arch/ppc64el/impl.cpp +++ b/src/util/supervector/arch/ppc64el/impl.cpp @@ -444,7 +444,7 @@ really_inline SuperVector<16> SuperVector<16>::vshl(uint8_t const N) const } template <> -really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const N) const +really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const UNUSED N) const { //if (N == 0) return *this; //if (N == 16) return Zeroes(); @@ -456,7 +456,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_8 (uint8_t const N) const } template <> -really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const +really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const UNUSED N) const { //if (N == 0) return *this; //if (N == 16) return Zeroes(); @@ -468,7 +468,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_16 (uint8_t const N) const } template <> -really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const +really_inline SuperVector<16> 
SuperVector<16>::vshr_32 (uint8_t const UNUSED N) const { //if (N == 0) return *this; //if (N == 16) return Zeroes(); @@ -480,7 +480,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_32 (uint8_t const N) const } template <> -really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const +really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const UNUSED N) const { //if (N == 0) return *this; //if (N == 16) return Zeroes(); @@ -492,7 +492,7 @@ really_inline SuperVector<16> SuperVector<16>::vshr_64 (uint8_t const N) const } template <> -really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const N) const +really_inline SuperVector<16> SuperVector<16>::vshr_128(uint8_t const UNUSED N) const { //if (N == 0) return *this; //if (N == 16) return Zeroes(); @@ -594,12 +594,6 @@ really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint return mask & v; } -template<> -really_inline SuperVector<16> SuperVector<16>::pshufb(SuperVector<16> b) -{ - return (m128) vec_permxor((int8x16_t)vec_splat_s8(0), (int8x16_t)u.v128[0], (int8x16_t) b.u.v128[0]); -} - template<> really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, int8_t offset) { @@ -626,6 +620,24 @@ really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, in return *this; } +template<> +template<> +really_inline SuperVector<16> SuperVector<16>::pshufb<false>(SuperVector<16> b) +{ + return (m128) vec_permxor((int8x16_t)vec_splat_s8(0), (int8x16_t)u.v128[0], (int8x16_t) b.u.v128[0]); +} + +template<> +template<> +really_inline SuperVector<16> SuperVector<16>::pshufb<true>(SuperVector<16> b) +{ + /* On Intel, if bit 0x80 of a control byte is set the result lane is zero; otherwise the source lane is selected by the low nibble (& 0xf). + In NEON, if the index is >= 16 the result is zero; otherwise it is that lane. NOTE(review): this text mentions NEON although this is the ppc64el backend - confirm it matches vec_permxor. + btranslated is the control vector translated from the Intel convention: masking with 0x8f keeps only bit 7 and the low nibble before the raw pshufb<false> permute. 
*/ + SuperVector<16> btranslated = b & SuperVector<16>::dup_s8(0x8f); + return pshufb<false>(btranslated); +} + template<> really_inline SuperVector<16> SuperVector<16>::pshufb_maskz(SuperVector<16> b, uint8_t const len) @@ -635,6 +647,8 @@ really_inline SuperVector<16> SuperVector<16>::pshufb_maskz(SuperVector<16> b, u } + +/* template<> really_inline SuperVector<16> SuperVector<16>::lshift64(uint8_t const N) { @@ -661,4 +675,5 @@ really_inline SuperVector<16> SuperVector<16>::rshift128(uint8_t const N) { return *this >> N; } +*/ #endif