diff --git a/src/hwlm/noodle_engine_simd.hpp b/src/hwlm/noodle_engine_simd.hpp
index 7a535e8f..c3080f08 100644
--- a/src/hwlm/noodle_engine_simd.hpp
+++ b/src/hwlm/noodle_engine_simd.hpp
@@ -29,7 +29,7 @@
 
 /* SIMD engine agnostic noodle scan parts */
 
-#include "util/simd/types.hpp"
+#include "util/supervector/supervector.hpp"
 
 static u8 CASEMASK[] = { 0xff, 0xdf };
 
@@ -247,4 +247,4 @@ hwlm_error_t scanDouble(const struct noodTable *n, const u8 *buf, size_t len,
     const SuperVector mask2{getMask(n->key1, noCase)};
 
     return scanDoubleMain(n, buf, len, start, caseMask, mask1, mask2, cbi);
-}
\ No newline at end of file
+}
diff --git a/src/nfa/shufti_simd.hpp b/src/nfa/shufti_simd.hpp
index de1d7970..6e9ff3e8 100644
--- a/src/nfa/shufti_simd.hpp
+++ b/src/nfa/shufti_simd.hpp
@@ -38,29 +38,26 @@
 #include "util/bitutils.h"
 #include "util/unaligned.h"
 
-#include "util/simd/types.hpp"
-
-#define GET1_LO_4(chars, low4bits) and128(chars, low4bits)
-#define GET1_HI_4(chars, low4bits) and128(rshift64_m128(chars, 4), low4bits)
+#include "util/supervector/supervector.hpp"
 
 template <uint16_t S>
 static really_inline
 typename SuperVector<S>::movemask_type block(SuperVector<S> mask_lo, SuperVector<S> mask_hi, SuperVector<S> chars, const SuperVector<S> low4bits) {
     SuperVector<S> c_lo = chars & low4bits;
-    print_m128_16x8("c_lo", c_lo.u.v128[0]);
+    //printv_u8("c_lo", c_lo);
     c_lo = mask_lo.pshufb(c_lo);
-    print_m128_16x8("c_lo", c_lo.u.v128[0]);
+    //printv_u8("c_lo", c_lo);
     SuperVector<S> c_hi = mask_hi.pshufb(chars.rshift64(4) & low4bits);
     SuperVector<S> t = c_lo & c_hi;
-    print_m128_16x8("low4bits", low4bits.u.v128[0]);
-    print_m128_16x8("mask_lo", mask_lo.u.v128[0]);
-    print_m128_16x8("mask_hi", mask_hi.u.v128[0]);
-    print_m128_16x8("chars", chars.u.v128[0]);
-    print_m128_16x8("c_lo", c_lo.u.v128[0]);
-    print_m128_16x8("c_hi", c_hi.u.v128[0]);
-    print_m128_16x8("t", t.u.v128[0]);
+    /*printv_u8("low4bits", low4bits);
+    printv_u8("mask_lo", mask_lo);
+    printv_u8("mask_hi", mask_hi);
+    printv_u8("chars", chars);
+    printv_u8("c_lo", c_lo);
+    printv_u8("c_hi", c_hi);
+    printv_u8("t", t);*/
 
     return t.eqmask(SuperVector<S>::Zeroes());
 }
@@ -71,7 +68,6 @@ const u8 *firstMatch(const u8 *buf, typename SuperVector<S>::movemask_type z);
 template <uint16_t S>
 const u8 *lastMatch(const u8 *buf, typename SuperVector<S>::movemask_type z);
-
 template <>
 really_inline
 const u8 *firstMatch<16>(const u8 *buf, typename SuperVector<16>::movemask_type z)
 {
@@ -121,7 +117,7 @@ const u8 *shortShufti(SuperVector<S> mask_lo, SuperVector<S> mask_hi, const u8 *
     assert(len <= S);
 
     SuperVector<S> chars = SuperVector<S>::loadu_maskz(buf, static_cast<uint8_t>(len));
-    print_m128_16x8("chars", chars.u.v128[0]);
+    //printv_u8("chars", chars);
     uint8_t alignment = (uintptr_t)(buf) & 15;
     typename SuperVector<S>::movemask_type maskb = 1 << alignment;
     typename SuperVector<S>::movemask_type maske = SINGLE_LOAD_MASK(len - alignment);
@@ -183,7 +179,7 @@ const u8 *shuftiExecReal(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *bu
     DEBUG_PRINTF("shufti %p len %zu\n", buf, buf_end - buf);
     DEBUG_PRINTF("b %s\n", buf);
 
-    const SuperVector<S> low4bits = SuperVector<S>::set1u_16x8(0xf);
+    const SuperVector<S> low4bits = SuperVector<S>::dup_u8(0xf);
     const SuperVector<S> wide_mask_lo(mask_lo);
     const SuperVector<S> wide_mask_hi(mask_hi);
@@ -240,7 +236,7 @@ const u8 *rshuftiExecReal(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *b
     DEBUG_PRINTF("shufti %p len %zu\n", buf, buf_end - buf);
     DEBUG_PRINTF("b %s\n", buf);
 
-    const SuperVector<S> low4bits = SuperVector<S>::set1u_16x8(0xf);
+    const SuperVector<S> low4bits = SuperVector<S>::dup_u8(0xf);
     const SuperVector<S> wide_mask_lo(mask_lo);
     const SuperVector<S> wide_mask_hi(mask_hi);
@@ -316,7 +312,7 @@ const u8 *shuftiDoubleExecReal(m128 mask1_lo, m128 mask1_hi,
     DEBUG_PRINTF("shufti %p len %zu\n", buf, buf_end - buf);
     DEBUG_PRINTF("b %s\n", buf);
 
-    const SuperVector<S> low4bits = SuperVector<S>::set1u_16x8(0xf);
+    const SuperVector<S> low4bits = SuperVector<S>::dup_u8(0xf);
     const SuperVector<S> wide_mask1_lo(mask1_lo);
     const SuperVector<S> wide_mask1_hi(mask1_hi);
     const SuperVector<S> wide_mask2_lo(mask2_lo);
diff --git a/src/nfa/truffle_simd.hpp b/src/nfa/truffle_simd.hpp
index bc6c3d4c..bf421300 100644
--- a/src/nfa/truffle_simd.hpp
+++ b/src/nfa/truffle_simd.hpp
@@ -38,7 +38,7 @@
 #include "util/bitutils.h"
 #include "util/unaligned.h"
 
-#include "util/simd/types.hpp"
+#include "util/supervector/supervector.hpp"
 
 template <uint16_t S>
@@ -115,18 +115,18 @@ static really_inline
 typename SuperVector<S>::movemask_type block(SuperVector<S> shuf_mask_lo_highclear, SuperVector<S> shuf_mask_lo_highset, SuperVector<S> v){
 
-    SuperVector<S> highconst = SuperVector<S>::set1_16x8(0x80);
-    print_m128_16x8("highconst", highconst.u.v128[0]);
+    SuperVector<S> highconst = SuperVector<S>::dup_u8(0x80);
+    printv_u8("highconst", highconst);
 
-    SuperVector<S> shuf_mask_hi = SuperVector<S>::set1_2x64(0x8040201008040201);
-    print_m128_2x64("shuf_mask_hi", shuf_mask_hi.u.v128[0]);
+    SuperVector<S> shuf_mask_hi = SuperVector<S>::dup_u64(0x8040201008040201);
+    printv_u64("shuf_mask_hi", shuf_mask_hi);
 
     SuperVector<S> shuf1 = shuf_mask_lo_highclear.pshufb(v);
     SuperVector<S> t1 = v ^ highconst;
     SuperVector<S> shuf2 = shuf_mask_lo_highset.pshufb(t1);
     SuperVector<S> t2 = highconst.opandnot(v.rshift64(4));
     SuperVector<S> shuf3 = shuf_mask_hi.pshufb(t2);
-    SuperVector<S> tmp = shuf3 & (shuf1 | shuf2);
+    SuperVector<S> tmp = (shuf1 | shuf2) & shuf3;
 
     return tmp.eqmask(SuperVector<S>::Zeroes());
 }
diff --git a/src/util/simd/arch/arm/impl.cpp b/src/util/supervector/arch/arm/impl.cpp
similarity index 81%
rename from src/util/simd/arch/arm/impl.cpp
rename to src/util/supervector/arch/arm/impl.cpp
index fb2138d1..0e8648cd 100644
--- a/src/util/simd/arch/arm/impl.cpp
+++ b/src/util/supervector/arch/arm/impl.cpp
@@ -32,14 +32,14 @@
 
 #include <arm_neon.h>
 
-#include "util/simd/arch/arm/types.hpp"
+#include "util/supervector/arch/arm/types.hpp"
 
 // 128-bit NEON implementation
 
 template<>
-really_inline SuperVector<16>::SuperVector(SuperVector const &o)
+really_inline SuperVector<16>::SuperVector(SuperVector const &other)
 {
-    u.v128[0] = o.u.v128[0];
+    u.v128[0] = other.u.v128[0];
 }
 
 template<>
@@ -50,72 +50,72 @@ really_inline SuperVector<16>::SuperVector(typename base_type::type const v)
 
 template<>
 template<>
-really_inline SuperVector<16>::SuperVector(int8x16_t const o)
+really_inline SuperVector<16>::SuperVector(int8x16_t const other)
 {
-    u.v128[0] = static_cast<m128>(o);
+    u.v128[0] = static_cast<m128>(other);
 }
 
 template<>
 template<>
-really_inline SuperVector<16>::SuperVector(uint8x16_t const o)
+really_inline SuperVector<16>::SuperVector(uint8x16_t const other)
 {
-    u.v128[0] = static_cast<m128>(o);
+    u.v128[0] = static_cast<m128>(other);
 }
 
 template<>
 template<>
-really_inline SuperVector<16>::SuperVector(int8_t const o)
+really_inline SuperVector<16>::SuperVector(int8_t const other)
 {
-    u.v128[0] = vdupq_n_s8(o);
+    u.v128[0] = vdupq_n_s8(other);
 }
 
 template<>
 template<>
-really_inline SuperVector<16>::SuperVector(uint8_t const o)
+really_inline SuperVector<16>::SuperVector(uint8_t const other)
 {
-    u.v128[0] = vdupq_n_u8(o);
+    u.v128[0] = vdupq_n_u8(other);
 }
 
 template<>
 template<>
-really_inline SuperVector<16>::SuperVector(int16_t const o)
+really_inline SuperVector<16>::SuperVector(int16_t const other)
 {
-    u.v128[0] = vdupq_n_s16(o);
+    u.v128[0] = vdupq_n_s16(other);
 }
 
 template<>
 template<>
-really_inline SuperVector<16>::SuperVector(uint16_t const o)
+really_inline SuperVector<16>::SuperVector(uint16_t const other)
 {
-    u.v128[0] = vdupq_n_u16(o);
+    u.v128[0] = vdupq_n_u16(other);
 }
 
 template<>
 template<>
-really_inline SuperVector<16>::SuperVector(int32_t const o)
+really_inline SuperVector<16>::SuperVector(int32_t const other)
 {
-    u.v128[0] = vdupq_n_s32(o);
+    u.v128[0] = vdupq_n_s32(other);
 }
 
 template<>
 template<>
-really_inline SuperVector<16>::SuperVector(uint32_t const o)
+really_inline SuperVector<16>::SuperVector(uint32_t const other)
 {
-    u.v128[0] = vdupq_n_u32(o);
+    u.v128[0] = vdupq_n_u32(other);
 }
 
 template<>
 template<>
-really_inline SuperVector<16>::SuperVector(int64_t const o)
+really_inline SuperVector<16>::SuperVector(int64_t const other)
 {
-    u.v128[0] = vdupq_n_s64(o);
+    u.v128[0] = vdupq_n_s64(other);
 }
 
 template<>
 template<>
-really_inline SuperVector<16>::SuperVector(uint64_t const o)
+really_inline SuperVector<16>::SuperVector(uint64_t const other)
 {
-    u.v128[0] = vdupq_n_u64(o);
+    u.v128[0] = vdupq_n_u64(other);
 }
 
 // Constants
@@ -134,37 +134,43 @@ really_inline SuperVector<16> SuperVector<16>::Zeroes(void)
 
 // Methods
 
 template <>
-really_inline void SuperVector<16>::operator=(SuperVector<16> const &o)
+really_inline void SuperVector<16>::operator=(SuperVector<16> const &other)
 {
-    u.v128[0] = o.u.v128[0];
+    u.v128[0] = other.u.v128[0];
 }
 
 template <>
-really_inline SuperVector<16> SuperVector<16>::operator&(SuperVector<16> const b) const
+really_inline SuperVector<16> SuperVector<16>::operator&(SuperVector<16> const &b) const
 {
     return {vandq_s8(u.v128[0], b.u.v128[0])};
 }
 
 template <>
-really_inline SuperVector<16> SuperVector<16>::operator|(SuperVector<16> const b) const
+really_inline SuperVector<16> SuperVector<16>::operator|(SuperVector<16> const &b) const
 {
     return {vorrq_s8(u.v128[0], b.u.v128[0])};
 }
 
 template <>
-really_inline SuperVector<16> SuperVector<16>::opand(SuperVector<16> const b) const
+really_inline SuperVector<16> SuperVector<16>::operator^(SuperVector<16> const &b) const
+{
+    return {veorq_s8(u.v128[0], b.u.v128[0])};
+}
+
+template <>
+really_inline SuperVector<16> SuperVector<16>::opand(SuperVector<16> const &b) const
 {
     return {vandq_s8(u.v128[0], b.u.v128[0])};
 }
 
 template <>
-really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const b) const
+really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const &b) const
 {
     return {vandq_s8(u.v128[0], b.u.v128[0])};
 }
 
 template <>
-really_inline SuperVector<16> SuperVector<16>::eq(SuperVector<16> const b) const
+really_inline SuperVector<16> SuperVector<16>::eq(SuperVector<16> const &b) const
 {
     return {vceqq_s8((int16x8_t)u.v128[0], (int16x8_t)b.u.v128[0])};
 }
@@ -176,7 +182,7 @@ really_inline typename SuperVector<16>::movemask_type SuperVector<16>::movemask(
     // Compute the mask from the input
     uint64x2_t mask = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8((uint16x8_t)u.v128[0], powers))));
-    uint64x2_t mask1 = (m128)vextq_s8(mask, zeroes128(), 7);
+    uint64x2_t mask1 = (m128)vextq_s8(mask, vdupq_n_u8(0), 7);
     mask = vorrq_u8(mask, mask1);
 
     // Get the resulting bytes
@@ -285,32 +291,32 @@ really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint
 
 #ifndef HS_OPTIMIZE
 template<>
-really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> r, int8_t offset)
+really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, int8_t offset)
 {
-    return {vextq_s8((int16x8_t)r.u.v128[0], (int16x8_t)u.v128[0], 16 - offset)};
+    return {vextq_s8((int16x8_t)other.u.v128[0], (int16x8_t)u.v128[0], 16 - offset)};
 }
 #else
 template<>
-really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> r, int8_t offset)
+really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, int8_t offset)
 {
     switch(offset) {
     case 0: return *this; break;
-    case 1: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 15)}; break;
-    case 2: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 14)}; break;
-    case 3: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 13)}; break;
-    case 4: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 12)}; break;
-    case 5: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 11)}; break;
-    case 6: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 10)}; break;
-    case 7: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 9)}; break;
-    case 8: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 8)}; break;
-    case 9: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 7)}; break;
-    case 10: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 6)}; break;
-    case 11: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 5)}; break;
-    case 12: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 4)}; break;
-    case 13: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 3)}; break;
-    case 14: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 2)}; break;
-    case 15: return {vextq_s8((int16x8_t) r.u.v128[0], (int16x8_t) u.v128[0], 1)}; break;
-    case 16: return r; break;
+    case 1: return {vextq_s8((int16x8_t) other.u.v128[0], (int16x8_t) u.v128[0], 15)}; break;
+    case 2: return {vextq_s8((int16x8_t) other.u.v128[0], (int16x8_t) u.v128[0], 14)}; break;
+    case 3: return {vextq_s8((int16x8_t) other.u.v128[0], (int16x8_t) u.v128[0], 13)}; break;
+    case 4: return {vextq_s8((int16x8_t) other.u.v128[0], (int16x8_t) u.v128[0], 12)}; break;
+    case 5: return {vextq_s8((int16x8_t) other.u.v128[0], (int16x8_t) u.v128[0], 11)}; break;
+    case 6: return {vextq_s8((int16x8_t) other.u.v128[0], (int16x8_t) u.v128[0], 10)}; break;
+    case 7: return {vextq_s8((int16x8_t) other.u.v128[0], (int16x8_t) u.v128[0], 9)}; break;
+    case 8: return {vextq_s8((int16x8_t) other.u.v128[0], (int16x8_t) u.v128[0], 8)}; break;
+    case 9: return {vextq_s8((int16x8_t) other.u.v128[0], (int16x8_t) u.v128[0], 7)}; break;
+    case 10: return {vextq_s8((int16x8_t) other.u.v128[0], (int16x8_t) u.v128[0], 6)}; break;
+    case 11: return {vextq_s8((int16x8_t) other.u.v128[0], (int16x8_t) u.v128[0], 5)}; break;
+    case 12: return {vextq_s8((int16x8_t) other.u.v128[0], (int16x8_t) u.v128[0], 4)}; break;
+    case 13: return {vextq_s8((int16x8_t) other.u.v128[0], (int16x8_t) u.v128[0], 3)}; break;
+    case 14: return {vextq_s8((int16x8_t) other.u.v128[0], (int16x8_t) u.v128[0], 2)}; break;
+    case 15: return {vextq_s8((int16x8_t) other.u.v128[0], (int16x8_t) u.v128[0], 1)}; break;
+    case 16: return other; break;
     default: break;
     }
     return *this;
@@ -329,15 +335,15 @@ really_inline SuperVector<16> SuperVector<16>::pshufb(SuperVector<16> b)
 
 #ifdef HS_OPTIMIZE
 template<>
-really_inline SuperVector<16> SuperVector<16>::lshift64(uint8_t const l)
+really_inline SuperVector<16> SuperVector<16>::lshift64(uint8_t const N)
 {
-    return {(m128)vshlq_n_s64(u.v128[0], l)};
+    return {(m128)vshlq_n_s64(u.v128[0], N)};
 }
 #else
 template<>
-really_inline SuperVector<16> SuperVector<16>::lshift64(uint8_t const l)
+really_inline SuperVector<16> SuperVector<16>::lshift64(uint8_t const N)
 {
-    switch(l) {
+    switch(N) {
     case 0: return {vshlq_n_s64(u.v128[0], 0)}; break;
     case 1: return {vshlq_n_s64(u.v128[0], 1)}; break;
    case 2: return {vshlq_n_s64(u.v128[0], 2)}; break;
@@ -362,15 +368,15 @@ really_inline SuperVector<16> SuperVector<16>::lshift64(uint8_t const l)
 
 #ifdef HS_OPTIMIZE
 template<>
-really_inline SuperVector<16> SuperVector<16>::rshift64(uint8_t const l)
+really_inline SuperVector<16> SuperVector<16>::rshift64(uint8_t const N)
 {
-    return {(m128)vshrq_n_s64(u.v128[0], l)};
+    return {(m128)vshrq_n_s64(u.v128[0], N)};
 }
 #else
 template<>
-really_inline SuperVector<16> SuperVector<16>::rshift64(uint8_t const l)
+really_inline SuperVector<16> SuperVector<16>::rshift64(uint8_t const N)
 {
-    switch(l) {
+    switch(N) {
     case 0: return {vshrq_n_s64(u.v128[0], 0)}; break;
     case 1: return {vshrq_n_s64(u.v128[0], 1)}; break;
     case 2: return {vshrq_n_s64(u.v128[0], 2)}; break;
diff --git a/src/util/simd/arch/arm/impl.hpp b/src/util/supervector/arch/arm/impl.hpp
similarity index 100%
rename from src/util/simd/arch/arm/impl.hpp
rename to src/util/supervector/arch/arm/impl.hpp
diff --git a/src/util/simd/arch/arm/types.hpp b/src/util/supervector/arch/arm/types.hpp
similarity index 100%
rename from src/util/simd/arch/arm/types.hpp
rename to src/util/supervector/arch/arm/types.hpp
diff --git a/src/util/simd/arch/x86/impl.cpp b/src/util/supervector/arch/x86/impl.cpp
similarity index 98%
rename from src/util/simd/arch/x86/impl.cpp
rename to src/util/supervector/arch/x86/impl.cpp
index e9298a98..a00b032a 100644
--- a/src/util/simd/arch/x86/impl.cpp
+++ b/src/util/supervector/arch/x86/impl.cpp
@@ -36,7 +36,7 @@
 #include "ue2common.h"
 #include "util/arch.h"
 #include "util/unaligned.h"
-#include "util/simd/types.hpp"
+#include "util/supervector/arch/x86/types.hpp"
 
 #if !defined(m128) && defined(HAVE_SSE2)
 typedef __m128i m128;
@@ -170,17 +170,6 @@ really_inline SuperVector<16> SuperVector<16>::operator^(SuperVector<16> const b
     return {_mm_xor_si128(u.v128[0], b.u.v128[0])};
 }
 
-template <>
-really_inline SuperVector<16> SuperVector<16>::opand(SuperVector<16> const b) const
-{
-    return *this & b;
-}
-
-template <>
-really_inline SuperVector<16> SuperVector<16>::opxor(SuperVector<16> const b) const
-{
-    return *this ^ b;
-}
 
 template <>
 really_inline SuperVector<16> SuperVector<16>::opandnot(SuperVector<16> const b) const
diff --git a/src/util/simd/arch/x86/types.hpp b/src/util/supervector/arch/x86/types.hpp
similarity index 100%
rename from src/util/simd/arch/x86/types.hpp
rename to src/util/supervector/arch/x86/types.hpp
diff --git a/src/util/simd/types.hpp b/src/util/supervector/supervector.hpp
similarity index 78%
rename from src/util/simd/types.hpp
rename to src/util/supervector/supervector.hpp
index 2e4dc6bd..6506d500 100644
--- a/src/util/simd/types.hpp
+++ b/src/util/supervector/supervector.hpp
@@ -34,9 +34,9 @@
 #include
 
 #if defined(ARCH_IA32) || defined(ARCH_X86_64)
-#include "util/simd/arch/x86/types.hpp"
+#include "util/supervector/arch/x86/types.hpp"
 #elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
-#include "util/simd/arch/arm/types.hpp"
+#include "util/supervector/arch/arm/types.hpp"
 #endif
 
 #if defined(HAVE_SIMD_512_BITS)
@@ -54,7 +54,7 @@ using Z_TYPE = u32;
 #elif defined(HAVE_SIMD_128_BITS)
 using Z_TYPE = u32;
 #define Z_BITS 32
-#define Z_SHIFT 0
+#define Z_SHIFT 15
 #define DOUBLE_LOAD_MASK(l) (((1ULL) << (l)) - 1ULL)
 #define SINGLE_LOAD_MASK(l) (((1ULL) << (l)) - 1ULL)
 #endif
@@ -156,35 +156,35 @@ public:
         double f64[SIZE / sizeof(double)];
     } u;
 
-    SuperVector(SuperVector const &o);
+    SuperVector(SuperVector const &other);
     SuperVector(typename base_type::type const v);
 
    template <typename T>
-    SuperVector(T const o);
+    SuperVector(T const other);
 
-    static SuperVector set1u_16x8(uint8_t o) { return {o}; };
-    static SuperVector set1_16x8(int8_t o) { return {o}; };
-    static SuperVector set1u_8x16(uint16_t o) { return {o}; };
-    static SuperVector set1_8x16(int16_t o) { return {o}; };
-    static SuperVector set1u_4x32(uint32_t o) { return {o}; };
-    static SuperVector set1_4x32(int32_t o) { return {o}; };
-    static SuperVector set1u_2x64(uint64_t o) { return {o}; };
-    static SuperVector set1_2x64(int64_t o) { return {o}; };
+    static SuperVector dup_u8 (uint8_t other) { return {other}; };
+    static SuperVector dup_s8 (int8_t other) { return {other}; };
+    static SuperVector dup_u16(uint16_t other) { return {other}; };
+    static SuperVector dup_s16(int16_t other) { return {other}; };
+    static SuperVector dup_u32(uint32_t other) { return {other}; };
+    static SuperVector dup_s32(int32_t other) { return {other}; };
+    static SuperVector dup_u64(uint64_t other) { return {other}; };
+    static SuperVector dup_s64(int64_t other) { return {other}; };
 
-    void operator=(SuperVector const &o);
-
-    SuperVector operator&(SuperVector const b) const;
-    SuperVector operator|(SuperVector const b) const;
-    SuperVector operator^(SuperVector const b) const;
-
-
-    SuperVector opand(SuperVector const b) const;
-    SuperVector opor(SuperVector const b) const;
-    SuperVector opandnot(SuperVector const b) const;
-    SuperVector opxor(SuperVector const b) const;
+    void operator=(SuperVector const &other);
 
-    SuperVector eq(SuperVector const b) const;
+
+    SuperVector operator&(SuperVector const &b) const;
+    SuperVector operator|(SuperVector const &b) const;
+    SuperVector operator^(SuperVector const &b) const;
+
+    SuperVector opand(SuperVector const &b) const { return *this & b; }
+    SuperVector opor (SuperVector const &b) const { return *this | b; }
+    SuperVector opxor(SuperVector const &b) const { return *this ^ b; }
+    SuperVector opandnot(SuperVector const &b) const;
+
+    SuperVector eq(SuperVector const &b) const;
     SuperVector operator<<(uint8_t const N) const;
     SuperVector operator>>(uint8_t const N) const;
     typename base_type::movemask_type movemask(void) const;
@@ -193,11 +193,11 @@ public:
     static SuperVector loadu(void const *ptr);
     static SuperVector load(void const *ptr);
     static SuperVector loadu_maskz(void const *ptr, uint8_t const len);
-    SuperVector alignr(SuperVector l, int8_t offset);
+    SuperVector alignr(SuperVector &other, int8_t offset);
     SuperVector pshufb(SuperVector b);
 
-    SuperVector lshift64(uint8_t const l);
-    SuperVector rshift64(uint8_t const l);
+    SuperVector lshift64(uint8_t const N);
+    SuperVector rshift64(uint8_t const N);
 
     // Constants
     static SuperVector Ones();
@@ -211,41 +211,41 @@ public:
 
 #if defined(HS_OPTIMIZE)
 #if defined(ARCH_IA32) || defined(ARCH_X86_64)
-#include "util/simd/arch/x86/impl.cpp"
+#include "util/supervector/arch/x86/impl.cpp"
 #elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
-#include "util/simd/arch/arm/impl.cpp"
+#include "util/supervector/arch/arm/impl.cpp"
 #endif
 #endif
 
 template <uint16_t S>
-static void printv_u8(const char *label, SuperVector<S> &v) {
+static void printv_u8(const char *label, SuperVector<S> const &v) {
     printf("%s: ", label);
-    for(int i=0; i < S; i++)
+    for(size_t i=0; i < S; i++)
         printf("%02x ", v.u.u8[i]);
     printf("\n");
 }
 
 template <uint16_t S>
-static void printv_u16(const char *label, SuperVector<S> &v) {
+static void printv_u16(const char *label, SuperVector<S> const &v) {
     printf("%s: ", label);
-    for(int i=0; i < S/sizeof(u16); i++)
+    for(size_t i=0; i < S/sizeof(u16); i++)
        printf("%04x ", v.u.u16[i]);
     printf("\n");
 }
 
 template <uint16_t S>
-static void printv_u32(const char *label, SuperVector<S> &v) {
+static void printv_u32(const char *label, SuperVector<S> const &v) {
     printf("%s: ", label);
-    for(int i=0; i < S/sizeof(u32); i++)
+    for(size_t i=0; i < S/sizeof(u32); i++)
        printf("%08x ", v.u.u32[i]);
     printf("\n");
 }
 
 template <uint16_t S>
-static inline void printv_u64(const char *label, SuperVector<S> &v) {
+static inline void printv_u64(const char *label, SuperVector<S> const &v) {
     printf("%s: ", label);
-    for(int i=0; i < S/sizeof(u64a); i++)
+    for(size_t i=0; i < S/sizeof(u64a); i++)
        printf("%016lx ", v.u.u64[i]);
     printf("\n");
 }
diff --git a/unit/internal/supervector.cpp b/unit/internal/supervector.cpp
index bfa66331..f273f137 100644
--- a/unit/internal/supervector.cpp
+++ b/unit/internal/supervector.cpp
@@ -32,9 +32,7 @@
 #include
 #include"gtest/gtest.h"
 #include"ue2common.h"
-#include"util/arch.h"
-#include"util/simd_utils.h"
-#include"util/simd/types.hpp"
+#include"util/supervector/supervector.hpp"
 
 TEST(SuperVectorUtilsTest, Zero128c) {
@@ -303,14 +301,11 @@ TEST(SuperVectorUtilsTest,pshufbc) {
 }
 
 /*Define ALIGNR128 macro*/
-#define TEST_ALIGNR128(v1, v2, buf, l) {                    \
-        auto v_aligned = v2.alignr(v1,l);                   \
-        printv_u8("v1", v1);                                \
-        printv_u8("v2", v2);                                \
-        printv_u8("v_aligned", v_aligned);                  \
-        for (size_t i=0; i<16; i++) {                       \
-            ASSERT_EQ(v_aligned.u.u8[i], vec[16 -l + i]);   \
-        }                                                   \
+#define TEST_ALIGNR128(v1, v2, buf, l) {                    \
+        auto v_aligned = v2.alignr(v1, l);                  \
+        for (size_t i=0; i<16; i++) {                       \
+            ASSERT_EQ(v_aligned.u.u8[i], vec[16 - l + i]);  \
+        }                                                   \
 }
 
 TEST(SuperVectorUtilsTest,Alignr128c){
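For reviewers unfamiliar with the shufti kernel this patch touches: block() reduces byte-class matching to two pshufb nibble lookups and an AND, and the renamed SuperVector::dup_u8(0xf) constant is the low-nibble mask feeding it. Below is a minimal, self-contained sketch of that idea written directly against SSSE3 intrinsics rather than the SuperVector wrappers from this patch; the helper names make_masks and shufti_block16 are invented for the illustration, and unlike the patched block(), which returns eqmask(Zeroes()) (a bitmap of the non-matching bytes), this sketch returns the bitmap of matching bytes. Compile with something like g++ -mssse3.

#include <immintrin.h>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Build the low-nibble / high-nibble lookup tables for a small byte class.
static void make_masks(const char *set, uint8_t mask_lo[16], uint8_t mask_hi[16]) {
    memset(mask_lo, 0, 16);
    memset(mask_hi, 0, 16);
    for (const char *p = set; *p; ++p) {
        uint8_t c = (uint8_t)*p;
        mask_lo[c & 0xf] |= 1;   // one shared bit is enough for a single class
        mask_hi[c >> 4]  |= 1;
    }
}

// One 16-byte block: a byte matches when the bits selected by its low nibble
// and by its high nibble are both set (c_lo & c_hi != 0).
static uint32_t shufti_block16(__m128i mask_lo, __m128i mask_hi, __m128i chars) {
    const __m128i low4bits = _mm_set1_epi8(0xf);                        // SuperVector::dup_u8(0xf)
    __m128i c_lo = _mm_shuffle_epi8(mask_lo, _mm_and_si128(chars, low4bits));
    __m128i c_hi = _mm_shuffle_epi8(mask_hi,
                       _mm_and_si128(_mm_srli_epi64(chars, 4), low4bits)); // chars.rshift64(4)
    __m128i t = _mm_and_si128(c_lo, c_hi);
    // Invert the "equal to zero" bitmap so set bits mark matching bytes.
    return ~(uint32_t)_mm_movemask_epi8(_mm_cmpeq_epi8(t, _mm_setzero_si128())) & 0xffffu;
}

int main() {
    uint8_t lo[16], hi[16];
    make_masks("ab", lo, hi);
    __m128i mask_lo = _mm_loadu_si128((const __m128i *)lo);
    __m128i mask_hi = _mm_loadu_si128((const __m128i *)hi);

    const char buf[] = "xxxaxxxxbxxxxxxx";   // 16 bytes scanned
    __m128i chars = _mm_loadu_si128((const __m128i *)buf);
    printf("match bitmap: 0x%04x\n", shufti_block16(mask_lo, mask_hi, chars)); // expect 0x0108 (bits 3 and 8)
    return 0;
}

The same two-lookup structure is what the wide_mask_lo/wide_mask_hi vectors feed in shuftiExecReal above; the patch changes only the spelling of the constants (set1u_16x8 to dup_u8, set1_2x64 to dup_u64) and the debug-print plumbing, not the algorithm.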