mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
initial SSE/AVX2 implementation
This commit is contained in:
parent
e5e2057ca9
commit
e7161fdfec
@ -51,7 +51,7 @@ typename SuperVector<S>::movemask_type block(SuperVector<S> mask_lo, SuperVector
|
||||
|
||||
SuperVector<S> c_lo = chars & low4bits;
|
||||
c_lo = mask_lo.pshufb(c_lo);
|
||||
SuperVector<S> c_hi = mask_hi.pshufb(chars.rshift64(4) & low4bits);
|
||||
SuperVector<S> c_hi = mask_hi.pshufb(chars.template vshr_64_imm<4>() & low4bits);
|
||||
SuperVector<S> t = c_lo & c_hi;
|
||||
|
||||
return t.eqmask(SuperVector<S>::Zeroes());
|
||||
@ -212,7 +212,7 @@ const u8 *fwdBlockDouble(SuperVector<S> mask1_lo, SuperVector<S> mask1_hi, Super
|
||||
const SuperVector<S> low4bits = SuperVector<S>::dup_u8(0xf);
|
||||
SuperVector<S> chars_lo = chars & low4bits;
|
||||
chars_lo.print8("chars_lo");
|
||||
SuperVector<S> chars_hi = chars.rshift64(4) & low4bits;
|
||||
SuperVector<S> chars_hi = chars.template vshr_64_imm<4>() & low4bits;
|
||||
chars_hi.print8("chars_hi");
|
||||
SuperVector<S> c1_lo = mask1_lo.pshufb(chars_lo);
|
||||
c1_lo.print8("c1_lo");
|
||||
@ -227,8 +227,8 @@ const u8 *fwdBlockDouble(SuperVector<S> mask1_lo, SuperVector<S> mask1_hi, Super
|
||||
c2_hi.print8("c2_hi");
|
||||
SuperVector<S> t2 = c2_lo | c2_hi;
|
||||
t2.print8("t2");
|
||||
t2.rshift128(1).print8("t2.rshift128(1)");
|
||||
SuperVector<S> t = t1 | (t2.rshift128(1));
|
||||
t2.template vshr_128_imm<1>().print8("t2.rshift128(1)");
|
||||
SuperVector<S> t = t1 | (t2.template vshr_128_imm<1>());
|
||||
t.print8("t");
|
||||
|
||||
typename SuperVector<S>::movemask_type z = t.eqmask(SuperVector<S>::Ones());
|
||||
@ -250,7 +250,7 @@ static really_inline const u8 *shuftiDoubleMini(SuperVector<S> mask1_lo, SuperVe
|
||||
|
||||
SuperVector<S> chars_lo = chars & low4bits;
|
||||
chars_lo.print8("chars_lo");
|
||||
SuperVector<S> chars_hi = chars.rshift64(4) & low4bits;
|
||||
SuperVector<S> chars_hi = chars.template vshr_64_imm<4>() & low4bits;
|
||||
chars_hi.print8("chars_hi");
|
||||
SuperVector<S> c1_lo = mask1_lo.pshufb_maskz(chars_lo, len);
|
||||
c1_lo.print8("c1_lo");
|
||||
@ -265,8 +265,8 @@ static really_inline const u8 *shuftiDoubleMini(SuperVector<S> mask1_lo, SuperVe
|
||||
c2_hi.print8("c2_hi");
|
||||
SuperVector<S> t2 = c2_lo | c2_hi;
|
||||
t2.print8("t2");
|
||||
t2.rshift128(1).print8("t2.rshift128(1)");
|
||||
SuperVector<S> t = t1 | (t2.rshift128(1));
|
||||
t2.template vshr_128_imm<1>().print8("t2.rshift128(1)");
|
||||
SuperVector<S> t = t1 | (t2.template vshr_128_imm<1>());
|
||||
t.print8("t");
|
||||
|
||||
typename SuperVector<S>::movemask_type z = t.eqmask(SuperVector<S>::Ones());
|
||||
|
@ -53,7 +53,7 @@ typename SuperVector<S>::movemask_type block(SuperVector<S> shuf_mask_lo_highcle
|
||||
SuperVector<S> shuf1 = shuf_mask_lo_highclear.pshufb(v);
|
||||
SuperVector<S> t1 = v ^ highconst;
|
||||
SuperVector<S> shuf2 = shuf_mask_lo_highset.pshufb(t1);
|
||||
SuperVector<S> t2 = highconst.opandnot(v.rshift64(4));
|
||||
SuperVector<S> t2 = highconst.opandnot(v.template vshr_64_imm<4>());
|
||||
SuperVector<S> shuf3 = shuf_mask_hi.pshufb(t2);
|
||||
SuperVector<S> tmp = (shuf1 | shuf2) & shuf3;
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -217,15 +217,63 @@ public:
|
||||
|
||||
SuperVector pshufb(SuperVector b);
|
||||
SuperVector pshufb_maskz(SuperVector b, uint8_t const len);
|
||||
SuperVector lshift64(uint8_t const N);
|
||||
SuperVector rshift64(uint8_t const N);
|
||||
SuperVector lshift128(uint8_t const N);
|
||||
SuperVector rshift128(uint8_t const N);
|
||||
SuperVector lshift128_var(uint8_t const N) const;
|
||||
SuperVector rshift128_var(uint8_t const N) const;
|
||||
|
||||
// Shift instructions
|
||||
template<uint8_t N>
|
||||
SuperVector vshl_8_imm() const;
|
||||
template<uint8_t N>
|
||||
SuperVector vshl_16_imm() const;
|
||||
template<uint8_t N>
|
||||
SuperVector vshl_32_imm() const;
|
||||
template<uint8_t N>
|
||||
SuperVector vshl_64_imm() const;
|
||||
template<uint8_t N>
|
||||
SuperVector vshl_128_imm() const;
|
||||
#if defined(HAVE_SIMD_256_BITS)
|
||||
template<uint8_t N>
|
||||
SuperVector vshl_256_imm() const;
|
||||
#endif
|
||||
template<uint8_t N>
|
||||
SuperVector vshl_imm() const;
|
||||
template<uint8_t N>
|
||||
SuperVector vshr_8_imm() const;
|
||||
template<uint8_t N>
|
||||
SuperVector vshr_16_imm() const;
|
||||
template<uint8_t N>
|
||||
SuperVector vshr_32_imm() const;
|
||||
template<uint8_t N>
|
||||
SuperVector vshr_64_imm() const;
|
||||
template<uint8_t N>
|
||||
SuperVector vshr_128_imm() const;
|
||||
#if defined(HAVE_SIMD_256_BITS)
|
||||
template<uint8_t N>
|
||||
SuperVector vshr_256_imm() const;
|
||||
#endif
|
||||
template<uint8_t N>
|
||||
SuperVector vshr_imm() const;
|
||||
SuperVector vshl_8 (uint8_t const N) const;
|
||||
SuperVector vshl_16 (uint8_t const N) const;
|
||||
SuperVector vshl_32 (uint8_t const N) const;
|
||||
SuperVector vshl_64 (uint8_t const N) const;
|
||||
SuperVector vshl_128(uint8_t const N) const;
|
||||
#if defined(HAVE_SIMD_256_BITS)
|
||||
SuperVector vshl_256(uint8_t const N) const;
|
||||
#endif
|
||||
SuperVector vshl (uint8_t const N) const;
|
||||
SuperVector vshr_8 (uint8_t const N) const;
|
||||
SuperVector vshr_16 (uint8_t const N) const;
|
||||
SuperVector vshr_32 (uint8_t const N) const;
|
||||
SuperVector vshr_64 (uint8_t const N) const;
|
||||
SuperVector vshr_128(uint8_t const N) const;
|
||||
#if defined(HAVE_SIMD_256_BITS)
|
||||
SuperVector vshr_256(uint8_t const N) const;
|
||||
#endif
|
||||
SuperVector vshr (uint8_t const N) const;
|
||||
|
||||
// Constants
|
||||
static SuperVector Ones();
|
||||
static SuperVector Ones_vshr(uint8_t const N);
|
||||
static SuperVector Ones_vshl(uint8_t const N);
|
||||
static SuperVector Zeroes();
|
||||
|
||||
#if defined(DEBUG)
|
||||
@ -264,6 +312,25 @@ public:
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
 * Compile-time loop unroller.
 *
 * Unroller<Begin, End>::iterator(action) invokes
 *   action(std::integral_constant<int, I>())
 * once for every I in the half-open range [Begin, End), in ascending order.
 * Because the index arrives as an integral_constant, the callable can read it
 * as a constant expression (decltype(i)::value) and use it as a template
 * argument.
 *
 * The recursion is terminated by the Unroller<End, End> specialization below.
 */
template <std::size_t Begin, std::size_t End>
struct Unroller
{
    // Begin == End always selects the specialization, so the primary template
    // is only valid for Begin < End. Without this check a reversed range would
    // recurse until the compiler's template depth limit is hit.
    static_assert(Begin < End, "Unroller requires Begin <= End");

    /**
     * Calls action for index Begin, then recurses for [Begin + 1, End).
     *
     * @param action callable accepting a std::integral_constant<int, I>; it is
     *               passed on as an lvalue, so the same object sees every index.
     */
    template<typename Action>
    static void iterator(Action &&action)
    {
        action(std::integral_constant<int, Begin>());
        Unroller<Begin + 1, End>::iterator(action);
    }
};

/**
 * Terminating case: the range [End, End) is empty, so nothing is invoked.
 */
template <std::size_t End>
struct Unroller<End, End>
{
    // The parameter is intentionally unnamed: it is never used, and leaving it
    // unnamed suppresses the unused-parameter warning without a helper macro.
    template<typename Action>
    static void iterator(Action &&)
    {}
};
|
||||
|
||||
#if defined(HS_OPTIMIZE)
|
||||
#if defined(ARCH_IA32) || defined(ARCH_X86_64)
|
||||
#include "util/supervector/arch/x86/impl.cpp"
|
||||
|
@ -230,7 +230,7 @@ TEST(SuperVectorUtilsTest,LShift64_128c){
|
||||
u64a vec[2] = {128, 512};
|
||||
auto SP = SuperVector<16>::loadu(vec);
|
||||
for(int s = 0; s<16; s++) {
|
||||
auto SP_after_shift = SP.lshift64(s);
|
||||
auto SP_after_shift = SP.vshl_64(s);
|
||||
for (int i=0; i<2; i++) {
|
||||
ASSERT_EQ(SP_after_shift.u.u64[i], vec[i] << s);
|
||||
}
|
||||
@ -241,7 +241,7 @@ TEST(SuperVectorUtilsTest,RShift64_128c){
|
||||
u64a vec[2] = {128, 512};
|
||||
auto SP = SuperVector<16>::loadu(vec);
|
||||
for(int s = 0; s<16; s++) {
|
||||
auto SP_after_shift = SP.rshift64(s);
|
||||
auto SP_after_shift = SP.vshr_64(s);
|
||||
for (int i=0; i<2; i++) {
|
||||
ASSERT_EQ(SP_after_shift.u.u64[i], vec[i] >> s);
|
||||
}
|
||||
@ -293,7 +293,7 @@ TEST(SuperVectorUtilsTest,pshufb128c) {
|
||||
|
||||
/*Define LSHIFT128_128 macro*/
|
||||
#define TEST_LSHIFT128_128(buf, vec, v, l) { \
|
||||
auto v_shifted = v.lshift128(l); \
|
||||
auto v_shifted = v.vshl_128(l); \
|
||||
for (int i=15; i>= l; --i) { \
|
||||
buf[i] = vec[i-l]; \
|
||||
} \
|
||||
@ -317,7 +317,7 @@ TEST(SuperVectorUtilsTest,LShift128_128c){
|
||||
|
||||
/*Define RSHIFT128_128 macro*/
|
||||
#define TEST_RSHIFT128_128(buf, vec, v, l) { \
|
||||
auto v_shifted = v.rshift128(l); \
|
||||
auto v_shifted = v.vshr_128(l); \
|
||||
for (int i=0; i<16-l; i++) { \
|
||||
buf[i] = vec[i+l]; \
|
||||
} \
|
||||
@ -578,7 +578,7 @@ TEST(SuperVectorUtilsTest,LShift64_256c){
|
||||
u64a vec[4] = {128, 512, 256, 1024};
|
||||
auto SP = SuperVector<32>::loadu(vec);
|
||||
for(int s = 0; s<32; s++) {
|
||||
auto SP_after_shift = SP.lshift64(s);
|
||||
auto SP_after_shift = SP.vshl_64(s);
|
||||
for (int i=0; i<4; i++) {
|
||||
ASSERT_EQ(SP_after_shift.u.u64[i], vec[i] << s);
|
||||
}
|
||||
@ -589,7 +589,7 @@ TEST(SuperVectorUtilsTest,RShift64_256c){
|
||||
u64a vec[4] = {128, 512, 256, 1024};
|
||||
auto SP = SuperVector<32>::loadu(vec);
|
||||
for(int s = 0; s<32; s++) {
|
||||
auto SP_after_shift = SP.rshift64(s);
|
||||
auto SP_after_shift = SP.vshr_64(s);
|
||||
for (int i=0; i<4; i++) {
|
||||
ASSERT_EQ(SP_after_shift.u.u64[i], vec[i] >> s);
|
||||
}
|
||||
@ -627,7 +627,7 @@ TEST(SuperVectorUtilsTest,RShift256c){
|
||||
|
||||
/*Define LSHIFT128_256 macro*/
|
||||
#define TEST_LSHIFT128_256(buf, vec, v, l) { \
|
||||
auto v_shifted = v.lshift128(l); \
|
||||
auto v_shifted = v.vshl_128(l); \
|
||||
for (int i=15; i>= l; --i) { \
|
||||
buf[i] = vec[i-l]; \
|
||||
buf[i+16] = vec[(16+i)-l]; \
|
||||
@ -653,7 +653,7 @@ TEST(SuperVectorUtilsTest,LShift128_256c){
|
||||
|
||||
/*Define RSHIFT128_256 macro*/
|
||||
#define TEST_RSHIFT128_256(buf, vec, v, l) { \
|
||||
auto v_shifted = v.rshift128(l); \
|
||||
auto v_shifted = v.vshr_128(l); \
|
||||
for (int i=0; i<16-l; i++) { \
|
||||
buf[i] = vec[i+l]; \
|
||||
buf[i+16] = vec[(i+16)+l]; \
|
||||
@ -966,7 +966,7 @@ TEST(SuperVectorUtilsTest,RShift512c){
|
||||
|
||||
/*Define RSHIFT128_512 macro*/
|
||||
#define TEST_RSHIFT128_512(buf, vec, v, l) { \
|
||||
auto v_shifted = v.rshift128(l); \
|
||||
auto v_shifted = v.vshr_128(l); \
|
||||
for (int i=0; i<16-l; i++) { \
|
||||
buf[i] = vec[i+l]; \
|
||||
buf[i+16] = vec[(i+16)+l]; \
|
||||
@ -995,7 +995,7 @@ TEST(SuperVectorUtilsTest,RShift128_512c){
|
||||
|
||||
/*Define LSHIFT128_512 macro*/
|
||||
#define TEST_LSHIFT128_512(buf, vec, v, l) { \
|
||||
auto v_shifted = v.lshift128(l); \
|
||||
auto v_shifted = v.vshl_128(l); \
|
||||
for (int i=15; i>=l; --i) { \
|
||||
buf[i] = vec[i-l]; \
|
||||
buf[i+16] = vec[(i+16)-l]; \
|
||||
|
Loading…
x
Reference in New Issue
Block a user