rename supervector class header, use dup_*() function names instead of set1_*(), minor fixes

2026-01-17 16:00:26 +03:00 · 2021-07-02 19:11:44 +03:00
parent 9685095379
commit 2753dbb3b0
10 changed files with 131 additions and 145 deletions
--- a/src/nfa/shufti_simd.hpp
+++ b/src/nfa/shufti_simd.hpp
@@ -38,29 +38,26 @@
 #include "util/bitutils.h"
 #include "util/unaligned.h"

-#include "util/simd/types.hpp"
-
-#define GET1_LO_4(chars, low4bits) and128(chars, low4bits)
-#define GET1_HI_4(chars, low4bits) and128(rshift64_m128(chars, 4), low4bits)
+#include "util/supervector/supervector.hpp"

 template <uint16_t S>
 static really_inline
 typename SuperVector<S>::movemask_type block(SuperVector<S> mask_lo, SuperVector<S> mask_hi,
            SuperVector<S> chars, const SuperVector<S> low4bits) {
    SuperVector<S> c_lo = chars & low4bits;
-    print_m128_16x8("c_lo", c_lo.u.v128[0]);
+    //printv_u8("c_lo", c_lo);
    c_lo = mask_lo.pshufb(c_lo);
-    print_m128_16x8("c_lo", c_lo.u.v128[0]);
+    //printv_u8("c_lo", c_lo);
    SuperVector<S> c_hi = mask_hi.pshufb(chars.rshift64(4) & low4bits);
    SuperVector<S> t = c_lo & c_hi;

-    print_m128_16x8("low4bits", low4bits.u.v128[0]);
-    print_m128_16x8("mask_lo", mask_lo.u.v128[0]);
-    print_m128_16x8("mask_hi", mask_hi.u.v128[0]);
-    print_m128_16x8("chars", chars.u.v128[0]);
-    print_m128_16x8("c_lo", c_lo.u.v128[0]);
-    print_m128_16x8("c_hi", c_hi.u.v128[0]);
-    print_m128_16x8("t", t.u.v128[0]);
+    /*printv_u8("low4bits", low4bits);
+    printv_u8("mask_lo", mask_lo);
+    printv_u8("mask_hi", mask_hi);
+    printv_u8("chars", chars);
+    printv_u8("c_lo", c_lo);
+    printv_u8("c_hi", c_hi);
+    printv_u8("t", t);*/

    return t.eqmask(SuperVector<S>::Zeroes());
 }
@@ -71,7 +68,6 @@ const u8 *firstMatch(const u8 *buf, typename SuperVector<S>::movemask_type z);
 template <uint16_t S>
 const u8 *lastMatch(const u8 *buf, typename SuperVector<S>::movemask_type z);

-
 template <>
 really_inline
 const u8 *firstMatch<16>(const u8 *buf, typename SuperVector<16>::movemask_type z) {
@@ -121,7 +117,7 @@ const u8 *shortShufti(SuperVector<S> mask_lo, SuperVector<S> mask_hi, const u8 *
    assert(len <= S);

    SuperVector<S> chars = SuperVector<S>::loadu_maskz(buf, static_cast<uint8_t>(len));
-    print_m128_16x8("chars", chars.u.v128[0]);
+    //printv_u8("chars", chars);
    uint8_t alignment = (uintptr_t)(buf) & 15;
    typename SuperVector<S>::movemask_type maskb = 1 << alignment;
    typename SuperVector<S>::movemask_type maske = SINGLE_LOAD_MASK(len - alignment);
@@ -183,7 +179,7 @@ const u8 *shuftiExecReal(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *bu
    DEBUG_PRINTF("shufti %p len %zu\n", buf, buf_end - buf);
    DEBUG_PRINTF("b %s\n", buf);

-    const SuperVector<S> low4bits = SuperVector<S>::set1u_16x8(0xf);
+    const SuperVector<S> low4bits = SuperVector<S>::dup_u8(0xf);
    const SuperVector<S> wide_mask_lo(mask_lo);
    const SuperVector<S> wide_mask_hi(mask_hi);

@@ -240,7 +236,7 @@ const u8 *rshuftiExecReal(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *b
    DEBUG_PRINTF("shufti %p len %zu\n", buf, buf_end - buf);
    DEBUG_PRINTF("b %s\n", buf);

-    const SuperVector<S> low4bits = SuperVector<S>::set1u_16x8(0xf);
+    const SuperVector<S> low4bits = SuperVector<S>::dup_u8(0xf);
    const SuperVector<S> wide_mask_lo(mask_lo);
    const SuperVector<S> wide_mask_hi(mask_hi);

@@ -316,7 +312,7 @@ const u8 *shuftiDoubleExecReal(m128 mask1_lo, m128 mask1_hi,
    DEBUG_PRINTF("shufti %p len %zu\n", buf, buf_end - buf);
    DEBUG_PRINTF("b %s\n", buf);

-    const SuperVector<S> low4bits = SuperVector<S>::set1u_16x8(0xf);
+    const SuperVector<S> low4bits = SuperVector<S>::dup_u8(0xf);
    const SuperVector<S> wide_mask1_lo(mask1_lo);
    const SuperVector<S> wide_mask1_hi(mask1_hi);
    const SuperVector<S> wide_mask2_lo(mask2_lo);
--- a/src/nfa/truffle_simd.hpp
+++ b/src/nfa/truffle_simd.hpp
@@ -38,7 +38,7 @@
 #include "util/bitutils.h"
 #include "util/unaligned.h"

-#include "util/simd/types.hpp"
+#include "util/supervector/supervector.hpp"


 template <uint16_t S>
@@ -115,18 +115,18 @@ static really_inline
 typename SuperVector<S>::movemask_type block(SuperVector<S> shuf_mask_lo_highclear, SuperVector<S> shuf_mask_lo_highset,
            SuperVector<S> v){

-    SuperVector<S> highconst = SuperVector<S>::set1_16x8(0x80);           
-    print_m128_16x8("highconst", highconst.u.v128[0]);
+    SuperVector<S> highconst = SuperVector<S>::dup_u8(0x80);
+    printv_u8("highconst", highconst);
    
-    SuperVector<S> shuf_mask_hi = SuperVector<S>::set1_2x64(0x8040201008040201);
-    print_m128_2x64("shuf_mask_hi", shuf_mask_hi.u.v128[0]);
+    SuperVector<S> shuf_mask_hi = SuperVector<S>::dup_u64(0x8040201008040201);
+    printv_u64("shuf_mask_hi", shuf_mask_hi);
    
    SuperVector<S> shuf1 = shuf_mask_lo_highclear.pshufb(v);
    SuperVector<S> t1 = v ^ highconst;
    SuperVector<S> shuf2 = shuf_mask_lo_highset.pshufb(t1);
    SuperVector<S> t2 = highconst.opandnot(v.rshift64(4));
    SuperVector<S> shuf3 = shuf_mask_hi.pshufb(t2);
-    SuperVector<S> tmp = shuf3 & (shuf1 | shuf2);
+    SuperVector<S> tmp = (shuf1 | shuf2) & shuf3;

    return tmp.eqmask(SuperVector<S>::Zeroes());
 }