diff --git a/src/nfa/shufti_simd.hpp b/src/nfa/shufti_simd.hpp index 9abbf325..2f18e8d8 100644 --- a/src/nfa/shufti_simd.hpp +++ b/src/nfa/shufti_simd.hpp @@ -221,6 +221,7 @@ template static really_inline const u8 *fwdBlockDouble(SuperVector mask1_lo, SuperVector mask1_hi, SuperVector mask2_lo, SuperVector mask2_hi, SuperVector chars, const SuperVector low4bits, const u8 *buf) { + SuperVector chars_lo = chars & low4bits; SuperVector chars_hi = chars.rshift64(4) & low4bits; SuperVector c1_lo = mask1_lo.pshufb(chars_lo); @@ -230,7 +231,7 @@ const u8 *fwdBlockDouble(SuperVector mask1_lo, SuperVector mask1_hi, Super SuperVector c2_lo = mask2_lo.pshufb(chars_lo); SuperVector c2_hi = mask2_hi.pshufb(chars_hi); SuperVector t2 = c2_lo | c2_hi; - SuperVector t = t1 | (t2 >> 1); + SuperVector t = t1 | (t2.rshift128(1)); typename SuperVector::movemask_type z = t.eqmask(SuperVector::Ones()); DEBUG_PRINTF(" z: 0x%016llx\n", (u64a)z); @@ -264,6 +265,7 @@ const u8 *shuftiDoubleExecReal(m128 mask1_lo, m128 mask1_hi, if (d1 != d) { SuperVector chars = SuperVector::loadu(d); rv = fwdBlockDouble(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, chars, low4bits, d); + DEBUG_PRINTF("rv %p \n", rv); if (rv) return rv; d = d1; }