From b67cd7dfd09d1b3bb45eafdac29f7c56727b4d34 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Tue, 20 Jul 2021 14:33:03 +0300 Subject: [PATCH] use rshift128() instead of vector-wide right shift --- src/nfa/shufti_simd.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/nfa/shufti_simd.hpp b/src/nfa/shufti_simd.hpp index 9abbf325..2f18e8d8 100644 --- a/src/nfa/shufti_simd.hpp +++ b/src/nfa/shufti_simd.hpp @@ -221,6 +221,7 @@ template static really_inline const u8 *fwdBlockDouble(SuperVector mask1_lo, SuperVector mask1_hi, SuperVector mask2_lo, SuperVector mask2_hi, SuperVector chars, const SuperVector low4bits, const u8 *buf) { + SuperVector chars_lo = chars & low4bits; SuperVector chars_hi = chars.rshift64(4) & low4bits; SuperVector c1_lo = mask1_lo.pshufb(chars_lo); @@ -230,7 +231,7 @@ const u8 *fwdBlockDouble(SuperVector mask1_lo, SuperVector mask1_hi, Super SuperVector c2_lo = mask2_lo.pshufb(chars_lo); SuperVector c2_hi = mask2_hi.pshufb(chars_hi); SuperVector t2 = c2_lo | c2_hi; - SuperVector t = t1 | (t2 >> 1); + SuperVector t = t1 | (t2.rshift128(1)); typename SuperVector::movemask_type z = t.eqmask(SuperVector::Ones()); DEBUG_PRINTF(" z: 0x%016llx\n", (u64a)z); @@ -264,6 +265,7 @@ const u8 *shuftiDoubleExecReal(m128 mask1_lo, m128 mask1_hi, if (d1 != d) { SuperVector chars = SuperVector::loadu(d); rv = fwdBlockDouble(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, chars, low4bits, d); + DEBUG_PRINTF("rv %p \n", rv); if (rv) return rv; d = d1; }