diff --git a/src/nfa/shufti_simd.hpp b/src/nfa/shufti_simd.hpp index bc9916b5..46ad3d36 100644 --- a/src/nfa/shufti_simd.hpp +++ b/src/nfa/shufti_simd.hpp @@ -212,7 +212,7 @@ const u8 *rshuftiExecReal(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *b rv = shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, buf, d); // rv = shortShufti(wide_mask_lo, wide_mask_hi, buf_end - S, buf_end, low4bits); DEBUG_PRINTF("rv %p \n", rv); - if (rv != d - 1) return rv; + if (rv) return rv; } return buf - 1; diff --git a/src/nfa/truffle_simd.hpp b/src/nfa/truffle_simd.hpp index 21056337..0d57650b 100644 --- a/src/nfa/truffle_simd.hpp +++ b/src/nfa/truffle_simd.hpp @@ -48,10 +48,7 @@ typename SuperVector::movemask_type block(SuperVector shuf_mask_lo_highcle SuperVector v){ SuperVector highconst = SuperVector::dup_u8(0x80); - printv_u8("highconst", highconst); - SuperVector shuf_mask_hi = SuperVector::dup_u64(0x8040201008040201); - printv_u64("shuf_mask_hi", shuf_mask_hi); SuperVector shuf1 = shuf_mask_lo_highclear.pshufb(v); SuperVector t1 = v ^ highconst; @@ -68,7 +65,9 @@ static really_inline const u8 *truffleMini(SuperVector shuf_mask_lo_highclear const u8 *buf, const u8 *buf_end){ uintptr_t len = buf_end - buf; assert(len < 16); - SuperVector chars = SuperVector::loadu(buf); + + SuperVector chars = SuperVector::Zeroes(); + memcpy(&chars.u.u8[0], buf, len); u32 mask = (0xffff >> (16 - len)) ^ 0xffff; typename SuperVector::movemask_type z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); @@ -81,7 +80,6 @@ static really_inline const u8 *truffleMini(SuperVector shuf_mask_lo_highclear } } - template static really_inline const u8 *fwdBlock(SuperVector shuf_mask_lo_highclear, SuperVector shuf_mask_lo_highset, SuperVector v,