diff --git a/src/nfa/shufti_simd.hpp b/src/nfa/shufti_simd.hpp
index e7f3f6c9..09850c00 100644
--- a/src/nfa/shufti_simd.hpp
+++ b/src/nfa/shufti_simd.hpp
@@ -63,7 +63,7 @@ static really_inline
 const u8 *fwdBlock(SuperVector mask_lo, SuperVector mask_hi, SuperVector chars, const u8 *buf) {
     SuperVector v = blockSingleMask(mask_lo, mask_hi, chars);
 
-    return firstMatch(buf, v);
+    return first_zero_match_inverted(buf, v); // inverted mask: set lanes are misses, the first clear lane is the match
 }
 
 template 
@@ -71,7 +71,7 @@ static really_inline
 const u8 *revBlock(SuperVector mask_lo, SuperVector mask_hi, SuperVector chars, const u8 *buf) {
     SuperVector v = blockSingleMask(mask_lo, mask_hi, chars);
 
-    return lastMatch(buf, v);
+    return last_zero_match_inverted(buf, v); // same inverted convention, scanning from the high end
 }
 
 template 
@@ -80,7 +80,7 @@ const u8 *fwdBlockDouble(SuperVector mask1_lo, SuperVector mask1_hi, Super
 
     SuperVector mask = blockDoubleMask(mask1_lo, mask1_hi, mask2_lo, mask2_hi, chars);
 
-    return firstMatch(buf, mask);
+    return first_zero_match_inverted(buf, mask);
 }
 
 template 
diff --git a/src/nfa/truffle_simd.hpp b/src/nfa/truffle_simd.hpp
index 8d61722b..13a5e787 100644
--- a/src/nfa/truffle_simd.hpp
+++ b/src/nfa/truffle_simd.hpp
@@ -56,7 +56,7 @@ static really_inline
 const u8 *fwdBlock(SuperVector shuf_mask_lo_highclear, SuperVector shuf_mask_lo_highset, SuperVector chars, const u8 *buf) {
     SuperVector res = blockSingleMask(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars);
 
-    return firstMatch(buf, res);
+    return first_zero_match_inverted(buf, res);
 }
 
 template 
@@ -120,7 +120,7 @@ static really_inline
 const u8 *revBlock(SuperVector shuf_mask_lo_highclear, SuperVector shuf_mask_lo_highset, SuperVector v, const u8 *buf) {
     SuperVector res = blockSingleMask(shuf_mask_lo_highclear, shuf_mask_lo_highset, v);
 
-    return lastMatch(buf, res);
+    return last_zero_match_inverted(buf, res);
 }
 
 template 
diff --git a/src/nfa/x86/shufti.hpp b/src/nfa/x86/shufti.hpp
index 79ef7481..6fb34b2f 100644
--- a/src/nfa/x86/shufti.hpp
+++ b/src/nfa/x86/shufti.hpp
@@ -31,12 +31,6 @@
  * \brief Shufti: character class acceleration.
  */
 
-#ifndef SHUFTI_SIMD_X86_HPP
-#define SHUFTI_SIMD_X86_HPP
-
-#include "util/supervector/supervector.hpp"
-#include "util/match.hpp"
-
 template 
 static really_inline
 const SuperVector blockSingleMask(SuperVector mask_lo, SuperVector mask_hi, SuperVector chars) {
@@ -44,12 +38,10 @@ const SuperVector blockSingleMask(SuperVector mask_lo, SuperVector mask
 
     SuperVector c_lo = chars & low4bits;
     SuperVector c_hi = chars.template vshr_64_imm<4>() & low4bits;
-    c_lo = mask_lo.template pshufb(c_lo);
-    c_hi = mask_hi.template pshufb(c_hi);
+    c_lo = mask_lo.pshufb(c_lo); // look up the low nibble of each byte in mask_lo
+    c_hi = mask_hi.pshufb(c_hi); // look up the high nibble of each byte in mask_hi
 
-    SuperVector c = c_lo & c_hi;
-
-    return c.eq(SuperVector::Zeroes());
+    return (c_lo & c_hi).eq(SuperVector::Zeroes()); // compare against zero: flags lanes whose two lookups share no bit
 }
 
 template 
@@ -80,5 +72,3 @@ SuperVector blockDoubleMask(SuperVector mask1_lo, SuperVector mask1_hi,
 
     return c.eq(SuperVector::Ones());
 }
-
-#endif // SHUFTI_SIMD_X86_HPP
diff --git a/src/util/arch/arm/match.hpp b/src/util/arch/arm/match.hpp
index e7f757bd..ba5f797f 100644
--- a/src/util/arch/arm/match.hpp
+++ b/src/util/arch/arm/match.hpp
@@ -29,7 +29,44 @@
 
 template <>
 really_really_inline
-const u8 *firstMatch<16>(const u8 *buf, SuperVector<16> mask) {
+const u8 *first_non_zero_match<16>(const u8 *buf, SuperVector<16> mask) {
+    uint32x4_t m = mask.u.u32x4[0];
+    uint64_t vmax = vgetq_lane_u64 (vreinterpretq_u64_u32 (vpmaxq_u32(m, m)), 0); // pairwise max folds the four 32-bit lanes into the low 64 bits
+    if (vmax != 0) { // some bit is set somewhere in the vector
+        typename SuperVector<16>::movemask_type z = mask.movemask();
+        DEBUG_PRINTF("z %08x\n", z);
+        DEBUG_PRINTF("buf %p z %08x \n", buf, z);
+        u32 pos = ctz32(z & 0xffff); // only the low 16 bits of the movemask are considered
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        assert(pos < 16);
+        DEBUG_PRINTF("buf + pos %p\n", buf + pos);
+        return buf + pos;
+    } else {
+        return NULL; // no match
+    }
+}
+
+template <>
+really_really_inline
+const u8 *last_non_zero_match<16>(const u8 *buf, SuperVector<16> mask) {
+    uint32x4_t m = mask.u.u32x4[0];
+    uint64_t vmax = vgetq_lane_u64 (vreinterpretq_u64_u32 (vpmaxq_u32(m, m)), 0); // pairwise max folds the four 32-bit lanes into the low 64 bits
+    if (vmax != 0) {
+        typename SuperVector<16>::movemask_type z = mask.movemask();
+        DEBUG_PRINTF("buf %p z %08x \n", buf, z);
+        DEBUG_PRINTF("z %08x\n", z);
+        u32 pos = clz32(z & 0xffff); // only the low 16 bits of the movemask are considered
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        assert(pos >= 16 && pos < 32);
+        return buf + (31 - pos); // pos is asserted to be 16..31, so the offset lands in 0..15
+    } else {
+        return NULL; // no match
+    }
+}
+
+template <>
+really_really_inline
+const u8 *first_zero_match_inverted<16>(const u8 *buf, SuperVector<16> mask) {
     uint32x4_t m = mask.u.u32x4[0];
     uint64_t vmax = vgetq_lane_u64 (vreinterpretq_u64_u32 (vpmaxq_u32(m, m)), 0);
     if (vmax != 0) {
@@ -48,7 +85,7 @@ const u8 *firstMatch<16>(const u8 *buf, SuperVector<16> mask) {
 
 template <>
 really_really_inline
-const u8 *lastMatch<16>(const u8 *buf, SuperVector<16> mask) {
+const u8 *last_zero_match_inverted<16>(const u8 *buf, SuperVector<16> mask) {
     uint32x4_t m = mask.u.u32x4[0];
     uint64_t vmax = vgetq_lane_u64 (vreinterpretq_u64_u32 (vpmaxq_u32(m, m)), 0);
     if (vmax != 0) {
diff --git a/src/util/arch/x86/match.hpp b/src/util/arch/x86/match.hpp
index 159f7355..26283ca7 100644
--- a/src/util/arch/x86/match.hpp
+++ b/src/util/arch/x86/match.hpp
@@ -29,7 +29,98 @@
 
 template <>
 really_really_inline
-const u8 *firstMatch<16>(const u8 *buf, SuperVector<16> v) {
+const u8 *first_non_zero_match<16>(const u8 *buf, SuperVector<16> v) {
+    SuperVector<16>::movemask_type z = v.movemask();
+    DEBUG_PRINTF("buf %p z %08x \n", buf, z);
+    DEBUG_PRINTF("z %08x\n", z);
+    if (unlikely(z)) { // non-inverted convention: any set bit is a match
+        u32 pos = ctz32(z);
+        DEBUG_PRINTF("~z %08x\n", ~z);
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        assert(pos < 16);
+        return buf + pos;
+    } else {
+        return NULL; // no match
+    }
+}
+
+template <>
+really_really_inline
+const u8 *first_non_zero_match<32>(const u8 *buf, SuperVector<32> v) {
+    SuperVector<32>::movemask_type z = v.movemask();
+    DEBUG_PRINTF("z 0x%08x\n", z);
+    if (unlikely(z)) { // non-inverted convention: any set bit is a match
+        u32 pos = ctz32(z);
+        assert(pos < 32);
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        return buf + pos;
+    } else {
+        return NULL; // no match
+    }
+}
+template <>
+really_really_inline
+const u8 *first_non_zero_match<64>(const u8 *buf, SuperVector<64>v) {
+    SuperVector<64>::movemask_type z = v.movemask();
+    DEBUG_PRINTF("z 0x%016llx\n", z);
+    if (unlikely(z)) { // non-inverted convention: any set bit is a match
+        u32 pos = ctz64(z);
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        assert(pos < 64);
+        return buf + pos;
+    } else {
+        return NULL; // no match
+    }
+}
+
+template <>
+really_really_inline
+const u8 *last_non_zero_match<16>(const u8 *buf, SuperVector<16> v) {
+    SuperVector<16>::movemask_type z = v.movemask();
+    DEBUG_PRINTF("buf %p z %08x \n", buf, z);
+    DEBUG_PRINTF("z %08x\n", z);
+    if (unlikely(z)) {
+        u32 pos = clz32(z);
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        assert(pos >= 16 && pos < 32);
+        return buf + (31 - pos); // pos is asserted to be 16..31, so the offset lands in 0..15
+    } else {
+        return NULL; // no match
+    }
+}
+
+template <>
+really_really_inline
+const u8 *last_non_zero_match<32>(const u8 *buf, SuperVector<32> v) {
+    SuperVector<32>::movemask_type z = v.movemask();
+    DEBUG_PRINTF("z 0x%08x\n", z);
+    if (unlikely(z)) {
+        u32 pos = clz32(z);
+        assert(pos < 32);
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        return buf + (31 - pos); // 32-bit mask: highest set bit index is 31 - leading zeros
+    } else {
+        return NULL; // no match
+    }
+}
+template <>
+really_really_inline
+const u8 *last_non_zero_match<64>(const u8 *buf, SuperVector<64>v) {
+    SuperVector<64>::movemask_type z = v.movemask();
+    DEBUG_PRINTF("z 0x%016llx\n", z);
+    if (unlikely(z)) {
+        u32 pos = clz64(z);
+        DEBUG_PRINTF("match @ pos %u\n", pos);
+        assert(pos < 64);
+        return buf + (63 - pos); // 64-bit mask: highest set bit index is 63 - leading zeros (31 - pos could point before buf)
+    } else {
+        return NULL; // no match
+    }
+}
+
+template <>
+really_really_inline
+const u8 *first_zero_match_inverted<16>(const u8 *buf, SuperVector<16> v) {
     SuperVector<16>::movemask_type z = v.movemask();
     DEBUG_PRINTF("buf %p z %08x \n", buf, z);
     DEBUG_PRINTF("z %08x\n", z);
@@ -46,7 +137,7 @@ const u8 *firstMatch<16>(const u8 *buf, SuperVector<16> v) {
 
 template <>
 really_really_inline
-const u8 *firstMatch<32>(const u8 *buf, SuperVector<32> v) {
+const u8 *first_zero_match_inverted<32>(const u8 *buf, SuperVector<32> v) {
     SuperVector<32>::movemask_type z = v.movemask();
     DEBUG_PRINTF("z 0x%08x\n", z);
     if (unlikely(z != 0xffffffff)) {
@@ -60,7 +151,7 @@ const u8 *firstMatch<32>(const u8 *buf, SuperVector<32> v) {
 }
 template <>
 really_really_inline
-const u8 *firstMatch<64>(const u8 *buf, SuperVector<64>v) {
+const u8 *first_zero_match_inverted<64>(const u8 *buf, SuperVector<64>v) {
     SuperVector<64>::movemask_type z = v.movemask();
     DEBUG_PRINTF("z 0x%016llx\n", z);
     if (unlikely(z != ~0ULL)) {
@@ -75,7 +166,7 @@ const u8 *firstMatch<64>(const u8 *buf, SuperVector<64>v) {
 
 template <>
 really_really_inline
-const u8 *lastMatch<16>(const u8 *buf, SuperVector<16> v) {
+const u8 *last_zero_match_inverted<16>(const u8 *buf, SuperVector<16> v) {
     SuperVector<16>::movemask_type z = v.movemask();
     DEBUG_PRINTF("buf %p z %08x \n", buf, z);
     DEBUG_PRINTF("z %08x\n", z);
@@ -92,7 +183,7 @@ const u8 *lastMatch<16>(const u8 *buf, SuperVector<16> v) {
 
 template<>
 really_really_inline
-const u8 *lastMatch<32>(const u8 *buf, SuperVector<32> v) {
+const u8 *last_zero_match_inverted<32>(const u8 *buf, SuperVector<32> v) {
     SuperVector<32>::movemask_type z = v.movemask();
     if (unlikely(z != 0xffffffff)) {
         u32 pos = clz32(~z);
@@ -106,7 +197,7 @@ const u8 *lastMatch<32>(const u8 *buf, SuperVector<32> v) {
 
 template <>
 really_really_inline
-const u8 *lastMatch<64>(const u8 *buf, SuperVector<64> v) {
+const u8 *last_zero_match_inverted<64>(const u8 *buf, SuperVector<64> v) {
     SuperVector<64>::movemask_type z = v.movemask();
     DEBUG_PRINTF("z 0x%016llx\n", z);
     if (unlikely(z != ~0ULL)) {
diff --git a/src/util/match.hpp b/src/util/match.hpp
index 9331d1f8..9b3c8fb9 100644
--- a/src/util/match.hpp
+++ b/src/util/match.hpp
@@ -38,10 +38,16 @@
 #include "util/supervector/supervector.hpp"
 
 template 
-const u8 *firstMatch(const u8 *buf, SuperVector v);
+const u8 *first_non_zero_match(const u8 *buf, SuperVector v); // set bits mark matches; lowest one wins
 
 template 
-const u8 *lastMatch(const u8 *buf, SuperVector v);
+const u8 *last_non_zero_match(const u8 *buf, SuperVector v); // set bits mark matches; highest one wins
+
+template 
+const u8 *first_zero_match_inverted(const u8 *buf, SuperVector v); // inverted mask: clear bits mark matches; lowest one wins
+
+template 
+const u8 *last_zero_match_inverted(const u8 *buf, SuperVector v); // inverted mask: clear bits mark matches; highest one wins
 
 #if defined(ARCH_IA32) || defined(ARCH_X86_64)
 #include "util/arch/x86/match.hpp"