diff --git a/src/nfa/vermicelli_simd.cpp b/src/nfa/vermicelli_simd.cpp index c2215651..e8b7caaf 100644 --- a/src/nfa/vermicelli_simd.cpp +++ b/src/nfa/vermicelli_simd.cpp @@ -41,42 +41,46 @@ template static really_inline -const u8 *vermicelliBlock(SuperVector data, SuperVector chars, SuperVector casemask, const u8 *buf) { +const u8 *vermicelliBlock(SuperVector const data, SuperVector const chars, SuperVector const casemask, u8 const *buf, u16 const len) { SuperVector mask = chars.eq(casemask & data); - return first_non_zero_match(buf, mask); + return first_non_zero_match(buf, mask, len); } template static really_inline -const u8 *vermicelliBlockNeg(SuperVector data, SuperVector chars, SuperVector casemask, const u8 *buf) { +const u8 *vermicelliBlockNeg(SuperVector const data, SuperVector const chars, SuperVector const casemask, u8 const *buf, u16 const len) { SuperVector mask = chars.eq(casemask & data); - return first_zero_match_inverted(buf, mask); + return first_zero_match_inverted(buf, mask, len); } template static really_inline -const u8 *rvermicelliBlock(SuperVector data, SuperVector chars, SuperVector casemask, const u8 *buf) { +const u8 *rvermicelliBlock(SuperVector const data, SuperVector const chars, SuperVector const casemask, u8 const *buf, u16 const len) { SuperVector mask = chars.eq(casemask & data); - return last_non_zero_match(buf, mask); + return last_non_zero_match(buf, mask, len); } template static really_inline -const u8 *rvermicelliBlockNeg(SuperVector data, SuperVector chars, SuperVector casemask, const u8 *buf) { +const u8 *rvermicelliBlockNeg(SuperVector const data, SuperVector const chars, SuperVector const casemask, const u8 *buf, u16 const len) { + data.print8("data"); + chars.print8("chars"); + casemask.print8("casemask"); SuperVector mask = chars.eq(casemask & data); - return last_zero_match_inverted(buf, mask); + mask.print8("mask"); + return last_zero_match_inverted(buf, mask, len); } template static really_inline -const u8 *vermicelliDoubleBlock(SuperVector data, SuperVector chars1, SuperVector chars2, SuperVector casemask, - u8 const c1, u8 const c2, u8 const casechar, const u8 *buf) { +const u8 *vermicelliDoubleBlock(SuperVector const data, SuperVector const chars1, SuperVector const chars2, SuperVector const casemask, + u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) { SuperVector v = casemask & data; SuperVector mask1 = chars1.eq(v); @@ -88,13 +92,13 @@ const u8 *vermicelliDoubleBlock(SuperVector data, SuperVector chars1, Supe DEBUG_PRINTF("partial = %d\n", partial_match); if (partial_match) return buf - 1; - return first_non_zero_match(buf, mask); + return first_non_zero_match(buf, mask, len); } template static really_inline -const u8 *rvermicelliDoubleBlock(SuperVector data, SuperVector chars1, SuperVector chars2, SuperVector casemask, - u8 const c1, u8 const c2, u8 const casechar, const u8 *buf) { +const u8 *rvermicelliDoubleBlock(SuperVector const data, SuperVector const chars1, SuperVector const chars2, SuperVector const casemask, + u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) { SuperVector v = casemask & data; SuperVector mask1 = chars1.eq(v); @@ -108,14 +112,14 @@ const u8 *rvermicelliDoubleBlock(SuperVector data, SuperVector chars1, Sup mask = mask | (SuperVector::Ones() >> (S-1)); } - return last_non_zero_match(buf, mask); + return last_non_zero_match(buf, mask, len); } template static really_inline -const u8 *vermicelliDoubleMaskedBlock(SuperVector data, SuperVector chars1, SuperVector chars2, - SuperVector mask1, SuperVector mask2, - u8 const c1, u8 const c2, u8 const m1, u8 const m2, const u8 *buf) { +const u8 *vermicelliDoubleMaskedBlock(SuperVector const data, SuperVector const chars1, SuperVector const chars2, + SuperVector const mask1, SuperVector const mask2, + u8 const c1, u8 const c2, u8 const m1, u8 const m2, u8 const *buf, u16 const len) { SuperVector v1 = chars1.eq(data & mask1); SuperVector v2 = chars2.eq(data & mask2); @@ -126,11 +130,11 @@ const u8 *vermicelliDoubleMaskedBlock(SuperVector data, SuperVector chars1 DEBUG_PRINTF("partial = %d\n", partial_match); if (partial_match) return buf - 1; - return first_non_zero_match(buf, mask); + return first_non_zero_match(buf, mask, len); } template -static const u8 *vermicelliExecReal(SuperVector const chars, SuperVector const casemask, const u8 *buf, const u8 *buf_end) { +static const u8 *vermicelliExecReal(SuperVector const chars, SuperVector const casemask, u8 const *buf, u8 const *buf_end) { assert(buf && buf_end); assert(buf < buf_end); DEBUG_PRINTF("verm %p len %zu\n", buf, buf_end - buf); @@ -149,17 +153,18 @@ static const u8 *vermicelliExecReal(SuperVector const chars, SuperVector c // Reach vector aligned boundaries DEBUG_PRINTF("until aligned %p \n", ROUNDUP_PTR(d, S)); if (!ISALIGNED_N(d, S)) { + u8 const *d1 = ROUNDUP_PTR(d, S); SuperVector data = SuperVector::loadu(d); - rv = vermicelliBlock(data, chars, casemask, d); + rv = vermicelliBlock(data, chars, casemask, d, S); if (rv) return rv; - d = ROUNDUP_PTR(d, S); + d = d1; } while(d + S <= buf_end) { __builtin_prefetch(d + 64); DEBUG_PRINTF("d %p \n", d); SuperVector data = SuperVector::load(d); - rv = vermicelliBlock(data, chars, casemask, d); + rv = vermicelliBlock(data, chars, casemask, d, S); if (rv) return rv; d += S; } @@ -170,7 +175,7 @@ static const u8 *vermicelliExecReal(SuperVector const chars, SuperVector c if (d != buf_end) { SuperVector data = SuperVector::loadu_maskz(d, buf_end - d); - rv = vermicelliBlock(data, chars, casemask, d); + rv = vermicelliBlock(data, chars, casemask, d, buf_end - d); DEBUG_PRINTF("rv %p \n", rv); if (rv && rv < buf_end) return rv; } @@ -198,17 +203,18 @@ static const u8 *nvermicelliExecReal(SuperVector const chars, SuperVector // Reach vector aligned boundaries DEBUG_PRINTF("until aligned %p \n", ROUNDUP_PTR(d, S)); if (!ISALIGNED_N(d, S)) { + u8 const *d1 = ROUNDUP_PTR(d, S); SuperVector data = SuperVector::loadu(d); - rv = vermicelliBlockNeg(data, chars, casemask, d); + rv = vermicelliBlockNeg(data, chars, casemask, d, S); if (rv) return rv; - d = ROUNDUP_PTR(d, S); + d = d1; } while(d + S <= buf_end) { __builtin_prefetch(d + 64); DEBUG_PRINTF("d %p \n", d); SuperVector data = SuperVector::load(d); - rv = vermicelliBlockNeg(data, chars, casemask, d); + rv = vermicelliBlockNeg(data, chars, casemask, d, S); if (rv) return rv; d += S; } @@ -219,7 +225,7 @@ static const u8 *nvermicelliExecReal(SuperVector const chars, SuperVector if (d != buf_end) { SuperVector data = SuperVector::loadu_maskz(d, buf_end - d); - rv = vermicelliBlockNeg(data, chars, casemask, d); + rv = vermicelliBlockNeg(data, chars, casemask, d, buf_end - d); DEBUG_PRINTF("rv %p \n", rv); if (rv && rv < buf_end) return rv; } @@ -249,11 +255,12 @@ const u8 *rvermicelliExecReal(SuperVector const chars, SuperVector const c // Reach vector aligned boundaries DEBUG_PRINTF("until aligned %p \n", ROUNDDOWN_PTR(d, S)); if (!ISALIGNED_N(d, S)) { + u8 const *d1 = ROUNDDOWN_PTR(d, S); SuperVector data = SuperVector::loadu(d - S); - rv = rvermicelliBlock(data, chars, casemask, d - S); + rv = rvermicelliBlock(data, chars, casemask, d - S, S); DEBUG_PRINTF("rv %p \n", rv); if (rv) return rv; - d = ROUNDDOWN_PTR(d, S); + d = d1; } while (d - S >= buf) { @@ -263,7 +270,7 @@ const u8 *rvermicelliExecReal(SuperVector const chars, SuperVector const c d -= S; SuperVector data = SuperVector::load(d); - rv = rvermicelliBlock(data, chars, casemask, d); + rv = rvermicelliBlock(data, chars, casemask, d, S); if (rv) return rv; } } @@ -273,7 +280,7 @@ const u8 *rvermicelliExecReal(SuperVector const chars, SuperVector const c if (d != buf) { SuperVector data = SuperVector::loadu(buf); - rv = rvermicelliBlock(data, chars, casemask, buf); + rv = rvermicelliBlock(data, chars, casemask, buf, d - buf); DEBUG_PRINTF("rv %p \n", rv); if (rv && rv < buf_end) return rv; } @@ -303,11 +310,12 @@ const u8 *rnvermicelliExecReal(SuperVector const chars, SuperVector const // Reach vector aligned boundaries DEBUG_PRINTF("until aligned %p \n", ROUNDDOWN_PTR(d, S)); if (!ISALIGNED_N(d, S)) { + u8 const *d1 = ROUNDDOWN_PTR(d, S); SuperVector data = SuperVector::loadu(d - S); - rv = rvermicelliBlockNeg(data, chars, casemask, d - S); + rv = rvermicelliBlockNeg(data, chars, casemask, d - S, S); DEBUG_PRINTF("rv %p \n", rv); if (rv) return rv; - d = ROUNDDOWN_PTR(d, S); + d = d1; } while (d - S >= buf) { @@ -317,7 +325,7 @@ const u8 *rnvermicelliExecReal(SuperVector const chars, SuperVector const d -= S; SuperVector data = SuperVector::load(d); - rv = rvermicelliBlockNeg(data, chars, casemask, d); + rv = rvermicelliBlockNeg(data, chars, casemask, d, S); if (rv) return rv; } } @@ -327,7 +335,7 @@ const u8 *rnvermicelliExecReal(SuperVector const chars, SuperVector const if (d != buf) { SuperVector data = SuperVector::loadu(buf); - rv = rvermicelliBlockNeg(data, chars, casemask, buf); + rv = rvermicelliBlockNeg(data, chars, casemask, buf, d - buf); DEBUG_PRINTF("rv %p \n", rv); if (rv && rv < buf_end) return rv; } @@ -360,17 +368,18 @@ static const u8 *vermicelliDoubleExecReal(u8 const c1, u8 const c2, SuperVector< // Reach vector aligned boundaries DEBUG_PRINTF("until aligned %p \n", ROUNDUP_PTR(d, S)); if (!ISALIGNED_N(d, S)) { + u8 const *d1 = ROUNDUP_PTR(d, S); SuperVector data = SuperVector::loadu(d); - rv = vermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d); + rv = vermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d, S); if (rv) return rv; - d = ROUNDUP_PTR(d, S); + d = d1; } while(d + S <= buf_end) { __builtin_prefetch(d + 64); DEBUG_PRINTF("d %p \n", d); SuperVector data = SuperVector::load(d); - rv = vermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d); + rv = vermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d, S); if (rv) return rv; d += S; } @@ -381,7 +390,7 @@ static const u8 *vermicelliDoubleExecReal(u8 const c1, u8 const c2, SuperVector< if (d != buf_end) { SuperVector data = SuperVector::loadu_maskz(d, buf_end - d); - rv = vermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d); + rv = vermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d, buf_end - d); DEBUG_PRINTF("rv %p \n", rv); if (rv && rv < buf_end) return rv; } @@ -424,11 +433,12 @@ const u8 *rvermicelliDoubleExecReal(char c1, char c2, SuperVector const casem // Reach vector aligned boundaries DEBUG_PRINTF("until aligned %p \n", ROUNDDOWN_PTR(d, S)); if (!ISALIGNED_N(d, S)) { + u8 const *d1 = ROUNDDOWN_PTR(d, S); SuperVector data = SuperVector::loadu(d - S); - rv = rvermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d - S); + rv = rvermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d - S, S); DEBUG_PRINTF("rv %p \n", rv); if (rv && rv < buf_end) return rv; - d = ROUNDDOWN_PTR(d, S); + d = d1; } while (d - S >= buf) { @@ -438,7 +448,7 @@ const u8 *rvermicelliDoubleExecReal(char c1, char c2, SuperVector const casem d -= S; SuperVector data = SuperVector::load(d); - rv = rvermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d); + rv = rvermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d, S); if (rv) return rv; } } @@ -448,7 +458,7 @@ const u8 *rvermicelliDoubleExecReal(char c1, char c2, SuperVector const casem if (d != buf) { SuperVector data = SuperVector::loadu(buf); - rv = rvermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, buf); + rv = rvermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, buf, d - buf); DEBUG_PRINTF("rv %p \n", rv); if (rv && rv < buf_end) return rv; } @@ -482,17 +492,18 @@ static const u8 *vermicelliDoubleMaskedExecReal(u8 const c1, u8 const c2, u8 con // Reach vector aligned boundaries DEBUG_PRINTF("until aligned %p \n", ROUNDUP_PTR(d, S)); if (!ISALIGNED_N(d, S)) { + u8 const *d1 = ROUNDUP_PTR(d, S); SuperVector data = SuperVector::loadu(d); - rv = vermicelliDoubleMaskedBlock(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, d); + rv = vermicelliDoubleMaskedBlock(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, d, S); if (rv) return rv; - d = ROUNDUP_PTR(d, S); + d = d1; } while(d + S <= buf_end) { __builtin_prefetch(d + 64); DEBUG_PRINTF("d %p \n", d); SuperVector data = SuperVector::load(d); - rv = vermicelliDoubleMaskedBlock(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, d); + rv = vermicelliDoubleMaskedBlock(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, d, S); if (rv) return rv; d += S; } @@ -503,7 +514,7 @@ static const u8 *vermicelliDoubleMaskedExecReal(u8 const c1, u8 const c2, u8 con if (d != buf_end) { SuperVector data = SuperVector::loadu_maskz(d, buf_end - d); - rv = vermicelliDoubleMaskedBlock(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, d); + rv = vermicelliDoubleMaskedBlock(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, d, buf_end - d); DEBUG_PRINTF("rv %p \n", rv); if (rv && rv < buf_end) return rv; } @@ -591,4 +602,4 @@ extern "C" const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char assert(buf < buf_end); return vermicelliDoubleMaskedExecReal(c1, c2, m1, m2, buf, buf_end); -} \ No newline at end of file +} diff --git a/src/util/arch/x86/match.hpp b/src/util/arch/x86/match.hpp index 26283ca7..cbf4ab6b 100644 --- a/src/util/arch/x86/match.hpp +++ b/src/util/arch/x86/match.hpp @@ -29,7 +29,7 @@ template <> really_really_inline -const u8 *first_non_zero_match<16>(const u8 *buf, SuperVector<16> v) { +const u8 *first_non_zero_match<16>(const u8 *buf, SuperVector<16> v, u16 const UNUSED len) { SuperVector<16>::movemask_type z = v.movemask(); DEBUG_PRINTF("buf %p z %08x \n", buf, z); DEBUG_PRINTF("z %08x\n", z); @@ -46,7 +46,7 @@ const u8 *first_non_zero_match<16>(const u8 *buf, SuperVector<16> v) { template <> really_really_inline -const u8 *first_non_zero_match<32>(const u8 *buf, SuperVector<32> v) { +const u8 *first_non_zero_match<32>(const u8 *buf, SuperVector<32> v, u16 const UNUSED len) { SuperVector<32>::movemask_type z = v.movemask(); DEBUG_PRINTF("z 0x%08x\n", z); if (unlikely(z)) { @@ -60,9 +60,13 @@ const u8 *first_non_zero_match<32>(const u8 *buf, SuperVector<32> v) { } template <> really_really_inline -const u8 *first_non_zero_match<64>(const u8 *buf, SuperVector<64>v) { +const u8 *first_non_zero_match<64>(const u8 *buf, SuperVector<64>v, u16 const len) { SuperVector<64>::movemask_type z = v.movemask(); DEBUG_PRINTF("z 0x%016llx\n", z); + u64a mask = (~0ULL) >> (64 - len); + DEBUG_PRINTF("mask %016llx\n", mask); + z &= mask; + DEBUG_PRINTF("z 0x%016llx\n", z); if (unlikely(z)) { u32 pos = ctz64(z); DEBUG_PRINTF("match @ pos %u\n", pos); @@ -75,7 +79,7 @@ const u8 *first_non_zero_match<64>(const u8 *buf, SuperVector<64>v) { template <> really_really_inline -const u8 *last_non_zero_match<16>(const u8 *buf, SuperVector<16> v) { +const u8 *last_non_zero_match<16>(const u8 *buf, SuperVector<16> v, u16 const UNUSED len) { SuperVector<16>::movemask_type z = v.movemask(); DEBUG_PRINTF("buf %p z %08x \n", buf, z); DEBUG_PRINTF("z %08x\n", z); @@ -91,7 +95,7 @@ const u8 *last_non_zero_match<16>(const u8 *buf, SuperVector<16> v) { template <> really_really_inline -const u8 *last_non_zero_match<32>(const u8 *buf, SuperVector<32> v) { +const u8 *last_non_zero_match<32>(const u8 *buf, SuperVector<32> v, u16 const UNUSED len) { SuperVector<32>::movemask_type z = v.movemask(); DEBUG_PRINTF("z 0x%08x\n", z); if (unlikely(z)) { @@ -105,14 +109,18 @@ const u8 *last_non_zero_match<32>(const u8 *buf, SuperVector<32> v) { } template <> really_really_inline -const u8 *last_non_zero_match<64>(const u8 *buf, SuperVector<64>v) { +const u8 *last_non_zero_match<64>(const u8 *buf, SuperVector<64>v, u16 const len) { SuperVector<64>::movemask_type z = v.movemask(); DEBUG_PRINTF("z 0x%016llx\n", z); + u64a mask = (~0ULL) >> (64 - len); + DEBUG_PRINTF("mask %016llx\n", mask); + z &= mask; + DEBUG_PRINTF("z 0x%016llx\n", z); if (unlikely(z)) { u32 pos = clz64(z); DEBUG_PRINTF("match @ pos %u\n", pos); assert(pos < 64); - return buf + (31 - pos); + return buf + (63 - pos); } else { return NULL; // no match } @@ -120,7 +128,7 @@ const u8 *last_non_zero_match<64>(const u8 *buf, SuperVector<64>v) { template <> really_really_inline -const u8 *first_zero_match_inverted<16>(const u8 *buf, SuperVector<16> v) { +const u8 *first_zero_match_inverted<16>(const u8 *buf, SuperVector<16> v, u16 const UNUSED len) { SuperVector<16>::movemask_type z = v.movemask(); DEBUG_PRINTF("buf %p z %08x \n", buf, z); DEBUG_PRINTF("z %08x\n", z); @@ -137,7 +145,7 @@ const u8 *first_zero_match_inverted<16>(const u8 *buf, SuperVector<16> v) { template <> really_really_inline -const u8 *first_zero_match_inverted<32>(const u8 *buf, SuperVector<32> v) { +const u8 *first_zero_match_inverted<32>(const u8 *buf, SuperVector<32> v, u16 const UNUSED len) { SuperVector<32>::movemask_type z = v.movemask(); DEBUG_PRINTF("z 0x%08x\n", z); if (unlikely(z != 0xffffffff)) { @@ -151,11 +159,15 @@ const u8 *first_zero_match_inverted<32>(const u8 *buf, SuperVector<32> v) { } template <> really_really_inline -const u8 *first_zero_match_inverted<64>(const u8 *buf, SuperVector<64>v) { +const u8 *first_zero_match_inverted<64>(const u8 *buf, SuperVector<64>v, u16 const len) { SuperVector<64>::movemask_type z = v.movemask(); DEBUG_PRINTF("z 0x%016llx\n", z); - if (unlikely(z != ~0ULL)) { - u32 pos = ctz64(~z); + u64a mask = (~0ULL) >> (64 - len); + DEBUG_PRINTF("mask %016llx\n", mask); + z = ~z & mask; + DEBUG_PRINTF("z 0x%016llx\n", z); + if (unlikely(z)) { + u32 pos = ctz64(z); DEBUG_PRINTF("match @ pos %u\n", pos); assert(pos < 64); return buf + pos; @@ -166,7 +178,7 @@ const u8 *first_zero_match_inverted<64>(const u8 *buf, SuperVector<64>v) { template <> really_really_inline -const u8 *last_zero_match_inverted<16>(const u8 *buf, SuperVector<16> v) { +const u8 *last_zero_match_inverted<16>(const u8 *buf, SuperVector<16> v, uint16_t UNUSED len ) { SuperVector<16>::movemask_type z = v.movemask(); DEBUG_PRINTF("buf %p z %08x \n", buf, z); DEBUG_PRINTF("z %08x\n", z); @@ -183,10 +195,10 @@ const u8 *last_zero_match_inverted<16>(const u8 *buf, SuperVector<16> v) { template<> really_really_inline -const u8 *last_zero_match_inverted<32>(const u8 *buf, SuperVector<32> v) { +const u8 *last_zero_match_inverted<32>(const u8 *buf, SuperVector<32> v, uint16_t UNUSED len) { SuperVector<32>::movemask_type z = v.movemask(); if (unlikely(z != 0xffffffff)) { - u32 pos = clz32(~z); + u32 pos = clz32(~z & 0xffffffff); DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos); assert(pos < 32); return buf + (31 - pos); @@ -197,11 +209,17 @@ const u8 *last_zero_match_inverted<32>(const u8 *buf, SuperVector<32> v) { template <> really_really_inline -const u8 *last_zero_match_inverted<64>(const u8 *buf, SuperVector<64> v) { +const u8 *last_zero_match_inverted<64>(const u8 *buf, SuperVector<64> v, uint16_t len) { + v.print8("v"); SuperVector<64>::movemask_type z = v.movemask(); DEBUG_PRINTF("z 0x%016llx\n", z); - if (unlikely(z != ~0ULL)) { - u32 pos = clz64(~z); + u64a mask = (~0ULL) >> (64 - len); + DEBUG_PRINTF("mask %016llx\n", mask); + z = ~z & mask; + DEBUG_PRINTF("z 0x%016llx\n", z); + if (unlikely(z)) { + u32 pos = clz64(z); + DEBUG_PRINTF("~z 0x%016llx\n", ~z); DEBUG_PRINTF("match @ pos %u\n", pos); assert(pos < 64); return buf + (63 - pos); diff --git a/src/util/match.hpp b/src/util/match.hpp index 9b3c8fb9..030db9bb 100644 --- a/src/util/match.hpp +++ b/src/util/match.hpp @@ -38,16 +38,16 @@ #include "util/supervector/supervector.hpp" template -const u8 *first_non_zero_match(const u8 *buf, SuperVector v); +const u8 *first_non_zero_match(const u8 *buf, SuperVector v, u16 const len = S); template -const u8 *last_non_zero_match(const u8 *buf, SuperVector v); +const u8 *last_non_zero_match(const u8 *buf, SuperVector v, u16 const len = S); template -const u8 *first_zero_match_inverted(const u8 *buf, SuperVector v); +const u8 *first_zero_match_inverted(const u8 *buf, SuperVector v, u16 const len = S); template -const u8 *last_zero_match_inverted(const u8 *buf, SuperVector v); +const u8 *last_zero_match_inverted(const u8 *buf, SuperVector v, u16 len = S); #if defined(ARCH_IA32) || defined(ARCH_X86_64) #include "util/arch/x86/match.hpp"