Fix all ASAN issues in vectorscan

This commit is contained in:
Danila Kutenin 2022-02-18 17:14:51 +00:00
parent 2819dc3d1b
commit 9af996b936
10 changed files with 262 additions and 75 deletions

View File

@ -58,12 +58,10 @@ hwlm_error_t double_zscan(const struct noodTable *n,const u8 *d, const u8 *buf,
return HWLM_SUCCESS;
}
// The short scan routine. It is used both to scan data up to an
// alignment boundary if needed and to finish off data that the aligned scan
// function can't handle (due to small/unaligned chunk at end)
template<uint16_t S>
static really_inline
hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf,
hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf,
SuperVector<S> caseMask, SuperVector<S> mask1,
const struct cb_info *cbi, size_t len, size_t start,
size_t end) {
@ -76,7 +74,36 @@ hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf,
return HWLM_SUCCESS;
}
SuperVector<S> v = SuperVector<S>::Zeroes();
memcpy(&v.u, d, l);
typename SuperVector<S>::movemask_type mask = SINGLE_LOAD_MASK(l);
v = v & caseMask;
typename SuperVector<S>::movemask_type z = mask & mask1.eqmask(v);
return single_zscan(n, d, buf, z, len, cbi);
}
// The short scan routine. It is used both to scan data up to an
// alignment boundary if needed and to finish off data that the aligned scan
// function can't handle (due to small/unaligned chunk at end)
template<uint16_t S>
static really_inline
hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf,
SuperVector<S> caseMask, SuperVector<S> mask1,
const struct cb_info *cbi, size_t len, size_t offset,
size_t start,
size_t end) {
const u8 *d = buf + offset;
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
const size_t l = end - start;
DEBUG_PRINTF("l = %ld\n", l);
assert(l <= 64);
if (!l) {
return HWLM_SUCCESS;
}
size_t buf_off = start - offset;
typename SuperVector<S>::movemask_type mask = SINGLE_LOAD_MASK(l) << buf_off;
SuperVector<S> v = SuperVector<S>::loadu(d) & caseMask;
typename SuperVector<S>::movemask_type z = mask & mask1.eqmask(v);
@ -85,8 +112,8 @@ hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf,
template<uint16_t S>
static really_inline
hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf,
SuperVector<S> caseMask, SuperVector<S> mask1, SuperVector<S> mask2, typename SuperVector<S>::movemask_type *lastz1,
hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf,
SuperVector<S> caseMask, SuperVector<S> mask1, SuperVector<S> mask2,
const struct cb_info *cbi, size_t len, size_t start, size_t end) {
const u8 *d = buf + start;
DEBUG_PRINTF("start %zu end %zu\n", start, end);
@ -95,13 +122,36 @@ hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf,
if (!l) {
return HWLM_SUCCESS;
}
SuperVector<S> v = SuperVector<S>::loadu(d) & caseMask;
SuperVector<S> v = SuperVector<S>::Zeroes();
memcpy(&v.u, d, l);
v = v & caseMask;
typename SuperVector<S>::movemask_type mask = DOUBLE_LOAD_MASK(l);
typename SuperVector<S>::movemask_type z1 = mask1.eqmask(v);
typename SuperVector<S>::movemask_type z2 = mask2.eqmask(v);
typename SuperVector<S>::movemask_type z = mask & (*lastz1 | z1 << 1) & z2;
*lastz1 = z1 >> (l -1);
typename SuperVector<S>::movemask_type z = mask & (z1 << 1) & z2;
return double_zscan(n, d, buf, z, len, cbi);
}
// Double-character scan over a single S-byte vector loaded from buf + offset.
// Returns HWLM_SUCCESS immediately when the window [start, end) is empty;
// otherwise returns whatever double_zscan() reports for the computed match
// mask.
//   n, cbi, len   - forwarded unchanged to double_zscan()
//   caseMask      - ANDed onto the loaded data before comparing
//   mask1, mask2  - vectors compared against the data via eqmask()
//   offset        - position in buf the vector is loaded from
//   start, end    - window whose length (end - start) sizes the load mask
// NOTE(review): start - offset is used as a shift count, so callers
// presumably guarantee offset <= start -- confirm against the call sites.
template<uint16_t S>
static really_inline
hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf,
SuperVector<S> caseMask, SuperVector<S> mask1, SuperVector<S> mask2,
const struct cb_info *cbi, size_t len, size_t offset, size_t start, size_t end) {
// d is the address the vector is loaded from, not necessarily buf + start.
const u8 *d = buf + offset;
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
const size_t l = end - start;
// The window may not be longer than one vector.
assert(l <= S);
if (!l) {
return HWLM_SUCCESS;
}
// NOTE(review): loadu() is presumably an unaligned full-vector load of S
// bytes starting at d -- confirm in SuperVector.
SuperVector<S> v = SuperVector<S>::loadu(d) & caseMask;
// Distance from the load address to the start of the window.
size_t buf_off = start - offset;
// Load mask for a window of length l, shifted up by buf_off.
typename SuperVector<S>::movemask_type mask = DOUBLE_LOAD_MASK(l) << buf_off;
typename SuperVector<S>::movemask_type z1 = mask1.eqmask(v);
typename SuperVector<S>::movemask_type z2 = mask2.eqmask(v);
// Keep a bit only where z2 is set, z1 is set one position lower, and the
// shifted load mask allows it.
typename SuperVector<S>::movemask_type z = mask & (z1 << 1) & z2;
return double_zscan(n, d, buf, z, len, cbi);
}
@ -119,11 +169,14 @@ hwlm_error_t scanSingleMain(const struct noodTable *n, const u8 *buf,
const u8 *e = buf + end;
DEBUG_PRINTF("start %p end %p \n", d, e);
assert(d < e);
if (e - d < S) {
return scanSingleShort(n, buf, caseMask, mask1, cbi, len, start, end);
}
if (d + S <= e) {
// peel off first part to cacheline boundary
const u8 *d1 = ROUNDUP_PTR(d, S);
DEBUG_PRINTF("until aligned %p \n", d1);
if (scanSingleUnaligned(n, buf, caseMask, mask1, cbi, len, start, d1 - buf) == HWLM_TERMINATED) {
if (scanSingleUnaligned(n, buf, caseMask, mask1, cbi, len, start, start, d1 - buf) == HWLM_TERMINATED) {
return HWLM_TERMINATED;
}
d = d1;
@ -147,8 +200,12 @@ hwlm_error_t scanSingleMain(const struct noodTable *n, const u8 *buf,
DEBUG_PRINTF("d %p e %p \n", d, e);
// finish off tail
size_t s2End = ROUNDDOWN_PTR(e, S) - buf;
if (s2End == end) {
return HWLM_SUCCESS;
}
return scanSingleUnaligned(n, buf, caseMask, mask1, cbi, len, d - buf, end);
return scanSingleUnaligned(n, buf, caseMask, mask1, cbi, len, end - S, s2End, len);
}
template <uint16_t S>
@ -169,14 +226,17 @@ hwlm_error_t scanDoubleMain(const struct noodTable *n, const u8 *buf,
const u8 *e = buf + end;
DEBUG_PRINTF("start %p end %p \n", d, e);
assert(d < e);
if (e - d < S) {
return scanDoubleShort(n, buf, caseMask, mask1, mask2, cbi, len, d - buf, end);
}
if (d + S <= e) {
// peel off first part to cacheline boundary
const u8 *d1 = ROUNDUP_PTR(d, S);
const u8 *d1 = ROUNDUP_PTR(d, S) + 1;
DEBUG_PRINTF("until aligned %p \n", d1);
if (scanDoubleUnaligned(n, buf, caseMask, mask1, mask2, &lastz1, cbi, len, start, d1 - buf) == HWLM_TERMINATED) {
if (scanDoubleUnaligned(n, buf, caseMask, mask1, mask2, cbi, len, start, start, d1 - buf) == HWLM_TERMINATED) {
return HWLM_TERMINATED;
}
d = d1;
d = d1 - 1;
size_t loops = (end - (d - buf)) / S;
DEBUG_PRINTF("loops %ld \n", loops);
@ -196,12 +256,16 @@ hwlm_error_t scanDoubleMain(const struct noodTable *n, const u8 *buf,
hwlm_error_t rv = double_zscan(n, d, buf, z, len, cbi);
RETURN_IF_TERMINATED(rv);
}
if (loops == 0) {
d = d1;
}
}
DEBUG_PRINTF("d %p e %p \n", d, e);
// finish off tail
return scanDoubleUnaligned(n, buf, caseMask, mask1, mask2, &lastz1, cbi, len, d - buf, end);
size_t s2End = ROUNDDOWN_PTR(e, S) - buf;
if (s2End == end) {
return HWLM_SUCCESS;
}
return scanDoubleUnaligned(n, buf, caseMask, mask1, mask2, cbi, len, end - S, d - buf, end);
}
// Single-character specialisation, used when keyLen = 1

View File

@ -67,7 +67,7 @@ const u8 *rvermicelliBlockNeg(SuperVector<S> const data, SuperVector<S> const ch
return last_zero_match_inverted<S>(buf, mask, len);
}
template <uint16_t S>
template <uint16_t S, bool check_partial>
static really_inline
const u8 *vermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) {
@ -78,14 +78,16 @@ const u8 *vermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const
SuperVector<S> mask = mask1 & (mask2 >> 1);
DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]);
bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
bool partial_match = (check_partial && ((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
if (partial_match) return buf - 1;
if (partial_match) {
mask = mask | ((SuperVector<S>::Ones() >> (S-1)) << (S-1));
}
return first_non_zero_match<S>(buf, mask, len);
}
template <uint16_t S>
template <uint16_t S, bool check_partial>
static really_inline
const u8 *rvermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) {
@ -96,7 +98,7 @@ const u8 *rvermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const
SuperVector<S> mask = (mask1 << 1)& mask2;
DEBUG_PRINTF("buf[0] = %02hhx, buf[-1] = %02hhx\n", buf[0], buf[-1]);
bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
bool partial_match = (check_partial && ((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
if (partial_match) {
mask = mask | (SuperVector<S>::Ones() >> (S-1));
@ -105,7 +107,7 @@ const u8 *rvermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const
return last_non_zero_match<S>(buf, mask, len);
}
template <uint16_t S>
template <uint16_t S, bool check_partial>
static really_inline
const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2,
SuperVector<S> const mask1, SuperVector<S> const mask2,
@ -116,9 +118,11 @@ const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S>
SuperVector<S> mask = v1 & (v2 >> 1);
DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]);
bool partial_match = (((buf[0] & m1) == c2) && ((buf[-1] & m2) == c1));
bool partial_match = (check_partial && ((buf[0] & m2) == c2) && ((buf[-1] & m1) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
if (partial_match) return buf - 1;
if (partial_match) {
mask = mask | ((SuperVector<S>::Ones() >> (S-1)) << (S-1));
}
return first_non_zero_match<S>(buf, mask, len);
}

View File

@ -67,7 +67,7 @@ const u8 *rvermicelliBlockNeg(SuperVector<S> const data, SuperVector<S> const ch
return last_zero_match_inverted<S>(buf, mask, len);
}
template <uint16_t S>
template <uint16_t S, bool check_partial>
static really_inline
const u8 *vermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) {
@ -78,14 +78,16 @@ const u8 *vermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const
SuperVector<S> mask = mask1 & (mask2 >> 1);
DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]);
bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
bool partial_match = (check_partial && ((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
if (partial_match) return buf - 1;
if (partial_match) {
mask = mask | ((SuperVector<S>::Ones() >> (S-1)) << (S-1));
}
return first_non_zero_match<S>(buf, mask, len);
}
template <uint16_t S>
template <uint16_t S, bool check_partial>
static really_inline
const u8 *rvermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) {
@ -96,7 +98,7 @@ const u8 *rvermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const
SuperVector<S> mask = (mask1 << 1)& mask2;
DEBUG_PRINTF("buf[0] = %02hhx, buf[-1] = %02hhx\n", buf[0], buf[-1]);
bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
bool partial_match = (check_partial && ((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
if (partial_match) {
mask = mask | (SuperVector<S>::Ones() >> (S-1));
@ -105,7 +107,7 @@ const u8 *rvermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const
return last_non_zero_match<S>(buf, mask, len);
}
template <uint16_t S>
template <uint16_t S, bool check_partial>
static really_inline
const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2,
SuperVector<S> const mask1, SuperVector<S> const mask2,
@ -116,9 +118,11 @@ const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S>
SuperVector<S> mask = v1 & (v2 >> 1);
DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]);
bool partial_match = (((buf[0] & m1) == c2) && ((buf[-1] & m2) == c1));
bool partial_match = (check_partial && ((buf[0] & m2) == c2) && ((buf[-1] & m1) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
if (partial_match) return buf - 1;
if (partial_match) {
mask = mask | ((SuperVector<S>::Ones() >> (S-1)) << (S-1));
}
return first_non_zero_match<S>(buf, mask, len);
}

View File

@ -128,8 +128,8 @@ const u8 *shuftiExecReal(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *bu
// finish off tail
if (d != buf_end) {
SuperVector<S> chars = SuperVector<S>::loadu_maskz(d, buf_end - d);
rv = fwdBlock(wide_mask_lo, wide_mask_hi, chars, d);
SuperVector<S> chars = SuperVector<S>::loadu(buf_end - S);
rv = fwdBlock(wide_mask_lo, wide_mask_hi, chars, buf_end - S);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
}
@ -240,22 +240,36 @@ const u8 *shuftiDoubleExecReal(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128
// finish off tail
if (d != buf_end) {
SuperVector<S> chars = SuperVector<S>::loadu(d);
rv = fwdBlockDouble(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, chars, d);
SuperVector<S> chars = SuperVector<S>::Zeroes();
const u8 *end_buf;
if (buf_end - buf < S) {
memcpy(&chars.u, buf, buf_end - buf);
end_buf = buf;
} else {
chars = SuperVector<S>::loadu(buf_end - S);
end_buf = buf_end - S;
}
rv = fwdBlockDouble(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, chars, end_buf);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
}
return buf_end;
}
const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
const u8 *buf_end) {
return shuftiExecReal<VECTORSIZE>(mask_lo, mask_hi, buf, buf_end);
if (buf_end - buf < VECTORSIZE) {
return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, buf, buf_end);
}
return shuftiExecReal<VECTORSIZE>(mask_lo, mask_hi, buf, buf_end);
}
// Dispatcher for the rshufti scan over [buf, buf_end).
// Inputs shorter than VECTORSIZE bytes are handed to shuftiRevSlow(); all
// other inputs go to rshuftiExecReal<VECTORSIZE>(). The return value is
// whatever the selected routine returns.
// NOTE(review): the short-input guard presumably exists so the vector path
// never loads a full vector from a buffer smaller than one -- confirm
// against rshuftiExecReal.
const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
const u8 *buf_end) {
if (buf_end - buf < VECTORSIZE) {
// The slow path takes the two masks as raw byte pointers.
return shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, buf, buf_end);
}
return rshuftiExecReal<VECTORSIZE>(mask_lo, mask_hi, buf, buf_end);
}

View File

@ -107,8 +107,16 @@ const u8 *truffleExecReal(m128 &shuf_mask_lo_highclear, m128 shuf_mask_lo_highse
// finish off tail
if (d != buf_end) {
SuperVector<S> chars = SuperVector<S>::loadu_maskz(d, buf_end - d);
rv = fwdBlock(wide_shuf_mask_lo_highclear, wide_shuf_mask_lo_highset, chars, d);
SuperVector<S> chars = SuperVector<S>::Zeroes();
const u8* end_buf;
if (buf_end - buf < S) {
memcpy(&chars.u, buf, buf_end - buf);
end_buf = buf;
} else {
chars = SuperVector<S>::loadu(buf_end - S);
end_buf = buf_end - S;
}
rv = fwdBlock(wide_shuf_mask_lo_highclear, wide_shuf_mask_lo_highset, chars, end_buf);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
}
@ -171,7 +179,12 @@ const u8 *rtruffleExecReal(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highse
// finish off head
if (d != buf) {
SuperVector<S> chars = SuperVector<S>::loadu(buf);
SuperVector<S> chars = SuperVector<S>::Zeroes();
if (buf_end - buf < S) {
memcpy(&chars.u, buf, buf_end - buf);
} else {
chars = SuperVector<S>::loadu(buf);
}
rv = revBlock(wide_shuf_mask_lo_highclear, wide_shuf_mask_lo_highset, chars, buf);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;

View File

@ -55,17 +55,17 @@ template <uint16_t S>
static really_inline
const u8 *rvermicelliBlockNeg(SuperVector<S> const data, SuperVector<S> const chars, SuperVector<S> const casemask, const u8 *buf, u16 const len);
template <uint16_t S>
template <uint16_t S, bool check_partial = true>
static really_inline
const u8 *vermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len);
template <uint16_t S>
template <uint16_t S, bool check_partial = true>
static really_inline
const u8 *rvermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len);
template <uint16_t S>
template <uint16_t S, bool check_partial = true>
static really_inline
const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2,
SuperVector<S> const mask1, SuperVector<S> const mask2,
@ -120,8 +120,8 @@ static const u8 *vermicelliExecReal(SuperVector<S> const chars, SuperVector<S> c
// finish off tail
if (d != buf_end) {
SuperVector<S> data = SuperVector<S>::loadu_maskz(d, buf_end - d);
rv = vermicelliBlock(data, chars, casemask, d, buf_end - d);
SuperVector<S> data = SuperVector<S>::loadu(buf_end - S);
rv = vermicelliBlock(data, chars, casemask, buf_end - S, buf_end - d);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
}
@ -170,8 +170,8 @@ static const u8 *nvermicelliExecReal(SuperVector<S> const chars, SuperVector<S>
// finish off tail
if (d != buf_end) {
SuperVector<S> data = SuperVector<S>::loadu_maskz(d, buf_end - d);
rv = vermicelliBlockNeg(data, chars, casemask, d, buf_end - d);
SuperVector<S> data = SuperVector<S>::loadu(buf_end - S);
rv = vermicelliBlockNeg(data, chars, casemask, buf_end - S, buf_end - d);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
}
@ -316,17 +316,17 @@ static const u8 *vermicelliDoubleExecReal(u8 const c1, u8 const c2, SuperVector<
if (!ISALIGNED_N(d, S)) {
u8 const *d1 = ROUNDUP_PTR(d, S);
SuperVector<S> data = SuperVector<S>::loadu(d);
rv = vermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d, S);
if (rv) return rv;
rv = vermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d + S, S);
if (rv) return rv - S;
d = d1;
}
while(d + S <= buf_end) {
while(d + S < buf_end) {
__builtin_prefetch(d + 64);
DEBUG_PRINTF("d %p \n", d);
SuperVector<S> data = SuperVector<S>::load(d);
rv = vermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d, S);
if (rv) return rv;
rv = vermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d + S, S);
if (rv) return rv - S;
d += S;
}
}
@ -335,8 +335,16 @@ static const u8 *vermicelliDoubleExecReal(u8 const c1, u8 const c2, SuperVector<
// finish off tail
if (d != buf_end) {
SuperVector<S> data = SuperVector<S>::loadu_maskz(d, buf_end - d);
rv = vermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d, buf_end - d);
SuperVector<S> data = SuperVector<S>::Zeroes();
const u8* end_buf;
if (buf_end - buf < S) {
memcpy(&data.u, buf, buf_end - buf);
end_buf = buf;
} else {
data = SuperVector<S>::loadu(buf_end - S);
end_buf = buf_end - S;
}
rv = vermicelliDoubleBlock<S, false>(data, chars1, chars2, casemask, c1, c2, casechar, end_buf, buf_end - d);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
}
@ -403,8 +411,13 @@ const u8 *rvermicelliDoubleExecReal(char c1, char c2, SuperVector<S> const casem
// finish off head
if (d != buf) {
SuperVector<S> data = SuperVector<S>::loadu(buf);
rv = rvermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, buf, d - buf);
SuperVector<S> data = SuperVector<S>::Zeroes();
if (d - buf < S) {
memcpy(&data.u, buf, d - buf);
} else {
data = SuperVector<S>::loadu(buf);
}
rv = rvermicelliDoubleBlock<S, false>(data, chars1, chars2, casemask, c1, c2, casechar, buf, d - buf);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
}
@ -440,17 +453,17 @@ static const u8 *vermicelliDoubleMaskedExecReal(u8 const c1, u8 const c2, u8 con
if (!ISALIGNED_N(d, S)) {
u8 const *d1 = ROUNDUP_PTR(d, S);
SuperVector<S> data = SuperVector<S>::loadu(d);
rv = vermicelliDoubleMaskedBlock(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, d, S);
if (rv) return rv;
rv = vermicelliDoubleMaskedBlock(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, d + S, S);
if (rv) return rv - S;
d = d1;
}
while(d + S <= buf_end) {
while(d + S < buf_end) {
__builtin_prefetch(d + 64);
DEBUG_PRINTF("d %p \n", d);
SuperVector<S> data = SuperVector<S>::load(d);
rv = vermicelliDoubleMaskedBlock(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, d, S);
if (rv) return rv;
rv = vermicelliDoubleMaskedBlock(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, d + S, S);
if (rv) return rv - S;
d += S;
}
}
@ -459,8 +472,16 @@ static const u8 *vermicelliDoubleMaskedExecReal(u8 const c1, u8 const c2, u8 con
// finish off tail
if (d != buf_end) {
SuperVector<S> data = SuperVector<S>::loadu_maskz(d, buf_end - d);
rv = vermicelliDoubleMaskedBlock(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, d, buf_end - d);
SuperVector<S> data = SuperVector<S>::Zeroes();
const u8* end_buf;
if (buf_end - buf < S) {
memcpy(&data.u, buf, buf_end - buf);
end_buf = buf;
} else {
data = SuperVector<S>::loadu(buf_end - S);
end_buf = buf_end - S;
}
rv = vermicelliDoubleMaskedBlock<S, false>(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, end_buf, buf_end - d);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
}
@ -480,6 +501,20 @@ extern "C" const u8 *vermicelliExec(char c, char nocase, const u8 *buf, const u8
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end);
// Small ranges.
if (buf_end - buf < VECTORSIZE) {
for (; buf < buf_end; buf++) {
char cur = (char)*buf;
if (nocase) {
cur &= CASE_CLEAR;
}
if (cur == c) {
break;
}
}
return buf;
}
const SuperVector<VECTORSIZE> chars = SuperVector<VECTORSIZE>::dup_u8(c);
const SuperVector<VECTORSIZE> casemask{nocase ? getCaseMask<VECTORSIZE>() : SuperVector<VECTORSIZE>::Ones()};
@ -493,6 +528,20 @@ extern "C" const u8 *nvermicelliExec(char c, char nocase, const u8 *buf, const u
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end);
// Small ranges.
if (buf_end - buf < VECTORSIZE) {
for (; buf < buf_end; buf++) {
char cur = *buf;
if (nocase) {
cur &= CASE_CLEAR;
}
if (cur != c) {
break;
}
}
return buf;
}
const SuperVector<VECTORSIZE> chars = SuperVector<VECTORSIZE>::dup_u8(c);
const SuperVector<VECTORSIZE> casemask{nocase ? getCaseMask<VECTORSIZE>() : SuperVector<VECTORSIZE>::Ones()};
@ -504,6 +553,20 @@ extern "C" const u8 *rvermicelliExec(char c, char nocase, const u8 *buf, const u
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end);
// Small ranges.
if (buf_end - buf < VECTORSIZE) {
for (buf_end--; buf_end >= buf; buf_end--) {
char cur = (char)*buf_end;
if (nocase) {
cur &= CASE_CLEAR;
}
if (cur == c) {
break;
}
}
return buf_end;
}
const SuperVector<VECTORSIZE> chars = SuperVector<VECTORSIZE>::dup_u8(c);
const SuperVector<VECTORSIZE> casemask{nocase ? getCaseMask<VECTORSIZE>() : SuperVector<VECTORSIZE>::Ones()};
@ -515,6 +578,20 @@ extern "C" const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf, const
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end);
// Small ranges.
if (buf_end - buf < VECTORSIZE) {
for (buf_end--; buf_end >= buf; buf_end--) {
char cur = (char)*buf_end;
if (nocase) {
cur &= CASE_CLEAR;
}
if (cur != c) {
break;
}
}
return buf_end;
}
const SuperVector<VECTORSIZE> chars = SuperVector<VECTORSIZE>::dup_u8(c);
const SuperVector<VECTORSIZE> casemask{nocase ? getCaseMask<VECTORSIZE>() : SuperVector<VECTORSIZE>::Ones()};

View File

@ -67,7 +67,7 @@ const u8 *rvermicelliBlockNeg(SuperVector<S> const data, SuperVector<S> const ch
return last_zero_match_inverted<S>(buf, mask, len);
}
template <uint16_t S>
template <uint16_t S, bool check_partial>
static really_inline
const u8 *vermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) {
@ -78,14 +78,16 @@ const u8 *vermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const
SuperVector<S> mask = mask1 & (mask2 >> 1);
DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]);
bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
bool partial_match = (check_partial && ((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
if (partial_match) return buf - 1;
if (partial_match) {
mask = mask | ((SuperVector<S>::Ones() >> (S-1)) << (S-1));
}
return first_non_zero_match<S>(buf, mask, len);
}
template <uint16_t S>
template <uint16_t S, bool check_partial>
static really_inline
const u8 *rvermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) {
@ -96,7 +98,7 @@ const u8 *rvermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const
SuperVector<S> mask = (mask1 << 1)& mask2;
DEBUG_PRINTF("buf[0] = %02hhx, buf[-1] = %02hhx\n", buf[0], buf[-1]);
bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
bool partial_match = (check_partial && ((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
if (partial_match) {
mask = mask | (SuperVector<S>::Ones() >> (S-1));
@ -105,7 +107,7 @@ const u8 *rvermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const
return last_non_zero_match<S>(buf, mask, len);
}
template <uint16_t S>
template <uint16_t S, bool check_partial>
static really_inline
const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2,
SuperVector<S> const mask1, SuperVector<S> const mask2,
@ -116,9 +118,11 @@ const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S>
SuperVector<S> mask = v1 & (v2 >> 1);
DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]);
bool partial_match = (((buf[0] & m1) == c2) && ((buf[-1] & m2) == c1));
bool partial_match = (check_partial && ((buf[0] & m2) == c2) && ((buf[-1] & m1) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
if (partial_match) return buf - 1;
if (partial_match) {
mask = mask | ((SuperVector<S>::Ones() >> (S-1)) << (S-1));
}
return first_non_zero_match<S>(buf, mask, len);
}

View File

@ -99,7 +99,7 @@ TEST(CustomAllocator, TwoAlignedCompileError) {
ASSERT_NE(nullptr, compile_err);
EXPECT_STREQ("Allocator returned misaligned memory.", compile_err->message);
hs_free_compile_error(compile_err);
hs_set_database_allocator(nullptr, nullptr);
hs_set_misc_allocator(nullptr, nullptr);
}
TEST(CustomAllocator, TwoAlignedDatabaseInfo) {

View File

@ -36,6 +36,9 @@
#include"util/supervector/supervector.hpp"
#include "nfa/limex_shuffle.hpp"
#ifdef setbit
#undef setbit
#endif
namespace {

View File

@ -33,6 +33,10 @@
#include "util/bytecode_ptr.h"
#include "util/simd_utils.h"
#ifdef setbit
#undef setbit
#endif
using namespace std;
using namespace ue2;