mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-11-20 02:47:11 +03:00
Add SVE2 support for dvermicelli
Change-Id: I056ef15e162ab6fb1f78964321ce893f4096367e
This commit is contained in:
committed by
Konstantinos Margaritis
parent
3296d538ea
commit
b6a7ee7e84
@@ -48,82 +48,6 @@
|
||||
#include "vermicelli_sse.h"
|
||||
#endif
|
||||
|
||||
static really_inline
|
||||
const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
|
||||
const u8 *buf_end) {
|
||||
DEBUG_PRINTF("double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
|
||||
nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
|
||||
assert(buf < buf_end);
|
||||
|
||||
VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
|
||||
VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
|
||||
|
||||
#ifdef HAVE_AVX512
|
||||
if (buf_end - buf <= VERM_BOUNDARY) {
|
||||
const u8 *ptr = nocase
|
||||
? dvermMiniNocase(chars1, chars2, buf, buf_end)
|
||||
: dvermMini(chars1, chars2, buf, buf_end);
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* check for partial match at end */
|
||||
u8 mask = nocase ? CASE_CLEAR : 0xff;
|
||||
if ((buf_end[-1] & mask) == (u8)c1) {
|
||||
DEBUG_PRINTF("partial!!!\n");
|
||||
return buf_end - 1;
|
||||
}
|
||||
|
||||
return buf_end;
|
||||
}
|
||||
#endif
|
||||
|
||||
assert((buf_end - buf) >= VERM_BOUNDARY);
|
||||
uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
|
||||
if (min) {
|
||||
// Input isn't aligned, so we need to run one iteration with an
|
||||
// unaligned load, then skip buf forward to the next aligned address.
|
||||
// There's some small overlap here, but we don't mind scanning it twice
|
||||
// if we can do it quickly, do we?
|
||||
const u8 *ptr = nocase
|
||||
? dvermPreconditionNocase(chars1, chars2, buf)
|
||||
: dvermPrecondition(chars1, chars2, buf);
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
buf += VERM_BOUNDARY - min;
|
||||
assert(buf < buf_end);
|
||||
}
|
||||
|
||||
// Aligned loops from here on in
|
||||
const u8 *ptr = nocase ? dvermSearchAlignedNocase(chars1, chars2, c1, c2,
|
||||
buf, buf_end)
|
||||
: dvermSearchAligned(chars1, chars2, c1, c2, buf,
|
||||
buf_end);
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Tidy up the mess at the end
|
||||
ptr = nocase ? dvermPreconditionNocase(chars1, chars2,
|
||||
buf_end - VERM_BOUNDARY)
|
||||
: dvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY);
|
||||
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* check for partial match at end */
|
||||
u8 mask = nocase ? CASE_CLEAR : 0xff;
|
||||
if ((buf_end[-1] & mask) == (u8)c1) {
|
||||
DEBUG_PRINTF("partial!!!\n");
|
||||
return buf_end - 1;
|
||||
}
|
||||
|
||||
return buf_end;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
|
||||
const u8 *buf, const u8 *buf_end) {
|
||||
@@ -194,60 +118,4 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
|
||||
return buf_end;
|
||||
}
|
||||
|
||||
/* returns highest offset of c2 (NOTE: not c1) */
|
||||
static really_inline
|
||||
const u8 *rvermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
|
||||
const u8 *buf_end) {
|
||||
DEBUG_PRINTF("rev double verm scan %s\\x%02hhx%02hhx over %zu bytes\n",
|
||||
nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf));
|
||||
assert(buf < buf_end);
|
||||
|
||||
VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */
|
||||
VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */
|
||||
|
||||
#ifdef HAVE_AVX512
|
||||
if (buf_end - buf <= VERM_BOUNDARY) {
|
||||
const u8 *ptr = nocase
|
||||
? rdvermMiniNocase(chars1, chars2, buf, buf_end)
|
||||
: rdvermMini(chars1, chars2, buf, buf_end);
|
||||
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// check for partial match at end ???
|
||||
return buf - 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
assert((buf_end - buf) >= VERM_BOUNDARY);
|
||||
size_t min = (size_t)buf_end % VERM_BOUNDARY;
|
||||
if (min) {
|
||||
// input not aligned, so we need to run one iteration with an unaligned
|
||||
// load, then skip buf forward to the next aligned address. There's
|
||||
// some small overlap here, but we don't mind scanning it twice if we
|
||||
// can do it quickly, do we?
|
||||
const u8 *ptr = nocase ? rdvermPreconditionNocase(chars1, chars2,
|
||||
buf_end - VERM_BOUNDARY)
|
||||
: rdvermPrecondition(chars1, chars2,
|
||||
buf_end - VERM_BOUNDARY);
|
||||
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
buf_end -= min;
|
||||
if (buf >= buf_end) {
|
||||
return buf_end;
|
||||
}
|
||||
}
|
||||
|
||||
// Aligned loops from here on in
|
||||
if (nocase) {
|
||||
return rdvermSearchAlignedNocase(chars1, chars2, c1, c2, buf, buf_end);
|
||||
} else {
|
||||
return rdvermSearchAligned(chars1, chars2, c1, c2, buf, buf_end);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* VERMICELLI_H */
|
||||
|
||||
Reference in New Issue
Block a user