mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Implement new DoubleVermicelli16 acceleration functions using SVE2
Change-Id: Id4a8ffca840caab930a6e78cc0dfd0fe7d320b4e
This commit is contained in:
parent
25183089fd
commit
6c6aee9682
@ -91,6 +91,28 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
|
|||||||
|
|
||||||
rv = vermicelli16Exec(accel->verm16.mask, c, c_end);
|
rv = vermicelli16Exec(accel->verm16.mask, c, c_end);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case ACCEL_DVERM16:
|
||||||
|
DEBUG_PRINTF("accel dverm16 %p %p\n", c, c_end);
|
||||||
|
if (c_end - c < 18) {
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* need to stop one early to get an accurate end state */
|
||||||
|
rv = vermicelliDouble16Exec(accel->dverm16.mask, accel->dverm16.firsts,
|
||||||
|
c, c_end - 1);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ACCEL_DVERM16_MASKED:
|
||||||
|
DEBUG_PRINTF("accel dverm16 masked %p %p\n", c, c_end);
|
||||||
|
if (c_end - c < 18) {
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* need to stop one early to get an accurate end state */
|
||||||
|
rv = vermicelliDoubleMasked16Exec(accel->mdverm16.mask, accel->mdverm16.c1,
|
||||||
|
accel->mdverm16.m1, c, c_end - 1);
|
||||||
|
break;
|
||||||
#endif // HAVE_SVE2
|
#endif // HAVE_SVE2
|
||||||
|
|
||||||
case ACCEL_DVERM_MASKED:
|
case ACCEL_DVERM_MASKED:
|
||||||
|
@ -63,7 +63,9 @@ enum AccelType {
|
|||||||
ACCEL_TRUFFLE,
|
ACCEL_TRUFFLE,
|
||||||
ACCEL_RED_TAPE,
|
ACCEL_RED_TAPE,
|
||||||
ACCEL_DVERM_MASKED,
|
ACCEL_DVERM_MASKED,
|
||||||
ACCEL_VERM16
|
ACCEL_VERM16,
|
||||||
|
ACCEL_DVERM16,
|
||||||
|
ACCEL_DVERM16_MASKED,
|
||||||
};
|
};
|
||||||
|
|
||||||
/** \brief Structure for accel framework. */
|
/** \brief Structure for accel framework. */
|
||||||
@ -104,6 +106,19 @@ union AccelAux {
|
|||||||
u8 offset;
|
u8 offset;
|
||||||
m128 mask;
|
m128 mask;
|
||||||
} verm16;
|
} verm16;
|
||||||
|
struct {
|
||||||
|
u8 accel_type;
|
||||||
|
u8 offset;
|
||||||
|
u64a firsts;
|
||||||
|
m128 mask;
|
||||||
|
} dverm16;
|
||||||
|
struct {
|
||||||
|
u8 accel_type;
|
||||||
|
u8 offset;
|
||||||
|
u8 c1; // used for partial match
|
||||||
|
u8 m1; // used for partial match
|
||||||
|
m128 mask;
|
||||||
|
} mdverm16;
|
||||||
struct {
|
struct {
|
||||||
u8 accel_type;
|
u8 accel_type;
|
||||||
u8 offset;
|
u8 offset;
|
||||||
|
@ -442,8 +442,8 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (double_byte_ok(info) && info.double_cr.none() &&
|
if (double_byte_ok(info) && info.double_cr.none()) {
|
||||||
(info.double_byte.size() == 2 || info.double_byte.size() == 4)) {
|
if ((info.double_byte.size() == 2 || info.double_byte.size() == 4)) {
|
||||||
bool ok = true;
|
bool ok = true;
|
||||||
|
|
||||||
assert(!info.double_byte.empty());
|
assert(!info.double_byte.empty());
|
||||||
@ -470,18 +470,48 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
|
|||||||
u8 m1;
|
u8 m1;
|
||||||
u8 m2;
|
u8 m2;
|
||||||
if (buildDvermMask(info.double_byte, &m1, &m2)) {
|
if (buildDvermMask(info.double_byte, &m1, &m2)) {
|
||||||
|
u8 c1 = info.double_byte.begin()->first & m1;
|
||||||
|
u8 c2 = info.double_byte.begin()->second & m2;
|
||||||
|
#ifdef HAVE_SVE2
|
||||||
|
if (vermicelliDoubleMasked16Build(c1, c2, m1, m2, (u8 *)&accel->mdverm16.mask)) {
|
||||||
|
accel->accel_type = ACCEL_DVERM16_MASKED;
|
||||||
|
accel->mdverm16.offset = verify_u8(info.double_offset);
|
||||||
|
accel->mdverm16.c1 = c1;
|
||||||
|
accel->mdverm16.m1 = m1;
|
||||||
|
DEBUG_PRINTF("building maskeddouble16-vermicelli for 0x%02hhx%02hhx\n",
|
||||||
|
c1, c2);
|
||||||
|
return;
|
||||||
|
} else if (info.double_byte.size() <= 8 &&
|
||||||
|
vermicelliDouble16Build(info.double_byte, (u8 *)&accel->dverm16.mask,
|
||||||
|
(u8 *)&accel->dverm16.firsts)) {
|
||||||
|
accel->accel_type = ACCEL_DVERM16;
|
||||||
|
accel->dverm16.offset = verify_u8(info.double_offset);
|
||||||
|
DEBUG_PRINTF("building double16-vermicelli\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif // HAVE_SVE2
|
||||||
accel->accel_type = ACCEL_DVERM_MASKED;
|
accel->accel_type = ACCEL_DVERM_MASKED;
|
||||||
accel->dverm.offset = verify_u8(info.double_offset);
|
accel->dverm.offset = verify_u8(info.double_offset);
|
||||||
accel->dverm.c1 = info.double_byte.begin()->first & m1;
|
accel->dverm.c1 = c1;
|
||||||
accel->dverm.c2 = info.double_byte.begin()->second & m2;
|
accel->dverm.c2 = c2;
|
||||||
accel->dverm.m1 = m1;
|
accel->dverm.m1 = m1;
|
||||||
accel->dverm.m2 = m2;
|
accel->dverm.m2 = m2;
|
||||||
DEBUG_PRINTF(
|
DEBUG_PRINTF(
|
||||||
"building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
|
"building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", c1, c2);
|
||||||
accel->dverm.c1, accel->dverm.c2);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#ifdef HAVE_SVE2
|
||||||
|
if (info.double_byte.size() <= 8 &&
|
||||||
|
vermicelliDouble16Build(info.double_byte, (u8 *)&accel->dverm16.mask,
|
||||||
|
(u8 *)&accel->dverm16.firsts)) {
|
||||||
|
accel->accel_type = ACCEL_DVERM16;
|
||||||
|
accel->dverm16.offset = verify_u8(info.double_offset);
|
||||||
|
DEBUG_PRINTF("building double16-vermicelli\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif // HAVE_SVE2
|
||||||
|
}
|
||||||
|
|
||||||
if (double_byte_ok(info) &&
|
if (double_byte_ok(info) &&
|
||||||
shuftiBuildDoubleMasks(
|
shuftiBuildDoubleMasks(
|
||||||
|
@ -207,16 +207,45 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
|
|||||||
u8 m2;
|
u8 m2;
|
||||||
|
|
||||||
if (buildDvermMask(info.double_stop2, &m1, &m2)) {
|
if (buildDvermMask(info.double_stop2, &m1, &m2)) {
|
||||||
aux->accel_type = ACCEL_DVERM_MASKED;
|
u8 c1 = info.double_stop2.begin()->first & m1;
|
||||||
aux->dverm.offset = offset;
|
u8 c2 = info.double_stop2.begin()->second & m2;
|
||||||
aux->dverm.c1 = info.double_stop2.begin()->first & m1;
|
#ifdef HAVE_SVE2
|
||||||
aux->dverm.c2 = info.double_stop2.begin()->second & m2;
|
if (vermicelliDoubleMasked16Build(c1, c2, m1, m2, (u8 *)&aux->mdverm16.mask)) {
|
||||||
aux->dverm.m1 = m1;
|
aux->accel_type = ACCEL_DVERM16_MASKED;
|
||||||
aux->dverm.m2 = m2;
|
aux->mdverm16.offset = offset;
|
||||||
DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
|
aux->mdverm16.c1 = c1;
|
||||||
aux->dverm.c1, aux->dverm.c2);
|
aux->mdverm16.m1 = m1;
|
||||||
|
DEBUG_PRINTF("building maskeddouble16-vermicelli for 0x%02hhx%02hhx\n",
|
||||||
|
c1, c2);
|
||||||
|
return;
|
||||||
|
} else if (outs2 <= 8 &&
|
||||||
|
vermicelliDouble16Build(info.double_stop2, (u8 *)&aux->dverm16.mask,
|
||||||
|
(u8 *)&aux->dverm16.firsts)) {
|
||||||
|
aux->accel_type = ACCEL_DVERM16;
|
||||||
|
aux->dverm16.offset = offset;
|
||||||
|
DEBUG_PRINTF("building double16-vermicelli\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
#endif // HAVE_SVE2
|
||||||
|
aux->accel_type = ACCEL_DVERM_MASKED;
|
||||||
|
aux->dverm.offset = offset;
|
||||||
|
aux->dverm.c1 = c1;
|
||||||
|
aux->dverm.c2 = c2;
|
||||||
|
aux->dverm.m1 = m1;
|
||||||
|
aux->dverm.m2 = m2;
|
||||||
|
DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", c1, c2);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef HAVE_SVE2
|
||||||
|
if (outs2 <= 8 &&
|
||||||
|
vermicelliDouble16Build(info.double_stop2, (u8 *)&aux->dverm16.mask,
|
||||||
|
(u8 *)&aux->dverm16.firsts)) {
|
||||||
|
aux->accel_type = ACCEL_DVERM16;
|
||||||
|
aux->dverm16.offset = offset;
|
||||||
|
DEBUG_PRINTF("building double16-vermicelli\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif // HAVE_SVE2
|
||||||
}
|
}
|
||||||
|
|
||||||
if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438.
|
if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438.
|
||||||
|
@ -267,9 +267,7 @@ const u8 *rvermSearch(svuint8_t chars, const u8 *buf, const u8 *buf_end,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
const u8 *dvermSearch(char c1, char c2, bool nocase, const u8 *buf,
|
const u8 *dvermSearch(svuint8_t chars, const u8 *buf, const u8 *buf_end) {
|
||||||
const u8 *buf_end) {
|
|
||||||
svuint16_t chars = getCharMaskDouble(c1, c2, nocase);
|
|
||||||
size_t len = buf_end - buf;
|
size_t len = buf_end - buf;
|
||||||
if (len <= svcntb()) {
|
if (len <= svcntb()) {
|
||||||
return dvermSearchOnce(chars, buf, buf_end);
|
return dvermSearchOnce(chars, buf, buf_end);
|
||||||
@ -374,7 +372,8 @@ const u8 *vermicelliDoubleExec(char c1, char c2, bool nocase, const u8 *buf,
|
|||||||
assert(buf < buf_end);
|
assert(buf < buf_end);
|
||||||
if (buf_end - buf > 1) {
|
if (buf_end - buf > 1) {
|
||||||
++buf;
|
++buf;
|
||||||
const u8 *ptr = dvermSearch(c1, c2, nocase, buf, buf_end);
|
svuint16_t chars = getCharMaskDouble(c1, c2, nocase);
|
||||||
|
const u8 *ptr = dvermSearch(chars, buf, buf_end);
|
||||||
if (ptr) {
|
if (ptr) {
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
@ -406,42 +405,92 @@ const u8 *rvermicelliDoubleExec(char c1, char c2, bool nocase, const u8 *buf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
svuint8_t getDupSVEMaskFrom128(m128 _mask) {
|
svuint8_t getDupSVEMaskFrom128(m128 mask) {
|
||||||
return svld1rq_u8(svptrue_b8(), (const uint8_t *)&_mask);
|
return svld1rq_u8(svptrue_b8(), (const uint8_t *)&mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
const u8 *vermicelli16Exec(const m128 _chars, const u8 *buf,
|
const u8 *vermicelli16Exec(const m128 mask, const u8 *buf,
|
||||||
const u8 *buf_end) {
|
const u8 *buf_end) {
|
||||||
DEBUG_PRINTF("verm16 scan over %td bytes\n", buf_end - buf);
|
DEBUG_PRINTF("verm16 scan over %td bytes\n", buf_end - buf);
|
||||||
svuint8_t chars = getDupSVEMaskFrom128(_chars);
|
svuint8_t chars = getDupSVEMaskFrom128(mask);
|
||||||
const u8 *ptr = vermSearch(chars, buf, buf_end, false);
|
const u8 *ptr = vermSearch(chars, buf, buf_end, false);
|
||||||
return ptr ? ptr : buf_end;
|
return ptr ? ptr : buf_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
const u8 *nvermicelli16Exec(const m128 _chars, const u8 *buf,
|
const u8 *nvermicelli16Exec(const m128 mask, const u8 *buf,
|
||||||
const u8 *buf_end) {
|
const u8 *buf_end) {
|
||||||
DEBUG_PRINTF("nverm16 scan over %td bytes\n", buf_end - buf);
|
DEBUG_PRINTF("nverm16 scan over %td bytes\n", buf_end - buf);
|
||||||
svuint8_t chars = getDupSVEMaskFrom128(_chars);
|
svuint8_t chars = getDupSVEMaskFrom128(mask);
|
||||||
const u8 *ptr = vermSearch(chars, buf, buf_end, true);
|
const u8 *ptr = vermSearch(chars, buf, buf_end, true);
|
||||||
return ptr ? ptr : buf_end;
|
return ptr ? ptr : buf_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
const u8 *rvermicelli16Exec(const m128 _chars, const u8 *buf,
|
const u8 *rvermicelli16Exec(const m128 mask, const u8 *buf,
|
||||||
const u8 *buf_end) {
|
const u8 *buf_end) {
|
||||||
DEBUG_PRINTF("rverm16 scan over %td bytes\n", buf_end - buf);
|
DEBUG_PRINTF("rverm16 scan over %td bytes\n", buf_end - buf);
|
||||||
svuint8_t chars = getDupSVEMaskFrom128(_chars);
|
svuint8_t chars = getDupSVEMaskFrom128(mask);
|
||||||
const u8 *ptr = rvermSearch(chars, buf, buf_end, false);
|
const u8 *ptr = rvermSearch(chars, buf, buf_end, false);
|
||||||
return ptr ? ptr : buf - 1;
|
return ptr ? ptr : buf - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static really_inline
|
static really_inline
|
||||||
const u8 *rnvermicelli16Exec(const m128 _chars, const u8 *buf,
|
const u8 *rnvermicelli16Exec(const m128 mask, const u8 *buf,
|
||||||
const u8 *buf_end) {
|
const u8 *buf_end) {
|
||||||
DEBUG_PRINTF("rnverm16 scan over %td bytes\n", buf_end - buf);
|
DEBUG_PRINTF("rnverm16 scan over %td bytes\n", buf_end - buf);
|
||||||
svuint8_t chars = getDupSVEMaskFrom128(_chars);
|
svuint8_t chars = getDupSVEMaskFrom128(mask);
|
||||||
const u8 *ptr = rvermSearch(chars, buf, buf_end, true);
|
const u8 *ptr = rvermSearch(chars, buf, buf_end, true);
|
||||||
return ptr ? ptr : buf - 1;
|
return ptr ? ptr : buf - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
bool vermicelliDouble16CheckPartial(const u64a first_chars, const u8 *buf_end) {
|
||||||
|
svuint8_t firsts = svreinterpret_u8(svdup_u64(first_chars));
|
||||||
|
svbool_t matches = svcmpeq(svptrue_b8(), firsts, svdup_u8(buf_end[-1]));
|
||||||
|
return svptest_any(svptrue_b8(), matches);
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *vermicelliDouble16Exec(const m128 mask, const u64a firsts,
|
||||||
|
const u8 *buf, const u8 *buf_end) {
|
||||||
|
assert(buf < buf_end);
|
||||||
|
DEBUG_PRINTF("double verm16 scan over %td bytes\n", buf_end - buf);
|
||||||
|
if (buf_end - buf > 1) {
|
||||||
|
++buf;
|
||||||
|
svuint16_t chars = svreinterpret_u16(getDupSVEMaskFrom128(mask));
|
||||||
|
const u8 *ptr = dvermSearch(chars, buf, buf_end);
|
||||||
|
if (ptr) {
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* check for partial match at end */
|
||||||
|
if (vermicelliDouble16CheckPartial(firsts, buf_end)) {
|
||||||
|
DEBUG_PRINTF("partial!!!\n");
|
||||||
|
return buf_end - 1;
|
||||||
|
}
|
||||||
|
return buf_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
static really_inline
|
||||||
|
const u8 *vermicelliDoubleMasked16Exec(const m128 mask, char c1, char m1,
|
||||||
|
const u8 *buf, const u8 *buf_end) {
|
||||||
|
assert(buf < buf_end);
|
||||||
|
DEBUG_PRINTF("double verm16 masked scan over %td bytes\n", buf_end - buf);
|
||||||
|
if (buf_end - buf > 1) {
|
||||||
|
++buf;
|
||||||
|
svuint16_t chars = svreinterpret_u16(getDupSVEMaskFrom128(mask));
|
||||||
|
const u8 *ptr = dvermSearch(chars, buf, buf_end);
|
||||||
|
if (ptr) {
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* check for partial match at end */
|
||||||
|
if ((buf_end[-1] & m1) == (u8)c1) {
|
||||||
|
DEBUG_PRINTF("partial!!!\n");
|
||||||
|
return buf_end - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return buf_end;
|
||||||
|
}
|
||||||
|
@ -50,4 +50,207 @@ bool vermicelli16Build(const CharReach &chars, u8 *rv) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool vermicelliDouble16Build(const flat_set<std::pair<u8, u8>> &twochar,
|
||||||
|
u8 *chars, u8 *firsts) {
|
||||||
|
constexpr size_t count_limit = 8;
|
||||||
|
if (twochar.size() > count_limit) return false;
|
||||||
|
size_t count = 0;
|
||||||
|
for (const auto &p : twochar) {
|
||||||
|
firsts[count] = p.first;
|
||||||
|
chars[2 * count] = p.first;
|
||||||
|
chars[(2 * count) + 1] = p.second;
|
||||||
|
++count;
|
||||||
|
}
|
||||||
|
for(; count < count_limit; ++count) {
|
||||||
|
firsts[count] = chars[0];
|
||||||
|
chars[2 * count] = chars[0];
|
||||||
|
chars[(2 * count) + 1] = chars[1];
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * \brief Fill a 16-byte output by repeating the first \p len bytes of
 * \p matches cyclically.
 *
 * \param matches source bytes
 * \param len     period of the repetition (callers in this file pass 2, 4
 *                or 8); must be non-zero
 * \param rv      output buffer of at least 16 bytes
 */
static really_inline
void fillMask(u8 matches[], size_t len, u8 *rv) {
    u8 *out = rv;
    for (size_t pos = 0; pos != 16; ++pos) {
        *out++ = matches[pos % len];
    }
}
|
||||||
|
|
||||||
|
/**
 * \brief Produce the two variants of \p c that differ only in one bit.
 *
 * \param cases output: cases[0] has the bit cleared, cases[1] has it set
 * \param bit   index of the bit to vary
 * \param c     input byte
 */
static really_inline
void getTwoCases(u8 cases[2], u8 bit, char c) {
    const u8 bit_mask = 1UL << bit;
    const u8 bit_cleared = c & (~bit_mask);
    const u8 bit_set = c | bit_mask;
    cases[0] = bit_cleared;
    cases[1] = bit_set;
}
|
||||||
|
|
||||||
|
/**
 * \brief For each of two input bytes, produce the variant with one bit
 * cleared followed by the variant with that bit set.
 *
 * \param cases output: { case1 cleared, case1 set, case2 cleared, case2 set }
 * \param bit   index of the bit to vary
 * \param case1 first input byte
 * \param case2 second input byte
 */
static really_inline
void getFourCases(u8 cases[4], u8 bit, char case1, char case2) {
    const u8 bit_mask = 1UL << bit;
    // Inputs are copied first so the output may overlap the caller's storage
    // for the arguments.
    const char inputs[2] = { case1, case2 };
    for (size_t i = 0; i < 2; ++i) {
        cases[2 * i] = inputs[i] & (~bit_mask);
        cases[2 * i + 1] = inputs[i] | bit_mask;
    }
}
|
||||||
|
|
||||||
|
/**
 * \brief For each of four input bytes, produce the variant with one bit
 * cleared followed by the variant with that bit set.
 *
 * \param cases output: for input k, cases[2k] has the bit cleared and
 *              cases[2k+1] has it set
 * \param bit   index of the bit to vary
 * \param case1 first input byte
 * \param case2 second input byte
 * \param case3 third input byte
 * \param case4 fourth input byte
 */
static really_inline
void getEightCases(u8 cases[8], u8 bit, char case1, char case2,
                   char case3, char case4) {
    const u8 bit_mask = 1UL << bit;
    // Inputs are copied first so the output may overlap the caller's storage
    // for the arguments.
    const char inputs[4] = { case1, case2, case3, case4 };
    for (size_t i = 0; i < 4; ++i) {
        cases[2 * i] = inputs[i] & (~bit_mask);
        cases[2 * i + 1] = inputs[i] | bit_mask;
    }
}
|
||||||
|
|
||||||
|
/**
 * \brief Expand a (c1, c2) byte pair with up to three "hole" bits into the
 * explicit list of byte pairs it stands for, written as a 16-byte table.
 *
 * holes[] lists the bit indices to vary: the first c1_holes entries apply to
 * c1, the following c2_holes entries apply to c2. Every combination of those
 * bits being cleared/set is enumerated. When fewer than eight pairs result,
 * the list is repeated to fill all 16 bytes.
 *
 * \param c1       first byte of the pair
 * \param c2       second byte of the pair
 * \param holes    bit indices to vary (c1's holes first, then c2's)
 * \param c1_holes number of holes[] entries that apply to c1
 * \param c2_holes number of holes[] entries that apply to c2
 * \param rv       output buffer of at least 16 bytes
 * \return true if the table was written; false for unsupported hole counts
 */
static really_inline
bool getDoubleMatchesForBits(u8 c1, u8 c2, u8 holes[3], u8 c1_holes,
                             u8 c2_holes, u8 *rv) {
    u8 two[2];   // variants after expanding one hole
    u8 four[4];  // variants after expanding two holes
    u8 eight[8]; // variants after expanding three holes

    if (c1_holes == 0) {
        // Only the second byte varies.
        if (c2_holes == 0) {
            u8 pairs[2] = { c1, c2 };
            fillMask(pairs, 2, rv);
            return true;
        }
        if (c2_holes == 1) {
            getTwoCases(two, holes[0], c2);
            u8 pairs[4] = { c1, two[0], c1, two[1] };
            fillMask(pairs, 4, rv);
            return true;
        }
        if (c2_holes == 2) {
            getTwoCases(two, holes[0], c2);
            getFourCases(four, holes[1], two[0], two[1]);
            u8 pairs[8];
            for (size_t i = 0; i < 4; ++i) {
                pairs[2 * i] = c1;
                pairs[2 * i + 1] = four[i];
            }
            fillMask(pairs, 8, rv);
            return true;
        }
        if (c2_holes == 3) {
            getTwoCases(two, holes[0], c2);
            getFourCases(four, holes[1], two[0], two[1]);
            getEightCases(eight, holes[2], four[0], four[1], four[2], four[3]);
            for (size_t i = 0; i < 8; ++i) {
                rv[2 * i] = c1;
                rv[2 * i + 1] = eight[i];
            }
            return true;
        }
        assert(c2_holes < 4);
        return false;
    }

    if (c1_holes == 1) {
        u8 first[2];
        getTwoCases(first, holes[0], c1);
        if (c2_holes == 0) {
            u8 pairs[4] = { first[0], c2, first[1], c2 };
            fillMask(pairs, 4, rv);
            return true;
        }
        if (c2_holes == 1) {
            getTwoCases(two, holes[1], c2);
            u8 pairs[8] = { first[0], two[0], first[0], two[1],
                            first[1], two[0], first[1], two[1] };
            fillMask(pairs, 8, rv);
            return true;
        }
        if (c2_holes == 2) {
            getTwoCases(two, holes[1], c2);
            getFourCases(four, holes[2], two[0], two[1]);
            // First byte is the outer index, second byte the inner one.
            for (size_t i = 0; i < 2; ++i) {
                for (size_t j = 0; j < 4; ++j) {
                    rv[8 * i + 2 * j] = first[i];
                    rv[8 * i + 2 * j + 1] = four[j];
                }
            }
            return true;
        }
        assert(c2_holes < 3);
        return false;
    }

    if (c1_holes == 2) {
        getTwoCases(two, holes[0], c1);
        getFourCases(four, holes[1], two[0], two[1]);
        if (c2_holes == 0) {
            u8 pairs[8] = { four[0], c2, four[1], c2,
                            four[2], c2, four[3], c2 };
            fillMask(pairs, 8, rv);
            return true;
        }
        if (c2_holes == 1) {
            u8 second[2];
            getTwoCases(second, holes[2], c2);
            // Second byte is the outer index here, first byte the inner one.
            for (size_t j = 0; j < 2; ++j) {
                for (size_t i = 0; i < 4; ++i) {
                    rv[8 * j + 2 * i] = four[i];
                    rv[8 * j + 2 * i + 1] = second[j];
                }
            }
            return true;
        }
        assert(c2_holes < 2);
        return false;
    }

    if (c1_holes == 3) {
        // All three holes belong to the first byte.
        assert(!c2_holes);
        getTwoCases(two, holes[0], c1);
        getFourCases(four, holes[1], two[0], two[1]);
        getEightCases(eight, holes[2], four[0], four[1], four[2], four[3]);
        for (size_t i = 0; i < 8; ++i) {
            rv[2 * i] = eight[i];
            rv[2 * i + 1] = c2;
        }
        return true;
    }

    return false;
}
|
||||||
|
|
||||||
|
/**
 * \brief Collect the zero ("hole") bit positions of two masks and expand the
 * masked byte pair into a 16-byte table via getDoubleMatchesForBits.
 *
 * The zero bits of m1 are recorded first, then those of m2; a mask is only
 * examined when its hole count argument is non-zero. The caller is expected
 * to pass hole counts that total at most three, since holes[] has three slots.
 *
 * \param c1       first byte of the pair
 * \param c2       second byte of the pair
 * \param m1       mask for the first byte
 * \param m2       mask for the second byte
 * \param c1_holes number of zero bits in m1
 * \param c2_holes number of zero bits in m2
 * \param rv       output buffer of at least 16 bytes
 * \return result of getDoubleMatchesForBits
 */
static really_inline
bool getDoubleMatchesForMask(char c1, char c2, char m1, char m2,
                             u8 c1_holes, u8 c2_holes, u8 *rv) {
    const char masks[2] = { m1, m2 };
    const u8 hole_counts[2] = { c1_holes, c2_holes };
    u8 holes[3] = { 0 };
    int used = 0;
    for (int which = 0; which < 2; ++which) {
        if (!hole_counts[which]) {
            continue;
        }
        for (int bit = 0; bit < 8; ++bit) {
            if ((masks[which] & (1UL << bit)) == 0) {
                holes[used++] = bit;
            }
        }
    }
    return getDoubleMatchesForBits(c1, c2, holes, c1_holes, c2_holes, rv);
}
|
||||||
|
|
||||||
|
bool vermicelliDoubleMasked16Build(char c1, char c2, char m1, char m2, u8 *rv) {
|
||||||
|
u8 c1_holes = 8 - __builtin_popcount(m1);
|
||||||
|
u8 c2_holes = 8 - __builtin_popcount(m2);
|
||||||
|
if (c1_holes + c2_holes > 3) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return getDoubleMatchesForMask(c1, c2, m1, m2, c1_holes, c2_holes, rv);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -43,6 +43,11 @@ namespace ue2 {
|
|||||||
|
|
||||||
bool vermicelli16Build(const CharReach &chars, u8 *rv);
|
bool vermicelli16Build(const CharReach &chars, u8 *rv);
|
||||||
|
|
||||||
|
bool vermicelliDouble16Build(const flat_set<std::pair<u8, u8>> &twochar,
|
||||||
|
u8 *chars, u8 *firsts);
|
||||||
|
|
||||||
|
bool vermicelliDoubleMasked16Build(char c1, char c2, char m1, char m2, u8 *rv);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
#endif // VERM_COMPILE_H
|
#endif // VERM_COMPILE_H
|
||||||
|
@ -311,11 +311,6 @@ TEST(RDoubleVermicelli, Exec5) {
|
|||||||
#include "nfa/vermicellicompile.h"
|
#include "nfa/vermicellicompile.h"
|
||||||
using namespace ue2;
|
using namespace ue2;
|
||||||
|
|
||||||
union Matches {
|
|
||||||
u8 val8[16];
|
|
||||||
m128 val128;
|
|
||||||
};
|
|
||||||
|
|
||||||
TEST(RVermicelli16, ExecNoMatch1) {
|
TEST(RVermicelli16, ExecNoMatch1) {
|
||||||
char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
|
char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
|
||||||
|
|
||||||
@ -323,8 +318,8 @@ TEST(RVermicelli16, ExecNoMatch1) {
|
|||||||
chars.set('a');
|
chars.set('a');
|
||||||
chars.set('B');
|
chars.set('B');
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches;
|
m128 matches;
|
||||||
bool ret = vermicelli16Build(chars, matches.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
@ -332,7 +327,7 @@ TEST(RVermicelli16, ExecNoMatch1) {
|
|||||||
const u8 *begin = (const u8 *)t1 + i;
|
const u8 *begin = (const u8 *)t1 + i;
|
||||||
const u8 *end = (const u8 *)t1 + strlen(t1) - j;
|
const u8 *end = (const u8 *)t1 + strlen(t1) - j;
|
||||||
|
|
||||||
const u8 *rv = rvermicelli16Exec(matches.val128, begin, end);
|
const u8 *rv = rvermicelli16Exec(matches, begin, end);
|
||||||
ASSERT_EQ(begin - 1, rv);
|
ASSERT_EQ(begin - 1, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -345,12 +340,12 @@ TEST(RVermicelli16, Exec1) {
|
|||||||
CharReach chars;
|
CharReach chars;
|
||||||
chars.set('a');
|
chars.set('a');
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches;
|
m128 matches;
|
||||||
bool ret = vermicelli16Build(chars, matches.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
const u8 *rv = rvermicelli16Exec(matches.val128, buf, buf + strlen(t1) - i);
|
const u8 *rv = rvermicelli16Exec(matches, buf, buf + strlen(t1) - i);
|
||||||
ASSERT_EQ(buf + 48, rv);
|
ASSERT_EQ(buf + 48, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -362,12 +357,12 @@ TEST(RVermicelli16, Exec2) {
|
|||||||
CharReach chars;
|
CharReach chars;
|
||||||
chars.set('a');
|
chars.set('a');
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches;
|
m128 matches;
|
||||||
bool ret = vermicelli16Build(chars, matches.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
const u8 *rv = rvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1));
|
const u8 *rv = rvermicelli16Exec(matches, buf + i, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 48, rv);
|
ASSERT_EQ(buf + 48, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -378,20 +373,20 @@ TEST(RVermicelli16, Exec3) {
|
|||||||
|
|
||||||
CharReach chars;
|
CharReach chars;
|
||||||
chars.set('a');
|
chars.set('a');
|
||||||
Matches matches_a;
|
m128 matches_a;
|
||||||
bool ret = vermicelli16Build(chars, matches_a.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches_a);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches_A;
|
m128 matches_A;
|
||||||
ret = vermicelli16Build(chars, matches_A.val8);
|
ret = vermicelli16Build(chars, (u8 *)&matches_A);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
const u8 *rv = rvermicelli16Exec(matches_a.val128, buf, buf + strlen(t1) - i);
|
const u8 *rv = rvermicelli16Exec(matches_a, buf, buf + strlen(t1) - i);
|
||||||
ASSERT_EQ(buf + 47, rv);
|
ASSERT_EQ(buf + 47, rv);
|
||||||
|
|
||||||
rv = rvermicelli16Exec(matches_A.val128, buf, buf + strlen(t1) - i);
|
rv = rvermicelli16Exec(matches_A, buf, buf + strlen(t1) - i);
|
||||||
ASSERT_EQ(buf + 48, rv);
|
ASSERT_EQ(buf + 48, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -402,21 +397,21 @@ TEST(RVermicelli16, Exec4) {
|
|||||||
|
|
||||||
CharReach chars;
|
CharReach chars;
|
||||||
chars.set('a');
|
chars.set('a');
|
||||||
Matches matches_a;
|
m128 matches_a;
|
||||||
bool ret = vermicelli16Build(chars, matches_a.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches_a);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches_A;
|
m128 matches_A;
|
||||||
ret = vermicelli16Build(chars, matches_A.val8);
|
ret = vermicelli16Build(chars, (u8 *)&matches_A);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 31; i++) {
|
for (size_t i = 0; i < 31; i++) {
|
||||||
t1[16 + i] = 'a';
|
t1[16 + i] = 'a';
|
||||||
const u8 *rv = rvermicelli16Exec(matches_a.val128, buf, buf + strlen(t1));
|
const u8 *rv = rvermicelli16Exec(matches_a, buf, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 16 + i, rv);
|
ASSERT_EQ(buf + 16 + i, rv);
|
||||||
|
|
||||||
rv = rvermicelli16Exec(matches_A.val128, buf, buf + strlen(t1));
|
rv = rvermicelli16Exec(matches_A, buf, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 16 + i, rv);
|
ASSERT_EQ(buf + 16 + i, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -426,18 +421,18 @@ TEST(RVermicelli16, Exec5) {
|
|||||||
const u8 *buf = (const u8 *)t1;
|
const u8 *buf = (const u8 *)t1;
|
||||||
|
|
||||||
CharReach chars;
|
CharReach chars;
|
||||||
Matches matches[16];
|
m128 matches[16];
|
||||||
bool ret;
|
bool ret;
|
||||||
|
|
||||||
for (int i = 0; i < 16; ++i) {
|
for (int i = 0; i < 16; ++i) {
|
||||||
chars.set('a' + i);
|
chars.set('a' + i);
|
||||||
ret = vermicelli16Build(chars, matches[i].val8);
|
ret = vermicelli16Build(chars, (u8 *)&matches[i]);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int j = 0; j < 16; ++j) {
|
for (int j = 0; j < 16; ++j) {
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
const u8 *rv = rvermicelli16Exec(matches[j].val128, buf, buf + strlen(t1) - i);
|
const u8 *rv = rvermicelli16Exec(matches[j], buf, buf + strlen(t1) - i);
|
||||||
ASSERT_EQ(buf + j + 17, rv);
|
ASSERT_EQ(buf + j + 17, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -451,13 +446,13 @@ TEST(RNVermicelli16, ExecNoMatch1) {
|
|||||||
chars.set('b');
|
chars.set('b');
|
||||||
chars.set('B');
|
chars.set('B');
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches;
|
m128 matches;
|
||||||
bool ret = vermicelli16Build(chars, matches.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
for (size_t j = 0; j < 16; j++) {
|
for (size_t j = 0; j < 16; j++) {
|
||||||
const u8 *rv = rnvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1) - j);
|
const u8 *rv = rnvermicelli16Exec(matches, buf + i, buf + strlen(t1) - j);
|
||||||
ASSERT_EQ(buf + i - 1, rv);
|
ASSERT_EQ(buf + i - 1, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -470,12 +465,12 @@ TEST(RNVermicelli16, Exec1) {
|
|||||||
CharReach chars;
|
CharReach chars;
|
||||||
chars.set('b');
|
chars.set('b');
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches;
|
m128 matches;
|
||||||
bool ret = vermicelli16Build(chars, matches.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
const u8 *rv = rnvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1) - i);
|
const u8 *rv = rnvermicelli16Exec(matches, buf + i, buf + strlen(t1) - i);
|
||||||
ASSERT_EQ(buf + 48, rv);
|
ASSERT_EQ(buf + 48, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -487,12 +482,12 @@ TEST(RNVermicelli16, Exec2) {
|
|||||||
CharReach chars;
|
CharReach chars;
|
||||||
chars.set('b');
|
chars.set('b');
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches;
|
m128 matches;
|
||||||
bool ret = vermicelli16Build(chars, matches.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
const u8 *rv = rnvermicelli16Exec(matches.val128, buf, buf + strlen(t1) - i);
|
const u8 *rv = rnvermicelli16Exec(matches, buf, buf + strlen(t1) - i);
|
||||||
ASSERT_EQ(buf + 48, rv);
|
ASSERT_EQ(buf + 48, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -503,20 +498,20 @@ TEST(RNVermicelli16, Exec3) {
|
|||||||
|
|
||||||
CharReach chars;
|
CharReach chars;
|
||||||
chars.set('b');
|
chars.set('b');
|
||||||
Matches matches_b;
|
m128 matches_b;
|
||||||
bool ret = vermicelli16Build(chars, matches_b.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches_b);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches_A;
|
m128 matches_A;
|
||||||
ret = vermicelli16Build(chars, matches_A.val8);
|
ret = vermicelli16Build(chars, (u8 *)&matches_A);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
const u8 *rv = rnvermicelli16Exec(matches_b.val128, buf + i, buf + strlen(t1));
|
const u8 *rv = rnvermicelli16Exec(matches_b, buf + i, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 48, rv);
|
ASSERT_EQ(buf + 48, rv);
|
||||||
|
|
||||||
rv = rnvermicelli16Exec(matches_A.val128, buf + i, buf + strlen(t1));
|
rv = rnvermicelli16Exec(matches_A, buf + i, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 47, rv);
|
ASSERT_EQ(buf + 47, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -527,21 +522,21 @@ TEST(RNVermicelli16, Exec4) {
|
|||||||
|
|
||||||
CharReach chars;
|
CharReach chars;
|
||||||
chars.set('b');
|
chars.set('b');
|
||||||
Matches matches_b;
|
m128 matches_b;
|
||||||
bool ret = vermicelli16Build(chars, matches_b.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches_b);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches_A;
|
m128 matches_A;
|
||||||
ret = vermicelli16Build(chars, matches_A.val8);
|
ret = vermicelli16Build(chars, (u8 *)&matches_A);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 31; i++) {
|
for (size_t i = 0; i < 31; i++) {
|
||||||
t1[16 + i] = 'a';
|
t1[16 + i] = 'a';
|
||||||
const u8 *rv = rnvermicelli16Exec(matches_b.val128, buf, buf + strlen(t1));
|
const u8 *rv = rnvermicelli16Exec(matches_b, buf, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 16 + i, rv);
|
ASSERT_EQ(buf + 16 + i, rv);
|
||||||
|
|
||||||
rv = rnvermicelli16Exec(matches_A.val128, buf, buf + strlen(t1));
|
rv = rnvermicelli16Exec(matches_A, buf, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 16 + i, rv);
|
ASSERT_EQ(buf + 16 + i, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -551,18 +546,18 @@ TEST(RNVermicelli16, Exec5) {
|
|||||||
const u8 *buf = (const u8 *)t1;
|
const u8 *buf = (const u8 *)t1;
|
||||||
|
|
||||||
CharReach chars;
|
CharReach chars;
|
||||||
Matches matches[16];
|
m128 matches[16];
|
||||||
bool ret;
|
bool ret;
|
||||||
|
|
||||||
for (int i = 0; i < 16; ++i) {
|
for (int i = 0; i < 16; ++i) {
|
||||||
chars.set('q' - i);
|
chars.set('q' - i);
|
||||||
ret = vermicelli16Build(chars, matches[i].val8);
|
ret = vermicelli16Build(chars, (u8 *)&matches[i]);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int j = 0; j < 16; ++j) {
|
for (int j = 0; j < 16; ++j) {
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
const u8 *rv = rnvermicelli16Exec(matches[j].val128, buf, buf + strlen(t1) - i);
|
const u8 *rv = rnvermicelli16Exec(matches[j], buf, buf + strlen(t1) - i);
|
||||||
ASSERT_EQ(buf - j + 32, rv);
|
ASSERT_EQ(buf - j + 32, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -528,11 +528,6 @@ TEST(DoubleVermicelliMasked, Exec4) {
|
|||||||
#include "nfa/vermicellicompile.h"
|
#include "nfa/vermicellicompile.h"
|
||||||
using namespace ue2;
|
using namespace ue2;
|
||||||
|
|
||||||
union Matches {
|
|
||||||
u8 val8[16];
|
|
||||||
m128 val128;
|
|
||||||
};
|
|
||||||
|
|
||||||
TEST(Vermicelli16, ExecNoMatch1) {
|
TEST(Vermicelli16, ExecNoMatch1) {
|
||||||
char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
|
char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
|
||||||
const u8 *buf = (const u8 *)t1;
|
const u8 *buf = (const u8 *)t1;
|
||||||
@ -541,13 +536,13 @@ TEST(Vermicelli16, ExecNoMatch1) {
|
|||||||
chars.set('a');
|
chars.set('a');
|
||||||
chars.set('B');
|
chars.set('B');
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches;
|
m128 matches;
|
||||||
bool ret = vermicelli16Build(chars, matches.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
for (size_t j = 0; j < 16; j++) {
|
for (size_t j = 0; j < 16; j++) {
|
||||||
const u8 *rv = vermicelli16Exec(matches.val128, buf + i, buf + strlen(t1) - j);
|
const u8 *rv = vermicelli16Exec(matches, buf + i, buf + strlen(t1) - j);
|
||||||
ASSERT_EQ(buf + strlen(t1) - j, rv);
|
ASSERT_EQ(buf + strlen(t1) - j, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -560,12 +555,12 @@ TEST(Vermicelli16, Exec1) {
|
|||||||
CharReach chars;
|
CharReach chars;
|
||||||
chars.set('a');
|
chars.set('a');
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches;
|
m128 matches;
|
||||||
bool ret = vermicelli16Build(chars, matches.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
const u8 *rv = vermicelli16Exec(matches.val128, buf + i, buf + strlen(t1));
|
const u8 *rv = vermicelli16Exec(matches, buf + i, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 17, rv);
|
ASSERT_EQ(buf + 17, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -577,12 +572,12 @@ TEST(Vermicelli16, Exec2) {
|
|||||||
CharReach chars;
|
CharReach chars;
|
||||||
chars.set('a');
|
chars.set('a');
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches;
|
m128 matches;
|
||||||
bool ret = vermicelli16Build(chars, matches.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
const u8 *rv = vermicelli16Exec(matches.val128, buf + i, buf + strlen(t1));
|
const u8 *rv = vermicelli16Exec(matches, buf + i, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 17, rv);
|
ASSERT_EQ(buf + 17, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -593,20 +588,20 @@ TEST(Vermicelli16, Exec3) {
|
|||||||
|
|
||||||
CharReach chars;
|
CharReach chars;
|
||||||
chars.set('a');
|
chars.set('a');
|
||||||
Matches matches_a;
|
m128 matches_a;
|
||||||
bool ret = vermicelli16Build(chars, matches_a.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches_a);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches_A;
|
m128 matches_A;
|
||||||
ret = vermicelli16Build(chars, matches_A.val8);
|
ret = vermicelli16Build(chars, (u8 *)&matches_A);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
const u8 *rv = vermicelli16Exec(matches_a.val128, buf + i, buf + strlen(t1));
|
const u8 *rv = vermicelli16Exec(matches_a, buf + i, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 18, rv);
|
ASSERT_EQ(buf + 18, rv);
|
||||||
|
|
||||||
rv = vermicelli16Exec(matches_A.val128, buf + i, buf + strlen(t1));
|
rv = vermicelli16Exec(matches_A, buf + i, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 17, rv);
|
ASSERT_EQ(buf + 17, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -617,21 +612,21 @@ TEST(Vermicelli16, Exec4) {
|
|||||||
|
|
||||||
CharReach chars;
|
CharReach chars;
|
||||||
chars.set('a');
|
chars.set('a');
|
||||||
Matches matches_a;
|
m128 matches_a;
|
||||||
bool ret = vermicelli16Build(chars, matches_a.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches_a);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches_A;
|
m128 matches_A;
|
||||||
ret = vermicelli16Build(chars, matches_A.val8);
|
ret = vermicelli16Build(chars, (u8 *)&matches_A);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 31; i++) {
|
for (size_t i = 0; i < 31; i++) {
|
||||||
t1[48 - i] = 'a';
|
t1[48 - i] = 'a';
|
||||||
const u8 *rv = vermicelli16Exec(matches_a.val128, buf, buf + strlen(t1));
|
const u8 *rv = vermicelli16Exec(matches_a, buf, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 48 - i, rv);
|
ASSERT_EQ(buf + 48 - i, rv);
|
||||||
|
|
||||||
rv = vermicelli16Exec(matches_A.val128, buf, buf + strlen(t1));
|
rv = vermicelli16Exec(matches_A, buf, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 48 - i, rv);
|
ASSERT_EQ(buf + 48 - i, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -641,18 +636,18 @@ TEST(Vermicelli16, Exec5) {
|
|||||||
const u8 *buf = (const u8 *)t1;
|
const u8 *buf = (const u8 *)t1;
|
||||||
|
|
||||||
CharReach chars;
|
CharReach chars;
|
||||||
Matches matches[16];
|
m128 matches[16];
|
||||||
bool ret;
|
bool ret;
|
||||||
|
|
||||||
for (int i = 0; i < 16; ++i) {
|
for (int i = 0; i < 16; ++i) {
|
||||||
chars.set('p' - i);
|
chars.set('p' - i);
|
||||||
ret = vermicelli16Build(chars, matches[i].val8);
|
ret = vermicelli16Build(chars, (u8 *)&matches[i]);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int j = 0; j < 16; ++j) {
|
for (int j = 0; j < 16; ++j) {
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
const u8 *rv = vermicelli16Exec(matches[j].val128, buf + i,buf + strlen(t1));
|
const u8 *rv = vermicelli16Exec(matches[j], buf + i,buf + strlen(t1));
|
||||||
ASSERT_EQ(buf - j + 32, rv);
|
ASSERT_EQ(buf - j + 32, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -666,13 +661,13 @@ TEST(NVermicelli16, ExecNoMatch1) {
|
|||||||
chars.set('b');
|
chars.set('b');
|
||||||
chars.set('B');
|
chars.set('B');
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches;
|
m128 matches;
|
||||||
bool ret = vermicelli16Build(chars, matches.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
for (size_t j = 0; j < 16; j++) {
|
for (size_t j = 0; j < 16; j++) {
|
||||||
const u8 *rv = nvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1) - j);
|
const u8 *rv = nvermicelli16Exec(matches, buf + i, buf + strlen(t1) - j);
|
||||||
ASSERT_EQ((buf + strlen(t1) - j), rv);
|
ASSERT_EQ((buf + strlen(t1) - j), rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -685,12 +680,12 @@ TEST(NVermicelli16, Exec1) {
|
|||||||
CharReach chars;
|
CharReach chars;
|
||||||
chars.set('b');
|
chars.set('b');
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches;
|
m128 matches;
|
||||||
bool ret = vermicelli16Build(chars, matches.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
const u8 *rv = nvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1));
|
const u8 *rv = nvermicelli16Exec(matches, buf + i, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 17, rv);
|
ASSERT_EQ(buf + 17, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -702,12 +697,12 @@ TEST(NVermicelli16, Exec2) {
|
|||||||
CharReach chars;
|
CharReach chars;
|
||||||
chars.set('b');
|
chars.set('b');
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches;
|
m128 matches;
|
||||||
bool ret = vermicelli16Build(chars, matches.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
const u8 *rv = nvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1));
|
const u8 *rv = nvermicelli16Exec(matches, buf + i, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 17, rv);
|
ASSERT_EQ(buf + 17, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -718,20 +713,20 @@ TEST(NVermicelli16, Exec3) {
|
|||||||
|
|
||||||
CharReach chars;
|
CharReach chars;
|
||||||
chars.set('b');
|
chars.set('b');
|
||||||
Matches matches_b;
|
m128 matches_b;
|
||||||
bool ret = vermicelli16Build(chars, matches_b.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches_b);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches_A;
|
m128 matches_A;
|
||||||
ret = vermicelli16Build(chars, matches_A.val8);
|
ret = vermicelli16Build(chars, (u8 *)&matches_A);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
const u8 *rv = nvermicelli16Exec(matches_b.val128, buf + i, buf + strlen(t1));
|
const u8 *rv = nvermicelli16Exec(matches_b, buf + i, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 17, rv);
|
ASSERT_EQ(buf + 17, rv);
|
||||||
|
|
||||||
rv = nvermicelli16Exec(matches_A.val128, buf + i, buf + strlen(t1));
|
rv = nvermicelli16Exec(matches_A, buf + i, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 18, rv);
|
ASSERT_EQ(buf + 18, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -742,21 +737,21 @@ TEST(NVermicelli16, Exec4) {
|
|||||||
|
|
||||||
CharReach chars;
|
CharReach chars;
|
||||||
chars.set('b');
|
chars.set('b');
|
||||||
Matches matches_b;
|
m128 matches_b;
|
||||||
bool ret = vermicelli16Build(chars, matches_b.val8);
|
bool ret = vermicelli16Build(chars, (u8 *)&matches_b);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
chars.set('A');
|
chars.set('A');
|
||||||
Matches matches_A;
|
m128 matches_A;
|
||||||
ret = vermicelli16Build(chars, matches_A.val8);
|
ret = vermicelli16Build(chars, (u8 *)&matches_A);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
for (size_t i = 0; i < 31; i++) {
|
for (size_t i = 0; i < 31; i++) {
|
||||||
t1[48 - i] = 'a';
|
t1[48 - i] = 'a';
|
||||||
const u8 *rv = nvermicelli16Exec(matches_b.val128, buf, buf + strlen(t1));
|
const u8 *rv = nvermicelli16Exec(matches_b, buf, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 48 - i, rv);
|
ASSERT_EQ(buf + 48 - i, rv);
|
||||||
|
|
||||||
rv = nvermicelli16Exec(matches_A.val128, buf, buf + strlen(t1));
|
rv = nvermicelli16Exec(matches_A, buf, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + 48 - i, rv);
|
ASSERT_EQ(buf + 48 - i, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -766,21 +761,393 @@ TEST(NVermicelli16, Exec5) {
|
|||||||
const u8 *buf = (const u8 *)t1;
|
const u8 *buf = (const u8 *)t1;
|
||||||
|
|
||||||
CharReach chars;
|
CharReach chars;
|
||||||
Matches matches[16];
|
m128 matches[16];
|
||||||
bool ret;
|
bool ret;
|
||||||
|
|
||||||
for (int i = 0; i < 16; ++i) {
|
for (int i = 0; i < 16; ++i) {
|
||||||
chars.set('a' + i);
|
chars.set('a' + i);
|
||||||
ret = vermicelli16Build(chars, matches[i].val8);
|
ret = vermicelli16Build(chars, (u8 *)&matches[i]);
|
||||||
ASSERT_TRUE(ret);
|
ASSERT_TRUE(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int j = 0; j < 16; ++j) {
|
for (int j = 0; j < 16; ++j) {
|
||||||
for (size_t i = 0; i < 16; i++) {
|
for (size_t i = 0; i < 16; i++) {
|
||||||
const u8 *rv = nvermicelli16Exec(matches[j].val128, buf + i, buf + strlen(t1));
|
const u8 *rv = nvermicelli16Exec(matches[j], buf + i, buf + strlen(t1));
|
||||||
ASSERT_EQ(buf + j + 18, rv);
|
ASSERT_EQ(buf + j + 18, rv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(DoubleVermicelli16, ExecNoMatch1) {
|
||||||
|
std::string t1("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb");
|
||||||
|
const u8 *t1_raw = (const u8 *)t1.c_str();
|
||||||
|
|
||||||
|
m128 matches;
|
||||||
|
u64a firsts;
|
||||||
|
flat_set<std::pair<u8, u8>> pairs;
|
||||||
|
for (int i = 0; i < 16; i += 2) {
|
||||||
|
pairs.insert(std::make_pair('a' + i, 'a' + i + 1));
|
||||||
|
}
|
||||||
|
bool ret = vermicelliDouble16Build(pairs, (u8 *)&matches, (u8 *)&firsts);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 16; i++) {
|
||||||
|
for (size_t j = 0; j < 16; j++) {
|
||||||
|
const u8 *rv = vermicelliDouble16Exec(matches, firsts,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i - j);
|
||||||
|
ASSERT_EQ(t1_raw + t1.length() - i - j, rv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(DoubleVermicelli16, ExecNoMatch2) {
|
||||||
|
std::string t1("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb");
|
||||||
|
const u8 *t1_raw = (const u8 *)t1.c_str();
|
||||||
|
|
||||||
|
m128 matches;
|
||||||
|
u64a firsts;
|
||||||
|
flat_set<std::pair<u8, u8>> pairs;
|
||||||
|
pairs.insert(std::make_pair('a', 'b'));
|
||||||
|
pairs.insert(std::make_pair('A', 'B'));
|
||||||
|
pairs.insert(std::make_pair('B', 'A'));
|
||||||
|
pairs.insert(std::make_pair('B', 'B'));
|
||||||
|
bool ret = vermicelliDouble16Build(pairs, (u8 *)&matches, (u8 *)&firsts);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 16; i++) {
|
||||||
|
for (size_t j = 0; j < 16; j++) {
|
||||||
|
const u8 *rv = vermicelliDouble16Exec(matches, firsts,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i - j);
|
||||||
|
ASSERT_EQ(t1_raw + t1.length() - i - j, rv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(DoubleVermicelli16, ExecNoMatch3) {
|
||||||
|
std::string t1("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb");
|
||||||
|
const u8 *t1_raw = (const u8 *)t1.c_str();
|
||||||
|
|
||||||
|
m128 matches;
|
||||||
|
u64a firsts;
|
||||||
|
flat_set<std::pair<u8, u8>> pairs;
|
||||||
|
pairs.insert(std::make_pair('a', 'b'));
|
||||||
|
pairs.insert(std::make_pair('B', 'B'));
|
||||||
|
pairs.insert(std::make_pair('A', 'B'));
|
||||||
|
pairs.insert(std::make_pair('b', 'a'));
|
||||||
|
bool ret = vermicelliDouble16Build(pairs, (u8 *)&matches, (u8 *)&firsts);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 16; i++) {
|
||||||
|
for (size_t j = 0; j < 16; j++) {
|
||||||
|
/* partial match */
|
||||||
|
const u8 *rv = vermicelliDouble16Exec(matches, firsts,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i - j);
|
||||||
|
ASSERT_EQ(t1_raw + t1.length() - i - j - 1, rv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(DoubleVermicelli16, Exec1) {
|
||||||
|
std::string t1("bbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbb");
|
||||||
|
const u8 *t1_raw = (const u8 *)t1.c_str();
|
||||||
|
|
||||||
|
m128 matches;
|
||||||
|
u64a firsts;
|
||||||
|
flat_set<std::pair<u8, u8>> pairs;
|
||||||
|
pairs.insert(std::make_pair('a', 'b'));
|
||||||
|
bool ret = vermicelliDouble16Build(pairs, (u8 *)&matches, (u8 *)&firsts);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 16; i++) {
|
||||||
|
const u8 *rv = vermicelliDouble16Exec(matches, firsts,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 18, rv);
|
||||||
|
}
|
||||||
|
|
||||||
|
pairs.insert(std::make_pair('b', 'a'));
|
||||||
|
ret = vermicelliDouble16Build(pairs, (u8 *)&matches, (u8 *)&firsts);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 16; i++) {
|
||||||
|
const u8 *rv = vermicelliDouble16Exec(matches, firsts,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 17, rv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(DoubleVermicelli16, Exec2) {
|
||||||
|
std::string t1("bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbaaaaabbbbbbbb");
|
||||||
|
const u8 *t1_raw = (const u8 *)t1.c_str();
|
||||||
|
|
||||||
|
m128 matches;
|
||||||
|
u64a firsts;
|
||||||
|
flat_set<std::pair<u8, u8>> pairs;
|
||||||
|
pairs.insert(std::make_pair('a', 'a'));
|
||||||
|
bool ret = vermicelliDouble16Build(pairs, (u8 *)&matches, (u8 *)&firsts);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 16; i++) {
|
||||||
|
const u8 *rv = vermicelliDouble16Exec(matches, firsts,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 17, rv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(DoubleVermicelliMasked16, ExecNoMatch1) {
|
||||||
|
std::string t1("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb");
|
||||||
|
const u8 *t1_raw = (const u8 *)t1.c_str();
|
||||||
|
|
||||||
|
m128 matches1;
|
||||||
|
bool ret = vermicelliDoubleMasked16Build('a', 'b', 0xff, 0xff, (u8 *)&matches1);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches2;
|
||||||
|
ret = vermicelliDoubleMasked16Build('B', 'B', 0xff, CASE_CLEAR, (u8 *)&matches2);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches3;
|
||||||
|
ret = vermicelliDoubleMasked16Build('A', 'B', CASE_CLEAR, CASE_CLEAR, (u8 *)&matches3);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches4;
|
||||||
|
ret = vermicelliDoubleMasked16Build('B', 'B', CASE_CLEAR, 0xff, (u8 *)&matches4);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches5;
|
||||||
|
ret = vermicelliDoubleMasked16Build('B', 'A', 0xff, 0xff, (u8 *)&matches5);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 16; i++) {
|
||||||
|
for (size_t j = 0; j < 16; j++) {
|
||||||
|
const u8 *rv = vermicelliDoubleMasked16Exec(matches1, 'a', 0xff,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i - j);
|
||||||
|
ASSERT_EQ(t1_raw + t1.length() - i - j, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches2, 'B', 0xff, t1_raw + i,
|
||||||
|
t1_raw + t1.length() - i - j);
|
||||||
|
ASSERT_EQ(t1_raw + t1.length() - i - j, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches3, 'A', CASE_CLEAR,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i - j);
|
||||||
|
ASSERT_EQ(t1_raw + t1.length() - i - j, rv);
|
||||||
|
|
||||||
|
/* partial match */
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches4, 'B', CASE_CLEAR,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i - j);
|
||||||
|
ASSERT_EQ(t1_raw + t1.length() - i - j - 1, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches5, 'B', 0xff, t1_raw + i,
|
||||||
|
t1_raw + t1.length() - i - j);
|
||||||
|
ASSERT_EQ(t1_raw + t1.length() - i - j, rv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(DoubleVermicelliMasked16, Exec1) {
|
||||||
|
std::string t1("bbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbb");
|
||||||
|
const u8 *t1_raw = (const u8 *)t1.c_str();
|
||||||
|
|
||||||
|
m128 matches1;
|
||||||
|
bool ret = vermicelliDoubleMasked16Build('a', 'b', 0xff, 0xff, (u8 *)&matches1);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches2;
|
||||||
|
ret = vermicelliDoubleMasked16Build('A', 'B', CASE_CLEAR, CASE_CLEAR, (u8 *)&matches2);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches3;
|
||||||
|
ret = vermicelliDoubleMasked16Build('a', 'B', 0xff, CASE_CLEAR, (u8 *)&matches3);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches4;
|
||||||
|
ret = vermicelliDoubleMasked16Build('A', 'b', CASE_CLEAR, 0xff, (u8 *)&matches4);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches5;
|
||||||
|
ret = vermicelliDoubleMasked16Build('b', 'a', 0xff, 0xff, (u8 *)&matches5);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches6;
|
||||||
|
ret = vermicelliDoubleMasked16Build('B', 'A', CASE_CLEAR, CASE_CLEAR, (u8 *)&matches6);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 16; i++) {
|
||||||
|
const u8 *rv = vermicelliDoubleMasked16Exec(matches1, 'a', 0xff,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 18, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches2, 'A', CASE_CLEAR,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 18, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches3, 'a', 0xff,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 18, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches4, 'A', CASE_CLEAR,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 18, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches5, 'b', 0xff,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 17, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches6, 'B', CASE_CLEAR,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 17, rv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(DoubleVermicelliMasked16, Exec2) {
|
||||||
|
std::string t1("bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbaaaaabbbbbbbb");
|
||||||
|
const u8 *t1_raw = (const u8 *)t1.c_str();
|
||||||
|
|
||||||
|
m128 matches1;
|
||||||
|
bool ret = vermicelliDoubleMasked16Build('a', 'a', 0xff, 0xff, (u8 *)&matches1);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches2;
|
||||||
|
ret = vermicelliDoubleMasked16Build('A', 'A', CASE_CLEAR, CASE_CLEAR, (u8 *)&matches2);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches3;
|
||||||
|
ret = vermicelliDoubleMasked16Build('a', 'A', 0xff, CASE_CLEAR, (u8 *)&matches3);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches4;
|
||||||
|
ret = vermicelliDoubleMasked16Build('A', 'a', CASE_CLEAR, 0xff, (u8 *)&matches4);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 16; i++) {
|
||||||
|
const u8 *rv = vermicelliDoubleMasked16Exec(matches1, 'a', 0xff,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 17, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches2, 'A', CASE_CLEAR,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 17, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches3, 'a', 0xff,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 17, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches4, 'A', CASE_CLEAR,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 17, rv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(DoubleVermicelliMasked16, Exec3) {
|
||||||
|
/* 012345678901234567890123 */
|
||||||
|
std::string t1("bbbbbbbbbbbbbbbbbaAaaAAaaaaaaaaaaaaaaaaaabbbbbbbaaaaabbbbbbbb");
|
||||||
|
const u8 *t1_raw = (const u8 *)t1.c_str();
|
||||||
|
|
||||||
|
m128 matches1;
|
||||||
|
bool ret = vermicelliDoubleMasked16Build('A', 'a', 0xff, 0xff, (u8 *)&matches1);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches2;
|
||||||
|
ret = vermicelliDoubleMasked16Build('A', 'A', CASE_CLEAR, CASE_CLEAR, (u8 *)&matches2);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches3;
|
||||||
|
ret = vermicelliDoubleMasked16Build('A', 'A', 0xff, 0xff, (u8 *)&matches3);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches4;
|
||||||
|
ret = vermicelliDoubleMasked16Build('a', 'A', 0xff, 0xff, (u8 *)&matches4);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches5;
|
||||||
|
ret = vermicelliDoubleMasked16Build('a', 'A', 0xff, CASE_CLEAR, (u8 *)&matches5);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches6;
|
||||||
|
ret = vermicelliDoubleMasked16Build('A', 'a', CASE_CLEAR, 0xff, (u8 *)&matches6);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 16; i++) {
|
||||||
|
const u8 *rv = vermicelliDoubleMasked16Exec(matches1, 'A', 0xff,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 18, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches2, 'A', CASE_CLEAR,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 17, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches3, 'A', 0xff,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 21, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches4, 'a', 0xff,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 17, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches5, 'a', 0xff,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 17, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches6, 'A', CASE_CLEAR,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 18, rv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(DoubleVermicelliMasked16, Exec4) {
|
||||||
|
std::string t1("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb");
|
||||||
|
const u8 *t1_raw = (const u8 *)t1.c_str();
|
||||||
|
|
||||||
|
m128 matches1;
|
||||||
|
bool ret = vermicelliDoubleMasked16Build('a', 'a', 0xff, 0xff, (u8 *)&matches1);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches2;
|
||||||
|
ret = vermicelliDoubleMasked16Build('A', 'A', CASE_CLEAR, CASE_CLEAR, (u8 *)&matches2);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 31; i++) {
|
||||||
|
t1[48 - i] = 'a';
|
||||||
|
t1[48 - i + 1] = 'a';
|
||||||
|
const u8 *rv = vermicelliDoubleMasked16Exec(matches1, 'a', 0xff, t1_raw,
|
||||||
|
t1_raw + t1.length());
|
||||||
|
ASSERT_EQ(t1_raw + 48 - i, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches2, 'A', CASE_CLEAR, t1_raw,
|
||||||
|
t1_raw + t1.length());
|
||||||
|
ASSERT_EQ(t1_raw + 48 - i, rv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(DoubleVermicelliMasked16, Exec5) {
|
||||||
|
std::string t1("bbbbbbbbbbbbbbbbbaCaGaOCaChBfcNgBFGiLbbbbbbbbbbbbbbbbbbbbbbbb");
|
||||||
|
const u8 *t1_raw = (const u8 *)t1.c_str();
|
||||||
|
|
||||||
|
m128 matches1;
|
||||||
|
bool ret = vermicelliDoubleMasked16Build('a', 'B', 0xff, 0xde, (u8 *)&matches1);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches2;
|
||||||
|
ret = vermicelliDoubleMasked16Build('a', 'D', 0xff, 0xdc, (u8 *)&matches2);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches3;
|
||||||
|
ret = vermicelliDoubleMasked16Build('D', 'a', 0xdc, 0xff, (u8 *)&matches3);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches4;
|
||||||
|
ret = vermicelliDoubleMasked16Build('A', 'B', 0xdf, 0xde, (u8 *)&matches4);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches5;
|
||||||
|
ret = vermicelliDoubleMasked16Build('B', 'a', 0xde, 0xff, (u8 *)&matches5);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
m128 matches6;
|
||||||
|
ret = vermicelliDoubleMasked16Build('B', 'A', 0xde, 0xdf, (u8 *)&matches6);
|
||||||
|
ASSERT_TRUE(ret);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 16; i++) {
|
||||||
|
const u8 *rv = vermicelliDoubleMasked16Exec(matches1, 'a', 0xff,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 17, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches2, 'a', 0xff,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 19, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches3, 'D', 0xdc,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 20, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches4, 'A', 0xdf,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 17, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches5, 'B', 0xde,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 16, rv);
|
||||||
|
|
||||||
|
rv = vermicelliDoubleMasked16Exec(matches6, 'B', 0xde,
|
||||||
|
t1_raw + i, t1_raw + t1.length() - i);
|
||||||
|
ASSERT_EQ(t1_raw + 16, rv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif // HAVE_SVE2
|
#endif // HAVE_SVE2
|
Loading…
x
Reference in New Issue
Block a user