diff --git a/src/nfa/accel.c b/src/nfa/accel.c index 8c9b6e72..34bd24a9 100644 --- a/src/nfa/accel.c +++ b/src/nfa/accel.c @@ -91,6 +91,28 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { rv = vermicelli16Exec(accel->verm16.mask, c, c_end); break; + + case ACCEL_DVERM16: + DEBUG_PRINTF("accel dverm16 %p %p\n", c, c_end); + if (c_end - c < 18) { + return c; + } + + /* need to stop one early to get an accurate end state */ + rv = vermicelliDouble16Exec(accel->dverm16.mask, accel->dverm16.firsts, + c, c_end - 1); + break; + + case ACCEL_DVERM16_MASKED: + DEBUG_PRINTF("accel dverm16 masked %p %p\n", c, c_end); + if (c_end - c < 18) { + return c; + } + + /* need to stop one early to get an accurate end state */ + rv = vermicelliDoubleMasked16Exec(accel->mdverm16.mask, accel->mdverm16.c1, + accel->mdverm16.m1, c, c_end - 1); + break; #endif // HAVE_SVE2 case ACCEL_DVERM_MASKED: diff --git a/src/nfa/accel.h b/src/nfa/accel.h index 0676239a..3fccdd7b 100644 --- a/src/nfa/accel.h +++ b/src/nfa/accel.h @@ -63,7 +63,9 @@ enum AccelType { ACCEL_TRUFFLE, ACCEL_RED_TAPE, ACCEL_DVERM_MASKED, - ACCEL_VERM16 + ACCEL_VERM16, + ACCEL_DVERM16, + ACCEL_DVERM16_MASKED, }; /** \brief Structure for accel framework. 
*/ @@ -104,6 +106,19 @@ union AccelAux { u8 offset; m128 mask; } verm16; + struct { + u8 accel_type; + u8 offset; + u64a firsts; + m128 mask; + } dverm16; + struct { + u8 accel_type; + u8 offset; + u8 c1; // used for partial match + u8 m1; // used for partial match + m128 mask; + } mdverm16; struct { u8 accel_type; u8 offset; diff --git a/src/nfa/accel_dfa_build_strat.cpp b/src/nfa/accel_dfa_build_strat.cpp index cfca9397..6793a65c 100644 --- a/src/nfa/accel_dfa_build_strat.cpp +++ b/src/nfa/accel_dfa_build_strat.cpp @@ -442,45 +442,75 @@ accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx, return; } - if (double_byte_ok(info) && info.double_cr.none() && - (info.double_byte.size() == 2 || info.double_byte.size() == 4)) { - bool ok = true; + if (double_byte_ok(info) && info.double_cr.none()) { + if ((info.double_byte.size() == 2 || info.double_byte.size() == 4)) { + bool ok = true; - assert(!info.double_byte.empty()); - u8 firstC = info.double_byte.begin()->first & CASE_CLEAR; - u8 secondC = info.double_byte.begin()->second & CASE_CLEAR; + assert(!info.double_byte.empty()); + u8 firstC = info.double_byte.begin()->first & CASE_CLEAR; + u8 secondC = info.double_byte.begin()->second & CASE_CLEAR; - for (const pair &p : info.double_byte) { - if ((p.first & CASE_CLEAR) != firstC || - (p.second & CASE_CLEAR) != secondC) { - ok = false; - break; + for (const pair &p : info.double_byte) { + if ((p.first & CASE_CLEAR) != firstC || + (p.second & CASE_CLEAR) != secondC) { + ok = false; + break; + } + } + + if (ok) { + accel->accel_type = ACCEL_DVERM_NOCASE; + accel->dverm.c1 = firstC; + accel->dverm.c2 = secondC; + accel->dverm.offset = verify_u8(info.double_offset); + DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx); + return; + } + + u8 m1; + u8 m2; + if (buildDvermMask(info.double_byte, &m1, &m2)) { + u8 c1 = info.double_byte.begin()->first & m1; + u8 c2 = info.double_byte.begin()->second & m2; +#ifdef HAVE_SVE2 + if (vermicelliDoubleMasked16Build(c1, 
c2, m1, m2, (u8 *)&accel->mdverm16.mask)) { + accel->accel_type = ACCEL_DVERM16_MASKED; + accel->mdverm16.offset = verify_u8(info.double_offset); + accel->mdverm16.c1 = c1; + accel->mdverm16.m1 = m1; + DEBUG_PRINTF("building maskeddouble16-vermicelli for 0x%02hhx%02hhx\n", + c1, c2); + return; + } else if (info.double_byte.size() <= 8 && + vermicelliDouble16Build(info.double_byte, (u8 *)&accel->dverm16.mask, + (u8 *)&accel->dverm16.firsts)) { + accel->accel_type = ACCEL_DVERM16; + accel->dverm16.offset = verify_u8(info.double_offset); + DEBUG_PRINTF("building double16-vermicelli\n"); + return; + } +#endif // HAVE_SVE2 + accel->accel_type = ACCEL_DVERM_MASKED; + accel->dverm.offset = verify_u8(info.double_offset); + accel->dverm.c1 = c1; + accel->dverm.c2 = c2; + accel->dverm.m1 = m1; + accel->dverm.m2 = m2; + DEBUG_PRINTF( + "building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", c1, c2); + return; } } - - if (ok) { - accel->accel_type = ACCEL_DVERM_NOCASE; - accel->dverm.c1 = firstC; - accel->dverm.c2 = secondC; - accel->dverm.offset = verify_u8(info.double_offset); - DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx); - return; - } - - u8 m1; - u8 m2; - if (buildDvermMask(info.double_byte, &m1, &m2)) { - accel->accel_type = ACCEL_DVERM_MASKED; - accel->dverm.offset = verify_u8(info.double_offset); - accel->dverm.c1 = info.double_byte.begin()->first & m1; - accel->dverm.c2 = info.double_byte.begin()->second & m2; - accel->dverm.m1 = m1; - accel->dverm.m2 = m2; - DEBUG_PRINTF( - "building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", - accel->dverm.c1, accel->dverm.c2); +#ifdef HAVE_SVE2 + if (info.double_byte.size() <= 8 && + vermicelliDouble16Build(info.double_byte, (u8 *)&accel->dverm16.mask, + (u8 *)&accel->dverm16.firsts)) { + accel->accel_type = ACCEL_DVERM16; + accel->dverm16.offset = verify_u8(info.double_offset); + DEBUG_PRINTF("building double16-vermicelli\n"); return; } +#endif // HAVE_SVE2 } if (double_byte_ok(info) && diff --git 
a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp index f68ed1b9..e0be910d 100644 --- a/src/nfa/accelcompile.cpp +++ b/src/nfa/accelcompile.cpp @@ -207,16 +207,45 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { u8 m2; if (buildDvermMask(info.double_stop2, &m1, &m2)) { + u8 c1 = info.double_stop2.begin()->first & m1; + u8 c2 = info.double_stop2.begin()->second & m2; +#ifdef HAVE_SVE2 + if (vermicelliDoubleMasked16Build(c1, c2, m1, m2, (u8 *)&aux->mdverm16.mask)) { + aux->accel_type = ACCEL_DVERM16_MASKED; + aux->mdverm16.offset = offset; + aux->mdverm16.c1 = c1; + aux->mdverm16.m1 = m1; + DEBUG_PRINTF("building maskeddouble16-vermicelli for 0x%02hhx%02hhx\n", + c1, c2); + return; + } else if (outs2 <= 8 && + vermicelliDouble16Build(info.double_stop2, (u8 *)&aux->dverm16.mask, + (u8 *)&aux->dverm16.firsts)) { + aux->accel_type = ACCEL_DVERM16; + aux->dverm16.offset = offset; + DEBUG_PRINTF("building double16-vermicelli\n"); + return; + } +#endif // HAVE_SVE2 aux->accel_type = ACCEL_DVERM_MASKED; aux->dverm.offset = offset; - aux->dverm.c1 = info.double_stop2.begin()->first & m1; - aux->dverm.c2 = info.double_stop2.begin()->second & m2; + aux->dverm.c1 = c1; + aux->dverm.c2 = c2; aux->dverm.m1 = m1; aux->dverm.m2 = m2; - DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2); + DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", c1, c2); return; } +#ifdef HAVE_SVE2 + if (outs2 <= 8 && + vermicelliDouble16Build(info.double_stop2, (u8 *)&aux->dverm16.mask, + (u8 *)&aux->dverm16.firsts)) { + aux->accel_type = ACCEL_DVERM16; + aux->dverm16.offset = offset; + DEBUG_PRINTF("building double16-vermicelli\n"); + return; + } +#endif // HAVE_SVE2 } if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438. 
diff --git a/src/nfa/vermicelli_sve.h b/src/nfa/vermicelli_sve.h index 88ed688c..42476a69 100644 --- a/src/nfa/vermicelli_sve.h +++ b/src/nfa/vermicelli_sve.h @@ -267,9 +267,7 @@ const u8 *rvermSearch(svuint8_t chars, const u8 *buf, const u8 *buf_end, } static really_inline -const u8 *dvermSearch(char c1, char c2, bool nocase, const u8 *buf, - const u8 *buf_end) { - svuint16_t chars = getCharMaskDouble(c1, c2, nocase); +const u8 *dvermSearch(svuint8_t chars, const u8 *buf, const u8 *buf_end) { size_t len = buf_end - buf; if (len <= svcntb()) { return dvermSearchOnce(chars, buf, buf_end); @@ -374,7 +372,8 @@ const u8 *vermicelliDoubleExec(char c1, char c2, bool nocase, const u8 *buf, assert(buf < buf_end); if (buf_end - buf > 1) { ++buf; - const u8 *ptr = dvermSearch(c1, c2, nocase, buf, buf_end); + svuint16_t chars = getCharMaskDouble(c1, c2, nocase); + const u8 *ptr = dvermSearch(chars, buf, buf_end); if (ptr) { return ptr; } @@ -406,42 +405,92 @@ const u8 *rvermicelliDoubleExec(char c1, char c2, bool nocase, const u8 *buf, } static really_inline -svuint8_t getDupSVEMaskFrom128(m128 _mask) { - return svld1rq_u8(svptrue_b8(), (const uint8_t *)&_mask); +svuint8_t getDupSVEMaskFrom128(m128 mask) { + return svld1rq_u8(svptrue_b8(), (const uint8_t *)&mask); } static really_inline -const u8 *vermicelli16Exec(const m128 _chars, const u8 *buf, +const u8 *vermicelli16Exec(const m128 mask, const u8 *buf, const u8 *buf_end) { DEBUG_PRINTF("verm16 scan over %td bytes\n", buf_end - buf); - svuint8_t chars = getDupSVEMaskFrom128(_chars); + svuint8_t chars = getDupSVEMaskFrom128(mask); const u8 *ptr = vermSearch(chars, buf, buf_end, false); return ptr ? 
ptr : buf_end; } static really_inline -const u8 *nvermicelli16Exec(const m128 _chars, const u8 *buf, +const u8 *nvermicelli16Exec(const m128 mask, const u8 *buf, const u8 *buf_end) { DEBUG_PRINTF("nverm16 scan over %td bytes\n", buf_end - buf); - svuint8_t chars = getDupSVEMaskFrom128(_chars); + svuint8_t chars = getDupSVEMaskFrom128(mask); const u8 *ptr = vermSearch(chars, buf, buf_end, true); return ptr ? ptr : buf_end; } static really_inline -const u8 *rvermicelli16Exec(const m128 _chars, const u8 *buf, +const u8 *rvermicelli16Exec(const m128 mask, const u8 *buf, const u8 *buf_end) { DEBUG_PRINTF("rverm16 scan over %td bytes\n", buf_end - buf); - svuint8_t chars = getDupSVEMaskFrom128(_chars); + svuint8_t chars = getDupSVEMaskFrom128(mask); const u8 *ptr = rvermSearch(chars, buf, buf_end, false); return ptr ? ptr : buf - 1; } static really_inline -const u8 *rnvermicelli16Exec(const m128 _chars, const u8 *buf, +const u8 *rnvermicelli16Exec(const m128 mask, const u8 *buf, const u8 *buf_end) { DEBUG_PRINTF("rnverm16 scan over %td bytes\n", buf_end - buf); - svuint8_t chars = getDupSVEMaskFrom128(_chars); + svuint8_t chars = getDupSVEMaskFrom128(mask); const u8 *ptr = rvermSearch(chars, buf, buf_end, true); return ptr ? 
ptr : buf - 1; -} \ No newline at end of file +} + +static really_inline +bool vermicelliDouble16CheckPartial(const u64a first_chars, const u8 *buf_end) { + svuint8_t firsts = svreinterpret_u8(svdup_u64(first_chars)); + svbool_t matches = svcmpeq(svptrue_b8(), firsts, svdup_u8(buf_end[-1])); + return svptest_any(svptrue_b8(), matches); +} + +static really_inline +const u8 *vermicelliDouble16Exec(const m128 mask, const u64a firsts, + const u8 *buf, const u8 *buf_end) { + assert(buf < buf_end); + DEBUG_PRINTF("double verm16 scan over %td bytes\n", buf_end - buf); + if (buf_end - buf > 1) { + ++buf; + svuint16_t chars = svreinterpret_u16(getDupSVEMaskFrom128(mask)); + const u8 *ptr = dvermSearch(chars, buf, buf_end); + if (ptr) { + return ptr; + } + } + /* check for partial match at end */ + if (vermicelliDouble16CheckPartial(firsts, buf_end)) { + DEBUG_PRINTF("partial!!!\n"); + return buf_end - 1; + } + return buf_end; +} + +static really_inline +const u8 *vermicelliDoubleMasked16Exec(const m128 mask, char c1, char m1, + const u8 *buf, const u8 *buf_end) { + assert(buf < buf_end); + DEBUG_PRINTF("double verm16 masked scan over %td bytes\n", buf_end - buf); + if (buf_end - buf > 1) { + ++buf; + svuint16_t chars = svreinterpret_u16(getDupSVEMaskFrom128(mask)); + const u8 *ptr = dvermSearch(chars, buf, buf_end); + if (ptr) { + return ptr; + } + } + /* check for partial match at end */ + if ((buf_end[-1] & m1) == (u8)c1) { + DEBUG_PRINTF("partial!!!\n"); + return buf_end - 1; + } + + return buf_end; +} diff --git a/src/nfa/vermicellicompile.cpp b/src/nfa/vermicellicompile.cpp index 5b6ca036..d72ecece 100644 --- a/src/nfa/vermicellicompile.cpp +++ b/src/nfa/vermicellicompile.cpp @@ -50,4 +50,207 @@ bool vermicelli16Build(const CharReach &chars, u8 *rv) { return true; } +bool vermicelliDouble16Build(const flat_set> &twochar, + u8 *chars, u8 *firsts) { + constexpr size_t count_limit = 8; + if (twochar.size() > count_limit) return false; + size_t count = 0; + for (const auto 
&p : twochar) { + firsts[count] = p.first; + chars[2 * count] = p.first; + chars[(2 * count) + 1] = p.second; + ++count; + } + for(; count < count_limit; ++count) { + firsts[count] = chars[0]; + chars[2 * count] = chars[0]; + chars[(2 * count) + 1] = chars[1]; + } + return true; +} + +static really_inline +void fillMask(u8 matches[], size_t len, u8 *rv) { + for (size_t i = 0; i < 16; ++i) { + rv[i] = matches[i % len]; + } +} + +static really_inline +void getTwoCases(u8 cases[2], u8 bit, char c) { + const u8 set = 1UL << bit; + cases[0] = c & (~set); + cases[1] = c | set; +} + +static really_inline +void getFourCases(u8 cases[4], u8 bit, char case1, char case2) { + const u8 set = 1UL << bit; + cases[0] = case1 & (~set); + cases[1] = case1 | set; + cases[2] = case2 & (~set); + cases[3] = case2 | set; +} + +static really_inline +void getEightCases(u8 cases[8], u8 bit, char case1, char case2, + char case3, char case4) { + const u8 set = 1UL << bit; + cases[0] = case1 & (~set); + cases[1] = case1 | set; + cases[2] = case2 & (~set); + cases[3] = case2 | set; + cases[4] = case3 & (~set); + cases[5] = case3 | set; + cases[6] = case4 & (~set); + cases[7] = case4 | set; +} + +static really_inline +bool getDoubleMatchesForBits(u8 c1, u8 c2, u8 holes[3], u8 c1_holes, + u8 c2_holes, u8 *rv) { + u8 cases[8]; + switch (c1_holes) { + case 0: + switch (c2_holes) { + case 0: { + u8 matches[2] = { c1, c2 }; + fillMask(matches, 2, rv); + return true; + } + case 1: { + getTwoCases(cases, holes[0], c2); + u8 matches[4] = { c1, cases[0], c1, cases[1] }; + fillMask(matches, 4, rv); + return true; + } + case 2: { + getTwoCases(cases, holes[0], c2); + getFourCases(&cases[2], holes[1], cases[0], cases[1]); + u8 matches[8] = { c1, cases[2], c1, cases[3], + c1, cases[4], c1, cases[5] }; + fillMask(matches, 8, rv); + return true; + } + case 3: { + getTwoCases(cases, holes[0], c2); + getFourCases(&cases[4], holes[1], cases[0], cases[1]); + getEightCases(cases, holes[2], cases[4], cases[5], + 
cases[6], cases[7]); + u8 matches[16] = { c1, cases[0], c1, cases[1], + c1, cases[2], c1, cases[3], + c1, cases[4], c1, cases[5], + c1, cases[6], c1, cases[7] }; + memcpy(rv, matches, sizeof(matches)); + return true; + } + default: + assert(c2_holes < 4); + break; + } + break; + case 1: + getTwoCases(cases, holes[0], c1); + switch (c2_holes) { + case 0: { + u8 matches[4] = { cases[0] , c2, cases[1], c2 }; + fillMask(matches, 4, rv); + return true; + } + case 1: { + getTwoCases(&cases[2], holes[1], c2); + u8 matches[8] = { cases[0], cases[2], + cases[0], cases[3], + cases[1], cases[2], + cases[1], cases[3] }; + fillMask(matches, 8, rv); + return true; + } + case 2: { + getTwoCases(&cases[2], holes[1], c2); + getFourCases(&cases[4], holes[2], cases[2], cases[3]); + u8 matches[16] = { cases[0], cases[4], cases[0], cases[5], + cases[0], cases[6], cases[0], cases[7], + cases[1], cases[4], cases[1], cases[5], + cases[1], cases[6], cases[1], cases[7] }; + memcpy(rv, matches, sizeof(matches)); + return true; + } + default: + assert(c2_holes < 3); + break; + } + break; + case 2: + getTwoCases(cases, holes[0], c1); + getFourCases(&cases[2], holes[1], cases[0], cases[1]); + switch (c2_holes) { + case 0: { + u8 matches[8] = { cases[2], c2, cases[3], c2, + cases[4], c2, cases[5], c2 }; + fillMask(matches, 8, rv); + return true; + } + case 1: { + getTwoCases(&cases[6], holes[2], c2); + u8 matches[16] = { cases[2], cases[6], cases[3], cases[6], + cases[4], cases[6], cases[5], cases[6], + cases[2], cases[7], cases[3], cases[7], + cases[4], cases[7], cases[5], cases[7] }; + memcpy(rv, matches, sizeof(matches)); + return true; + } + default: + assert(c2_holes < 2); + break; + } + break; + case 3: { + assert(!c2_holes); + getTwoCases(cases, holes[0], c1); + getFourCases(&cases[4], holes[1], cases[0], cases[1]); + getEightCases(cases, holes[2], cases[4], cases[5], + cases[6], cases[7]); + u8 matches[16] = { cases[0], c2, cases[1], c2, + cases[2], c2, cases[3], c2, + cases[4], c2, 
cases[5], c2, + cases[6], c2, cases[7], c2 }; + memcpy(rv, matches, sizeof(matches)); + return true; + } + } + return false; +} + +static really_inline +bool getDoubleMatchesForMask(char c1, char c2, char m1, char m2, + u8 c1_holes, u8 c2_holes, u8 *rv) { + u8 holes[3] = { 0 }; + int count = 0; + if (c1_holes) { + for (int i = 0; i < 8; ++i) { + if (!(m1 & (1UL << i))) { + holes[count++] = i; + } + } + } + if (c2_holes) { + for (int i = 0; i < 8; ++i) { + if (!(m2 & (1UL << i))) { + holes[count++] = i; + } + } + } + return getDoubleMatchesForBits(c1, c2, holes, c1_holes, c2_holes, rv); +} + +bool vermicelliDoubleMasked16Build(char c1, char c2, char m1, char m2, u8 *rv) { + u8 c1_holes = 8 - __builtin_popcount((u8)m1); /* char may be signed */ + u8 c2_holes = 8 - __builtin_popcount((u8)m2); + if (c1_holes + c2_holes > 3) { + return false; + } + return getDoubleMatchesForMask(c1, c2, m1, m2, c1_holes, c2_holes, rv); +} + } // namespace ue2 diff --git a/src/nfa/vermicellicompile.h b/src/nfa/vermicellicompile.h index 5c70100a..0075273c 100644 --- a/src/nfa/vermicellicompile.h +++ b/src/nfa/vermicellicompile.h @@ -43,6 +43,11 @@ namespace ue2 { bool vermicelli16Build(const CharReach &chars, u8 *rv); +bool vermicelliDouble16Build(const flat_set> &twochar, + u8 *chars, u8 *firsts); + +bool vermicelliDoubleMasked16Build(char c1, char c2, char m1, char m2, u8 *rv); + } // namespace ue2 #endif // VERM_COMPILE_H diff --git a/unit/internal/rvermicelli.cpp b/unit/internal/rvermicelli.cpp index 2806c5d8..d89067d0 100644 --- a/unit/internal/rvermicelli.cpp +++ b/unit/internal/rvermicelli.cpp @@ -311,11 +311,6 @@ TEST(RDoubleVermicelli, Exec5) { #include "nfa/vermicellicompile.h" using namespace ue2; -union Matches { - u8 val8[16]; - m128 val128; -}; - TEST(RVermicelli16, ExecNoMatch1) { char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -323,8 +318,8 @@ TEST(RVermicelli16, ExecNoMatch1) { chars.set('a'); chars.set('B'); chars.set('A'); - Matches matches; - bool ret =
vermicelli16Build(chars, matches.val8); + m128 matches; + bool ret = vermicelli16Build(chars, (u8 *)&matches); ASSERT_TRUE(ret); for (size_t i = 0; i < 16; i++) { @@ -332,7 +327,7 @@ TEST(RVermicelli16, ExecNoMatch1) { const u8 *begin = (const u8 *)t1 + i; const u8 *end = (const u8 *)t1 + strlen(t1) - j; - const u8 *rv = rvermicelli16Exec(matches.val128, begin, end); + const u8 *rv = rvermicelli16Exec(matches, begin, end); ASSERT_EQ(begin - 1, rv); } } @@ -345,12 +340,12 @@ TEST(RVermicelli16, Exec1) { CharReach chars; chars.set('a'); chars.set('A'); - Matches matches; - bool ret = vermicelli16Build(chars, matches.val8); + m128 matches; + bool ret = vermicelli16Build(chars, (u8 *)&matches); ASSERT_TRUE(ret); for (size_t i = 0; i < 16; i++) { - const u8 *rv = rvermicelli16Exec(matches.val128, buf, buf + strlen(t1) - i); + const u8 *rv = rvermicelli16Exec(matches, buf, buf + strlen(t1) - i); ASSERT_EQ(buf + 48, rv); } } @@ -362,12 +357,12 @@ TEST(RVermicelli16, Exec2) { CharReach chars; chars.set('a'); chars.set('A'); - Matches matches; - bool ret = vermicelli16Build(chars, matches.val8); + m128 matches; + bool ret = vermicelli16Build(chars, (u8 *)&matches); ASSERT_TRUE(ret); for (size_t i = 0; i < 16; i++) { - const u8 *rv = rvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1)); + const u8 *rv = rvermicelli16Exec(matches, buf + i, buf + strlen(t1)); ASSERT_EQ(buf + 48, rv); } } @@ -378,20 +373,20 @@ TEST(RVermicelli16, Exec3) { CharReach chars; chars.set('a'); - Matches matches_a; - bool ret = vermicelli16Build(chars, matches_a.val8); + m128 matches_a; + bool ret = vermicelli16Build(chars, (u8 *)&matches_a); ASSERT_TRUE(ret); chars.set('A'); - Matches matches_A; - ret = vermicelli16Build(chars, matches_A.val8); + m128 matches_A; + ret = vermicelli16Build(chars, (u8 *)&matches_A); ASSERT_TRUE(ret); for (size_t i = 0; i < 16; i++) { - const u8 *rv = rvermicelli16Exec(matches_a.val128, buf, buf + strlen(t1) - i); + const u8 *rv = rvermicelli16Exec(matches_a, 
buf, buf + strlen(t1) - i); ASSERT_EQ(buf + 47, rv); - rv = rvermicelli16Exec(matches_A.val128, buf, buf + strlen(t1) - i); + rv = rvermicelli16Exec(matches_A, buf, buf + strlen(t1) - i); ASSERT_EQ(buf + 48, rv); } } @@ -402,21 +397,21 @@ TEST(RVermicelli16, Exec4) { CharReach chars; chars.set('a'); - Matches matches_a; - bool ret = vermicelli16Build(chars, matches_a.val8); + m128 matches_a; + bool ret = vermicelli16Build(chars, (u8 *)&matches_a); ASSERT_TRUE(ret); chars.set('A'); - Matches matches_A; - ret = vermicelli16Build(chars, matches_A.val8); + m128 matches_A; + ret = vermicelli16Build(chars, (u8 *)&matches_A); ASSERT_TRUE(ret); for (size_t i = 0; i < 31; i++) { t1[16 + i] = 'a'; - const u8 *rv = rvermicelli16Exec(matches_a.val128, buf, buf + strlen(t1)); + const u8 *rv = rvermicelli16Exec(matches_a, buf, buf + strlen(t1)); ASSERT_EQ(buf + 16 + i, rv); - rv = rvermicelli16Exec(matches_A.val128, buf, buf + strlen(t1)); + rv = rvermicelli16Exec(matches_A, buf, buf + strlen(t1)); ASSERT_EQ(buf + 16 + i, rv); } } @@ -426,18 +421,18 @@ TEST(RVermicelli16, Exec5) { const u8 *buf = (const u8 *)t1; CharReach chars; - Matches matches[16]; + m128 matches[16]; bool ret; for (int i = 0; i < 16; ++i) { chars.set('a' + i); - ret = vermicelli16Build(chars, matches[i].val8); + ret = vermicelli16Build(chars, (u8 *)&matches[i]); ASSERT_TRUE(ret); } for (int j = 0; j < 16; ++j) { for (size_t i = 0; i < 16; i++) { - const u8 *rv = rvermicelli16Exec(matches[j].val128, buf, buf + strlen(t1) - i); + const u8 *rv = rvermicelli16Exec(matches[j], buf, buf + strlen(t1) - i); ASSERT_EQ(buf + j + 17, rv); } } @@ -451,13 +446,13 @@ TEST(RNVermicelli16, ExecNoMatch1) { chars.set('b'); chars.set('B'); chars.set('A'); - Matches matches; - bool ret = vermicelli16Build(chars, matches.val8); + m128 matches; + bool ret = vermicelli16Build(chars, (u8 *)&matches); ASSERT_TRUE(ret); for (size_t i = 0; i < 16; i++) { for (size_t j = 0; j < 16; j++) { - const u8 *rv = 
rnvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1) - j); + const u8 *rv = rnvermicelli16Exec(matches, buf + i, buf + strlen(t1) - j); ASSERT_EQ(buf + i - 1, rv); } } @@ -470,12 +465,12 @@ TEST(RNVermicelli16, Exec1) { CharReach chars; chars.set('b'); chars.set('A'); - Matches matches; - bool ret = vermicelli16Build(chars, matches.val8); + m128 matches; + bool ret = vermicelli16Build(chars, (u8 *)&matches); ASSERT_TRUE(ret); for (size_t i = 0; i < 16; i++) { - const u8 *rv = rnvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1) - i); + const u8 *rv = rnvermicelli16Exec(matches, buf + i, buf + strlen(t1) - i); ASSERT_EQ(buf + 48, rv); } } @@ -487,12 +482,12 @@ TEST(RNVermicelli16, Exec2) { CharReach chars; chars.set('b'); chars.set('A'); - Matches matches; - bool ret = vermicelli16Build(chars, matches.val8); + m128 matches; + bool ret = vermicelli16Build(chars, (u8 *)&matches); ASSERT_TRUE(ret); for (size_t i = 0; i < 16; i++) { - const u8 *rv = rnvermicelli16Exec(matches.val128, buf, buf + strlen(t1) - i); + const u8 *rv = rnvermicelli16Exec(matches, buf, buf + strlen(t1) - i); ASSERT_EQ(buf + 48, rv); } } @@ -503,20 +498,20 @@ TEST(RNVermicelli16, Exec3) { CharReach chars; chars.set('b'); - Matches matches_b; - bool ret = vermicelli16Build(chars, matches_b.val8); + m128 matches_b; + bool ret = vermicelli16Build(chars, (u8 *)&matches_b); ASSERT_TRUE(ret); chars.set('A'); - Matches matches_A; - ret = vermicelli16Build(chars, matches_A.val8); + m128 matches_A; + ret = vermicelli16Build(chars, (u8 *)&matches_A); ASSERT_TRUE(ret); for (size_t i = 0; i < 16; i++) { - const u8 *rv = rnvermicelli16Exec(matches_b.val128, buf + i, buf + strlen(t1)); + const u8 *rv = rnvermicelli16Exec(matches_b, buf + i, buf + strlen(t1)); ASSERT_EQ(buf + 48, rv); - rv = rnvermicelli16Exec(matches_A.val128, buf + i, buf + strlen(t1)); + rv = rnvermicelli16Exec(matches_A, buf + i, buf + strlen(t1)); ASSERT_EQ(buf + 47, rv); } } @@ -527,21 +522,21 @@ TEST(RNVermicelli16, 
Exec4) { CharReach chars; chars.set('b'); - Matches matches_b; - bool ret = vermicelli16Build(chars, matches_b.val8); + m128 matches_b; + bool ret = vermicelli16Build(chars, (u8 *)&matches_b); ASSERT_TRUE(ret); chars.set('A'); - Matches matches_A; - ret = vermicelli16Build(chars, matches_A.val8); + m128 matches_A; + ret = vermicelli16Build(chars, (u8 *)&matches_A); ASSERT_TRUE(ret); for (size_t i = 0; i < 31; i++) { t1[16 + i] = 'a'; - const u8 *rv = rnvermicelli16Exec(matches_b.val128, buf, buf + strlen(t1)); + const u8 *rv = rnvermicelli16Exec(matches_b, buf, buf + strlen(t1)); ASSERT_EQ(buf + 16 + i, rv); - rv = rnvermicelli16Exec(matches_A.val128, buf, buf + strlen(t1)); + rv = rnvermicelli16Exec(matches_A, buf, buf + strlen(t1)); ASSERT_EQ(buf + 16 + i, rv); } } @@ -551,18 +546,18 @@ TEST(RNVermicelli16, Exec5) { const u8 *buf = (const u8 *)t1; CharReach chars; - Matches matches[16]; + m128 matches[16]; bool ret; for (int i = 0; i < 16; ++i) { chars.set('q' - i); - ret = vermicelli16Build(chars, matches[i].val8); + ret = vermicelli16Build(chars, (u8 *)&matches[i]); ASSERT_TRUE(ret); } for (int j = 0; j < 16; ++j) { for (size_t i = 0; i < 16; i++) { - const u8 *rv = rnvermicelli16Exec(matches[j].val128, buf, buf + strlen(t1) - i); + const u8 *rv = rnvermicelli16Exec(matches[j], buf, buf + strlen(t1) - i); ASSERT_EQ(buf - j + 32, rv); } } diff --git a/unit/internal/vermicelli.cpp b/unit/internal/vermicelli.cpp index bc007e1a..dc458cb9 100644 --- a/unit/internal/vermicelli.cpp +++ b/unit/internal/vermicelli.cpp @@ -528,11 +528,6 @@ TEST(DoubleVermicelliMasked, Exec4) { #include "nfa/vermicellicompile.h" using namespace ue2; -union Matches { - u8 val8[16]; - m128 val128; -}; - TEST(Vermicelli16, ExecNoMatch1) { char t1[] = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; const u8 *buf = (const u8 *)t1; @@ -541,13 +536,13 @@ TEST(Vermicelli16, ExecNoMatch1) { chars.set('a'); chars.set('B'); chars.set('A'); - Matches matches; - bool ret = 
vermicelli16Build(chars, matches.val8); + m128 matches; + bool ret = vermicelli16Build(chars, (u8 *)&matches); ASSERT_TRUE(ret); for (size_t i = 0; i < 16; i++) { for (size_t j = 0; j < 16; j++) { - const u8 *rv = vermicelli16Exec(matches.val128, buf + i, buf + strlen(t1) - j); + const u8 *rv = vermicelli16Exec(matches, buf + i, buf + strlen(t1) - j); ASSERT_EQ(buf + strlen(t1) - j, rv); } } @@ -560,12 +555,12 @@ TEST(Vermicelli16, Exec1) { CharReach chars; chars.set('a'); chars.set('A'); - Matches matches; - bool ret = vermicelli16Build(chars, matches.val8); + m128 matches; + bool ret = vermicelli16Build(chars, (u8 *)&matches); ASSERT_TRUE(ret); for (size_t i = 0; i < 16; i++) { - const u8 *rv = vermicelli16Exec(matches.val128, buf + i, buf + strlen(t1)); + const u8 *rv = vermicelli16Exec(matches, buf + i, buf + strlen(t1)); ASSERT_EQ(buf + 17, rv); } } @@ -577,12 +572,12 @@ TEST(Vermicelli16, Exec2) { CharReach chars; chars.set('a'); chars.set('A'); - Matches matches; - bool ret = vermicelli16Build(chars, matches.val8); + m128 matches; + bool ret = vermicelli16Build(chars, (u8 *)&matches); ASSERT_TRUE(ret); for (size_t i = 0; i < 16; i++) { - const u8 *rv = vermicelli16Exec(matches.val128, buf + i, buf + strlen(t1)); + const u8 *rv = vermicelli16Exec(matches, buf + i, buf + strlen(t1)); ASSERT_EQ(buf + 17, rv); } } @@ -593,20 +588,20 @@ TEST(Vermicelli16, Exec3) { CharReach chars; chars.set('a'); - Matches matches_a; - bool ret = vermicelli16Build(chars, matches_a.val8); + m128 matches_a; + bool ret = vermicelli16Build(chars, (u8 *)&matches_a); ASSERT_TRUE(ret); chars.set('A'); - Matches matches_A; - ret = vermicelli16Build(chars, matches_A.val8); + m128 matches_A; + ret = vermicelli16Build(chars, (u8 *)&matches_A); ASSERT_TRUE(ret); for (size_t i = 0; i < 16; i++) { - const u8 *rv = vermicelli16Exec(matches_a.val128, buf + i, buf + strlen(t1)); + const u8 *rv = vermicelli16Exec(matches_a, buf + i, buf + strlen(t1)); ASSERT_EQ(buf + 18, rv); - rv = 
vermicelli16Exec(matches_A.val128, buf + i, buf + strlen(t1)); + rv = vermicelli16Exec(matches_A, buf + i, buf + strlen(t1)); ASSERT_EQ(buf + 17, rv); } } @@ -617,21 +612,21 @@ TEST(Vermicelli16, Exec4) { CharReach chars; chars.set('a'); - Matches matches_a; - bool ret = vermicelli16Build(chars, matches_a.val8); + m128 matches_a; + bool ret = vermicelli16Build(chars, (u8 *)&matches_a); ASSERT_TRUE(ret); chars.set('A'); - Matches matches_A; - ret = vermicelli16Build(chars, matches_A.val8); + m128 matches_A; + ret = vermicelli16Build(chars, (u8 *)&matches_A); ASSERT_TRUE(ret); for (size_t i = 0; i < 31; i++) { t1[48 - i] = 'a'; - const u8 *rv = vermicelli16Exec(matches_a.val128, buf, buf + strlen(t1)); + const u8 *rv = vermicelli16Exec(matches_a, buf, buf + strlen(t1)); ASSERT_EQ(buf + 48 - i, rv); - rv = vermicelli16Exec(matches_A.val128, buf, buf + strlen(t1)); + rv = vermicelli16Exec(matches_A, buf, buf + strlen(t1)); ASSERT_EQ(buf + 48 - i, rv); } } @@ -641,18 +636,18 @@ TEST(Vermicelli16, Exec5) { const u8 *buf = (const u8 *)t1; CharReach chars; - Matches matches[16]; + m128 matches[16]; bool ret; for (int i = 0; i < 16; ++i) { chars.set('p' - i); - ret = vermicelli16Build(chars, matches[i].val8); + ret = vermicelli16Build(chars, (u8 *)&matches[i]); ASSERT_TRUE(ret); } for (int j = 0; j < 16; ++j) { for (size_t i = 0; i < 16; i++) { - const u8 *rv = vermicelli16Exec(matches[j].val128, buf + i,buf + strlen(t1)); + const u8 *rv = vermicelli16Exec(matches[j], buf + i,buf + strlen(t1)); ASSERT_EQ(buf - j + 32, rv); } } @@ -666,13 +661,13 @@ TEST(NVermicelli16, ExecNoMatch1) { chars.set('b'); chars.set('B'); chars.set('A'); - Matches matches; - bool ret = vermicelli16Build(chars, matches.val8); + m128 matches; + bool ret = vermicelli16Build(chars, (u8 *)&matches); ASSERT_TRUE(ret); for (size_t i = 0; i < 16; i++) { for (size_t j = 0; j < 16; j++) { - const u8 *rv = nvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1) - j); + const u8 *rv = 
nvermicelli16Exec(matches, buf + i, buf + strlen(t1) - j); ASSERT_EQ((buf + strlen(t1) - j), rv); } } @@ -685,12 +680,12 @@ TEST(NVermicelli16, Exec1) { CharReach chars; chars.set('b'); chars.set('A'); - Matches matches; - bool ret = vermicelli16Build(chars, matches.val8); + m128 matches; + bool ret = vermicelli16Build(chars, (u8 *)&matches); ASSERT_TRUE(ret); for (size_t i = 0; i < 16; i++) { - const u8 *rv = nvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1)); + const u8 *rv = nvermicelli16Exec(matches, buf + i, buf + strlen(t1)); ASSERT_EQ(buf + 17, rv); } } @@ -702,12 +697,12 @@ TEST(NVermicelli16, Exec2) { CharReach chars; chars.set('b'); chars.set('A'); - Matches matches; - bool ret = vermicelli16Build(chars, matches.val8); + m128 matches; + bool ret = vermicelli16Build(chars, (u8 *)&matches); ASSERT_TRUE(ret); for (size_t i = 0; i < 16; i++) { - const u8 *rv = nvermicelli16Exec(matches.val128, buf + i, buf + strlen(t1)); + const u8 *rv = nvermicelli16Exec(matches, buf + i, buf + strlen(t1)); ASSERT_EQ(buf + 17, rv); } } @@ -718,20 +713,20 @@ TEST(NVermicelli16, Exec3) { CharReach chars; chars.set('b'); - Matches matches_b; - bool ret = vermicelli16Build(chars, matches_b.val8); + m128 matches_b; + bool ret = vermicelli16Build(chars, (u8 *)&matches_b); ASSERT_TRUE(ret); chars.set('A'); - Matches matches_A; - ret = vermicelli16Build(chars, matches_A.val8); + m128 matches_A; + ret = vermicelli16Build(chars, (u8 *)&matches_A); ASSERT_TRUE(ret); for (size_t i = 0; i < 16; i++) { - const u8 *rv = nvermicelli16Exec(matches_b.val128, buf + i, buf + strlen(t1)); + const u8 *rv = nvermicelli16Exec(matches_b, buf + i, buf + strlen(t1)); ASSERT_EQ(buf + 17, rv); - rv = nvermicelli16Exec(matches_A.val128, buf + i, buf + strlen(t1)); + rv = nvermicelli16Exec(matches_A, buf + i, buf + strlen(t1)); ASSERT_EQ(buf + 18, rv); } } @@ -742,21 +737,21 @@ TEST(NVermicelli16, Exec4) { CharReach chars; chars.set('b'); - Matches matches_b; - bool ret = 
vermicelli16Build(chars, matches_b.val8); + m128 matches_b; + bool ret = vermicelli16Build(chars, (u8 *)&matches_b); ASSERT_TRUE(ret); chars.set('A'); - Matches matches_A; - ret = vermicelli16Build(chars, matches_A.val8); + m128 matches_A; + ret = vermicelli16Build(chars, (u8 *)&matches_A); ASSERT_TRUE(ret); for (size_t i = 0; i < 31; i++) { t1[48 - i] = 'a'; - const u8 *rv = nvermicelli16Exec(matches_b.val128, buf, buf + strlen(t1)); + const u8 *rv = nvermicelli16Exec(matches_b, buf, buf + strlen(t1)); ASSERT_EQ(buf + 48 - i, rv); - rv = nvermicelli16Exec(matches_A.val128, buf, buf + strlen(t1)); + rv = nvermicelli16Exec(matches_A, buf, buf + strlen(t1)); ASSERT_EQ(buf + 48 - i, rv); } } @@ -766,21 +761,393 @@ TEST(NVermicelli16, Exec5) { const u8 *buf = (const u8 *)t1; CharReach chars; - Matches matches[16]; + m128 matches[16]; bool ret; for (int i = 0; i < 16; ++i) { chars.set('a' + i); - ret = vermicelli16Build(chars, matches[i].val8); + ret = vermicelli16Build(chars, (u8 *)&matches[i]); ASSERT_TRUE(ret); } for (int j = 0; j < 16; ++j) { for (size_t i = 0; i < 16; i++) { - const u8 *rv = nvermicelli16Exec(matches[j].val128, buf + i, buf + strlen(t1)); + const u8 *rv = nvermicelli16Exec(matches[j], buf + i, buf + strlen(t1)); ASSERT_EQ(buf + j + 18, rv); } } } +TEST(DoubleVermicelli16, ExecNoMatch1) { + std::string t1("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + m128 matches; + u64a firsts; + flat_set> pairs; + for (int i = 0; i < 16; i += 2) { + pairs.insert(std::make_pair('a' + i, 'a' + i + 1)); + } + bool ret = vermicelliDouble16Build(pairs, (u8 *)&matches, (u8 *)&firsts); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + for (size_t j = 0; j < 16; j++) { + const u8 *rv = vermicelliDouble16Exec(matches, firsts, + t1_raw + i, t1_raw + t1.length() - i - j); + ASSERT_EQ(t1_raw + t1.length() - i - j, rv); + } + } +} + +TEST(DoubleVermicelli16, ExecNoMatch2) { + std::string 
t1("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + m128 matches; + u64a firsts; + flat_set> pairs; + pairs.insert(std::make_pair('a', 'b')); + pairs.insert(std::make_pair('A', 'B')); + pairs.insert(std::make_pair('B', 'A')); + pairs.insert(std::make_pair('B', 'B')); + bool ret = vermicelliDouble16Build(pairs, (u8 *)&matches, (u8 *)&firsts); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + for (size_t j = 0; j < 16; j++) { + const u8 *rv = vermicelliDouble16Exec(matches, firsts, + t1_raw + i, t1_raw + t1.length() - i - j); + ASSERT_EQ(t1_raw + t1.length() - i - j, rv); + } + } +} + +TEST(DoubleVermicelli16, ExecNoMatch3) { + std::string t1("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + m128 matches; + u64a firsts; + flat_set> pairs; + pairs.insert(std::make_pair('a', 'b')); + pairs.insert(std::make_pair('B', 'B')); + pairs.insert(std::make_pair('A', 'B')); + pairs.insert(std::make_pair('b', 'a')); + bool ret = vermicelliDouble16Build(pairs, (u8 *)&matches, (u8 *)&firsts); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + for (size_t j = 0; j < 16; j++) { + /* partial match */ + const u8 *rv = vermicelliDouble16Exec(matches, firsts, + t1_raw + i, t1_raw + t1.length() - i - j); + ASSERT_EQ(t1_raw + t1.length() - i - j - 1, rv); + } + } +} + +TEST(DoubleVermicelli16, Exec1) { + std::string t1("bbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + m128 matches; + u64a firsts; + flat_set> pairs; + pairs.insert(std::make_pair('a', 'b')); + bool ret = vermicelliDouble16Build(pairs, (u8 *)&matches, (u8 *)&firsts); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelliDouble16Exec(matches, firsts, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 18, rv); + } + + pairs.insert(std::make_pair('b', 'a')); + ret = 
vermicelliDouble16Build(pairs, (u8 *)&matches, (u8 *)&firsts); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelliDouble16Exec(matches, firsts, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 17, rv); + } +} + +TEST(DoubleVermicelli16, Exec2) { + std::string t1("bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbaaaaabbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + m128 matches; + u64a firsts; + flat_set> pairs; + pairs.insert(std::make_pair('a', 'a')); + bool ret = vermicelliDouble16Build(pairs, (u8 *)&matches, (u8 *)&firsts); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelliDouble16Exec(matches, firsts, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 17, rv); + } +} + +TEST(DoubleVermicelliMasked16, ExecNoMatch1) { + std::string t1("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + m128 matches1; + bool ret = vermicelliDoubleMasked16Build('a', 'b', 0xff, 0xff, (u8 *)&matches1); + ASSERT_TRUE(ret); + m128 matches2; + ret = vermicelliDoubleMasked16Build('B', 'B', 0xff, CASE_CLEAR, (u8 *)&matches2); + ASSERT_TRUE(ret); + m128 matches3; + ret = vermicelliDoubleMasked16Build('A', 'B', CASE_CLEAR, CASE_CLEAR, (u8 *)&matches3); + ASSERT_TRUE(ret); + m128 matches4; + ret = vermicelliDoubleMasked16Build('B', 'B', CASE_CLEAR, 0xff, (u8 *)&matches4); + ASSERT_TRUE(ret); + m128 matches5; + ret = vermicelliDoubleMasked16Build('B', 'A', 0xff, 0xff, (u8 *)&matches5); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + for (size_t j = 0; j < 16; j++) { + const u8 *rv = vermicelliDoubleMasked16Exec(matches1, 'a', 0xff, + t1_raw + i, t1_raw + t1.length() - i - j); + ASSERT_EQ(t1_raw + t1.length() - i - j, rv); + + rv = vermicelliDoubleMasked16Exec(matches2, 'B', 0xff, t1_raw + i, + t1_raw + t1.length() - i - j); + ASSERT_EQ(t1_raw + t1.length() - i - j, rv); + + rv = vermicelliDoubleMasked16Exec(matches3, 
'A', CASE_CLEAR, + t1_raw + i, t1_raw + t1.length() - i - j); + ASSERT_EQ(t1_raw + t1.length() - i - j, rv); + + /* partial match */ + rv = vermicelliDoubleMasked16Exec(matches4, 'B', CASE_CLEAR, + t1_raw + i, t1_raw + t1.length() - i - j); + ASSERT_EQ(t1_raw + t1.length() - i - j - 1, rv); + + rv = vermicelliDoubleMasked16Exec(matches5, 'B', 0xff, t1_raw + i, + t1_raw + t1.length() - i - j); + ASSERT_EQ(t1_raw + t1.length() - i - j, rv); + } + } +} + +TEST(DoubleVermicelliMasked16, Exec1) { + std::string t1("bbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + m128 matches1; + bool ret = vermicelliDoubleMasked16Build('a', 'b', 0xff, 0xff, (u8 *)&matches1); + ASSERT_TRUE(ret); + m128 matches2; + ret = vermicelliDoubleMasked16Build('A', 'B', CASE_CLEAR, CASE_CLEAR, (u8 *)&matches2); + ASSERT_TRUE(ret); + m128 matches3; + ret = vermicelliDoubleMasked16Build('a', 'B', 0xff, CASE_CLEAR, (u8 *)&matches3); + ASSERT_TRUE(ret); + m128 matches4; + ret = vermicelliDoubleMasked16Build('A', 'b', CASE_CLEAR, 0xff, (u8 *)&matches4); + ASSERT_TRUE(ret); + m128 matches5; + ret = vermicelliDoubleMasked16Build('b', 'a', 0xff, 0xff, (u8 *)&matches5); + ASSERT_TRUE(ret); + m128 matches6; + ret = vermicelliDoubleMasked16Build('B', 'A', CASE_CLEAR, CASE_CLEAR, (u8 *)&matches6); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelliDoubleMasked16Exec(matches1, 'a', 0xff, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 18, rv); + + rv = vermicelliDoubleMasked16Exec(matches2, 'A', CASE_CLEAR, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 18, rv); + + rv = vermicelliDoubleMasked16Exec(matches3, 'a', 0xff, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 18, rv); + + rv = vermicelliDoubleMasked16Exec(matches4, 'A', CASE_CLEAR, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 18, rv); + + rv = vermicelliDoubleMasked16Exec(matches5, 'b', 0xff, + 
t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 17, rv); + + rv = vermicelliDoubleMasked16Exec(matches6, 'B', CASE_CLEAR, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 17, rv); + } +} + +TEST(DoubleVermicelliMasked16, Exec2) { + std::string t1("bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbaaaaabbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + m128 matches1; + bool ret = vermicelliDoubleMasked16Build('a', 'a', 0xff, 0xff, (u8 *)&matches1); + ASSERT_TRUE(ret); + m128 matches2; + ret = vermicelliDoubleMasked16Build('A', 'A', CASE_CLEAR, CASE_CLEAR, (u8 *)&matches2); + ASSERT_TRUE(ret); + m128 matches3; + ret = vermicelliDoubleMasked16Build('a', 'A', 0xff, CASE_CLEAR, (u8 *)&matches3); + ASSERT_TRUE(ret); + m128 matches4; + ret = vermicelliDoubleMasked16Build('A', 'a', CASE_CLEAR, 0xff, (u8 *)&matches4); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelliDoubleMasked16Exec(matches1, 'a', 0xff, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 17, rv); + + rv = vermicelliDoubleMasked16Exec(matches2, 'A', CASE_CLEAR, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 17, rv); + + rv = vermicelliDoubleMasked16Exec(matches3, 'a', 0xff, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 17, rv); + + rv = vermicelliDoubleMasked16Exec(matches4, 'A', CASE_CLEAR, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 17, rv); + } +} + +TEST(DoubleVermicelliMasked16, Exec3) { + /* 012345678901234567890123 */ + std::string t1("bbbbbbbbbbbbbbbbbaAaaAAaaaaaaaaaaaaaaaaaabbbbbbbaaaaabbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + m128 matches1; + bool ret = vermicelliDoubleMasked16Build('A', 'a', 0xff, 0xff, (u8 *)&matches1); + ASSERT_TRUE(ret); + m128 matches2; + ret = vermicelliDoubleMasked16Build('A', 'A', CASE_CLEAR, CASE_CLEAR, (u8 *)&matches2); + ASSERT_TRUE(ret); + m128 matches3; + ret = vermicelliDoubleMasked16Build('A', 'A', 0xff, 0xff, (u8 
*)&matches3); + ASSERT_TRUE(ret); + m128 matches4; + ret = vermicelliDoubleMasked16Build('a', 'A', 0xff, 0xff, (u8 *)&matches4); + ASSERT_TRUE(ret); + m128 matches5; + ret = vermicelliDoubleMasked16Build('a', 'A', 0xff, CASE_CLEAR, (u8 *)&matches5); + ASSERT_TRUE(ret); + m128 matches6; + ret = vermicelliDoubleMasked16Build('A', 'a', CASE_CLEAR, 0xff, (u8 *)&matches6); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelliDoubleMasked16Exec(matches1, 'A', 0xff, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 18, rv); + + rv = vermicelliDoubleMasked16Exec(matches2, 'A', CASE_CLEAR, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 17, rv); + + rv = vermicelliDoubleMasked16Exec(matches3, 'A', 0xff, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 21, rv); + + rv = vermicelliDoubleMasked16Exec(matches4, 'a', 0xff, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 17, rv); + + rv = vermicelliDoubleMasked16Exec(matches5, 'a', 0xff, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 17, rv); + + rv = vermicelliDoubleMasked16Exec(matches6, 'A', CASE_CLEAR, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 18, rv); + } +} + +TEST(DoubleVermicelliMasked16, Exec4) { + std::string t1("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + m128 matches1; + bool ret = vermicelliDoubleMasked16Build('a', 'a', 0xff, 0xff, (u8 *)&matches1); + ASSERT_TRUE(ret); + m128 matches2; + ret = vermicelliDoubleMasked16Build('A', 'A', CASE_CLEAR, CASE_CLEAR, (u8 *)&matches2); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 31; i++) { + t1[48 - i] = 'a'; + t1[48 - i + 1] = 'a'; + const u8 *rv = vermicelliDoubleMasked16Exec(matches1, 'a', 0xff, t1_raw, + t1_raw + t1.length()); + ASSERT_EQ(t1_raw + 48 - i, rv); + + rv = vermicelliDoubleMasked16Exec(matches2, 'A', CASE_CLEAR, t1_raw, + t1_raw + t1.length()); + 
ASSERT_EQ(t1_raw + 48 - i, rv); + } +} + +TEST(DoubleVermicelliMasked16, Exec5) { + std::string t1("bbbbbbbbbbbbbbbbbaCaGaOCaChBfcNgBFGiLbbbbbbbbbbbbbbbbbbbbbbbb"); + const u8 *t1_raw = (const u8 *)t1.c_str(); + + m128 matches1; + bool ret = vermicelliDoubleMasked16Build('a', 'B', 0xff, 0xde, (u8 *)&matches1); + ASSERT_TRUE(ret); + m128 matches2; + ret = vermicelliDoubleMasked16Build('a', 'D', 0xff, 0xdc, (u8 *)&matches2); + ASSERT_TRUE(ret); + m128 matches3; + ret = vermicelliDoubleMasked16Build('D', 'a', 0xdc, 0xff, (u8 *)&matches3); + ASSERT_TRUE(ret); + m128 matches4; + ret = vermicelliDoubleMasked16Build('A', 'B', 0xdf, 0xde, (u8 *)&matches4); + ASSERT_TRUE(ret); + m128 matches5; + ret = vermicelliDoubleMasked16Build('B', 'a', 0xde, 0xff, (u8 *)&matches5); + ASSERT_TRUE(ret); + m128 matches6; + ret = vermicelliDoubleMasked16Build('B', 'A', 0xde, 0xdf, (u8 *)&matches6); + ASSERT_TRUE(ret); + + for (size_t i = 0; i < 16; i++) { + const u8 *rv = vermicelliDoubleMasked16Exec(matches1, 'a', 0xff, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 17, rv); + + rv = vermicelliDoubleMasked16Exec(matches2, 'a', 0xff, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 19, rv); + + rv = vermicelliDoubleMasked16Exec(matches3, 'D', 0xdc, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 20, rv); + + rv = vermicelliDoubleMasked16Exec(matches4, 'A', 0xdf, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 17, rv); + + rv = vermicelliDoubleMasked16Exec(matches5, 'B', 0xde, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 16, rv); + + rv = vermicelliDoubleMasked16Exec(matches6, 'B', 0xde, + t1_raw + i, t1_raw + t1.length() - i); + ASSERT_EQ(t1_raw + 16, rv); + } +} + #endif // HAVE_SVE2 \ No newline at end of file