diff --git a/CMakeLists.txt b/CMakeLists.txt index e1bd2794..5d1d741e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -827,6 +827,7 @@ SET (hs_SRCS src/rose/rose_in_graph.h src/rose/rose_in_util.cpp src/rose/rose_in_util.h + src/util/accel_scheme.h src/util/alloc.cpp src/util/alloc.h src/util/bitfield.h diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index 2ad3c6dd..b75e0463 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -85,9 +85,9 @@ public: vector &reports_eod /* out */, u8 *isSingleReport /* out */, ReportID *arbReport /* out */) const override; - escape_info find_escape_strings(dstate_id_t this_idx) const override; + AccelScheme find_escape_strings(dstate_id_t this_idx) const override; size_t accelSize(void) const override { return sizeof(gough_accel); } - void buildAccel(dstate_id_t this_idx, const escape_info &info, + void buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out) override; u32 max_allowed_offset_accel() const override { return 0; } @@ -1146,31 +1146,32 @@ aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, return gough_dfa; } -escape_info gough_build_strat::find_escape_strings(dstate_id_t this_idx) const { - escape_info rv; +AccelScheme gough_build_strat::find_escape_strings(dstate_id_t this_idx) const { + AccelScheme rv; if (!contains(accel_gough_info, this_idx)) { - rv.outs = CharReach::dot(); - rv.outs2_broken = true; + rv.cr = CharReach::dot(); + rv.double_byte.clear(); return rv; } rv = mcclellan_build_strat::find_escape_strings(this_idx); - assert(!rv.offset); /* should have been limited by strat */ + assert(!rv.offset || rv.cr.all()); /* should have been limited by strat */ if (rv.offset) { - rv.outs = CharReach::dot(); - rv.outs2_broken = true; + rv.cr = CharReach::dot(); + rv.double_byte.clear(); return rv; } - if (!accel_gough_info.at(this_idx).two_byte) { - rv.outs2_broken = true; + if (rv.double_offset + || !accel_gough_info.at(this_idx).two_byte) { + rv.double_byte.clear(); } return rv; } -void gough_build_strat::buildAccel(dstate_id_t this_idx, const escape_info &info, +void gough_build_strat::buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out) { assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux)); gough_accel *accel = (gough_accel *)accel_out; diff --git a/src/nfa/limex_accel.c b/src/nfa/limex_accel.c index 0bfc9d85..2c73f9ff 100644 --- a/src/nfa/limex_accel.c +++ b/src/nfa/limex_accel.c @@ -49,209 +49,6 @@ #include "util/simd_utils_ssse3.h" #include "util/shuffle_ssse3.h" -static -const u8 *accelScan(const union AccelAux *aux, const u8 *ptr, const u8 *end) { - assert(ISALIGNED(aux)); // must be SIMD aligned for shufti - assert(end > ptr); - assert(end - ptr >= 16); // must be at least 16 bytes to scan - - const u8 *start = ptr; - u8 offset; - switch (aux->accel_type) { - case ACCEL_VERM: - DEBUG_PRINTF("single vermicelli for 0x%02hhx\n", aux->verm.c); - offset = aux->verm.offset; - ptr = vermicelliExec(aux->verm.c, 0, ptr, end); - break; - case ACCEL_VERM_NOCASE: - DEBUG_PRINTF("single vermicelli-nocase for 0x%02hhx\n", aux->verm.c); - offset = aux->verm.offset; - ptr = vermicelliExec(aux->verm.c, 1, ptr, end); - break; - case ACCEL_DVERM: - DEBUG_PRINTF("double vermicelli for 0x%02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2); - offset = aux->dverm.offset; - ptr = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 0, ptr, end); - break; - case ACCEL_DVERM_NOCASE: - DEBUG_PRINTF("double vermicelli-nocase for 0x%02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2); - offset = aux->dverm.offset; - ptr = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, - 1, ptr, end); - break; - case ACCEL_DVERM_MASKED: - DEBUG_PRINTF("double vermicelli masked for " - "0x%02hhx%02hhx/0x%02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2, - aux->dverm.m1, aux->dverm.m2); - offset = aux->dverm.offset; - ptr = vermicelliDoubleMaskedExec(aux->dverm.c1, aux->dverm.c2, - aux->dverm.m1, aux->dverm.m2, ptr, end); - break; - case ACCEL_MLVERM: - DEBUG_PRINTF("long vermicelli for 0x%02hhx\n", aux->mverm.c); - offset = aux->mverm.offset; - ptr = long_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len); - break; - case ACCEL_MLVERM_NOCASE: - DEBUG_PRINTF("long vermicelli-nocase for 0x%02hhx\n", aux->mverm.c); - offset = aux->mverm.offset; - ptr = long_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len); - break; - case ACCEL_MLGVERM: - DEBUG_PRINTF("long grab vermicelli for 0x%02hhx\n", aux->mverm.c); - offset = aux->mverm.offset; - ptr = longgrab_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len); - break; - case ACCEL_MLGVERM_NOCASE: - DEBUG_PRINTF("long grab vermicelli-nocase for 0x%02hhx\n", aux->mverm.c); - offset = aux->mverm.offset; - ptr = longgrab_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len); - break; - case ACCEL_MSVERM: - DEBUG_PRINTF("shift vermicelli for 0x%02hhx\n", aux->mverm.c); - offset = aux->mverm.offset; - ptr = shift_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len); - break; - case ACCEL_MSVERM_NOCASE: - DEBUG_PRINTF("shift vermicelli-nocase for 0x%02hhx\n", aux->mverm.c); - offset = aux->mverm.offset; - ptr = shift_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len); - break; - case ACCEL_MSGVERM: - DEBUG_PRINTF("shift grab vermicelli for 0x%02hhx\n", aux->mverm.c); - offset = aux->mverm.offset; - ptr = shiftgrab_vermicelliExec(aux->mverm.c, 0, ptr, end, aux->mverm.len); - break; - case ACCEL_MSGVERM_NOCASE: - DEBUG_PRINTF("shift grab vermicelli-nocase for 0x%02hhx\n", aux->mverm.c); - offset = aux->mverm.offset; - ptr = shiftgrab_vermicelliExec(aux->mverm.c, 1, ptr, end, aux->mverm.len); - break; - case ACCEL_MDSVERM: - DEBUG_PRINTF("double shift vermicelli for 0x%02hhx\n", aux->mdverm.c); - offset = aux->mdverm.offset; - ptr = doubleshift_vermicelliExec(aux->mdverm.c, 0, ptr, end, aux->mdverm.len1, aux->mdverm.len2); - break; - case ACCEL_MDSVERM_NOCASE: - DEBUG_PRINTF("double shift vermicelli-nocase for 0x%02hhx\n", aux->mdverm.c); - offset = aux->mverm.offset; - ptr = doubleshift_vermicelliExec(aux->mdverm.c, 1, ptr, end, aux->mdverm.len1, aux->mdverm.len2); - break; - case ACCEL_MDSGVERM: - DEBUG_PRINTF("double shift grab vermicelli for 0x%02hhx\n", aux->mdverm.c); - offset = aux->mverm.offset; - ptr = doubleshiftgrab_vermicelliExec(aux->mdverm.c, 0, ptr, end, aux->mdverm.len1, aux->mdverm.len2); - break; - case ACCEL_MDSGVERM_NOCASE: - DEBUG_PRINTF("double shift grab vermicelli-nocase for 0x%02hhx\n", aux->mdverm.c); - offset = aux->mverm.offset; - ptr = doubleshiftgrab_vermicelliExec(aux->mdverm.c, 1, ptr, end, aux->mdverm.len1, aux->mdverm.len2); - break; - case ACCEL_SHUFTI: - DEBUG_PRINTF("single shufti\n"); - offset = aux->shufti.offset; - ptr = shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end); - break; - case ACCEL_DSHUFTI: - DEBUG_PRINTF("double shufti\n"); - offset = aux->dshufti.offset; - ptr = shuftiDoubleExec(aux->dshufti.lo1, aux->dshufti.hi1, - aux->dshufti.lo2, aux->dshufti.hi2, ptr, end); - break; - case ACCEL_MLSHUFTI: - offset = aux->mshufti.offset; - ptr = long_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end, aux->mshufti.len); - break; - case ACCEL_MLGSHUFTI: - offset = aux->mshufti.offset; - ptr = longgrab_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end, aux->mshufti.len); - break; - case ACCEL_MSSHUFTI: - offset = aux->mshufti.offset; - ptr = shift_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end, aux->mshufti.len); - break; - case ACCEL_MSGSHUFTI: - offset = aux->mshufti.offset; - ptr = shiftgrab_shuftiExec(aux->mshufti.lo, aux->mshufti.hi, ptr, end, aux->mshufti.len); - break; - case ACCEL_MDSSHUFTI: - offset = aux->mdshufti.offset; - ptr = doubleshift_shuftiExec(aux->mdshufti.lo, aux->mdshufti.hi, ptr, end, - aux->mdshufti.len1, aux->mdshufti.len2); - break; - case ACCEL_MDSGSHUFTI: - offset = aux->mdshufti.offset; - ptr = doubleshiftgrab_shuftiExec(aux->mdshufti.lo, aux->mdshufti.hi, ptr, end, - aux->mdshufti.len1, aux->mdshufti.len2); - break; - case ACCEL_TRUFFLE: - DEBUG_PRINTF("truffle shuffle\n"); - offset = aux->truffle.offset; - ptr = truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end); - break; - case ACCEL_MLTRUFFLE: - DEBUG_PRINTF("long match truffle shuffle\n"); - offset = aux->mtruffle.offset; - ptr = long_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2, - ptr, end, aux->mtruffle.len); - break; - case ACCEL_MLGTRUFFLE: - DEBUG_PRINTF("long grab match truffle shuffle\n"); - offset = aux->mtruffle.offset; - ptr = longgrab_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2, - ptr, end, aux->mtruffle.len); - break; - case ACCEL_MSTRUFFLE: - DEBUG_PRINTF("shift match truffle shuffle\n"); - offset = aux->mtruffle.offset; - ptr = shift_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2, - ptr, end, aux->mtruffle.len); - break; - case ACCEL_MSGTRUFFLE: - DEBUG_PRINTF("shift grab match truffle shuffle\n"); - offset = aux->mtruffle.offset; - ptr = shiftgrab_truffleExec(aux->mtruffle.mask1, aux->mtruffle.mask2, - ptr, end, aux->mtruffle.len); - break; - case ACCEL_MDSTRUFFLE: - DEBUG_PRINTF("double shift match truffle shuffle\n"); - offset = aux->mdtruffle.offset; - ptr = doubleshift_truffleExec(aux->mdtruffle.mask1, - aux->mdtruffle.mask2, ptr, end, - aux->mdtruffle.len1, - aux->mdtruffle.len2); - break; - case ACCEL_MDSGTRUFFLE: - DEBUG_PRINTF("double shift grab match truffle shuffle\n"); - offset = aux->mdtruffle.offset; - ptr = doubleshiftgrab_truffleExec(aux->mdtruffle.mask1, - aux->mdtruffle.mask2, ptr, end, - aux->mdtruffle.len1, - aux->mdtruffle.len2); - break; - case ACCEL_RED_TAPE: - ptr = end; /* there is no escape */ - offset = aux->generic.offset; - break; - default: - /* no acceleration, fall through and return current ptr */ - offset = 0; - break; - } - - if (offset) { - ptr -= offset; - if (ptr < start) { - return start; - } - } - - return ptr; -} - static really_inline size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux, const u8 *input, u32 idx, size_t i, size_t end) { @@ -272,7 +69,7 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux, } aux = aux + aux_idx; - const u8 *ptr = accelScan(aux, &input[i], &input[end]); + const u8 *ptr = run_accel(aux, &input[i], &input[end]); assert(ptr >= &input[i]); size_t j = (size_t)(ptr - input); DEBUG_PRINTF("accel skipped %zu of %zu chars\n", (j - i), (end - i)); diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index b4418730..8bc0b9d8 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -128,10 +128,10 @@ mstate_aux *getAux(NFA *n, dstate_id_t i) { } static -bool double_byte_ok(const escape_info &info) { - return !info.outs2_broken - && info.outs2_single.count() < info.outs2.size() - && info.outs2_single.count() <= 2 && !info.outs2.empty(); +bool double_byte_ok(const AccelScheme &info) { + return !info.double_byte.empty() + && info.double_cr.count() < info.double_byte.size() + && info.double_cr.count() <= 2 && !info.double_byte.empty(); } static @@ -189,7 +189,7 @@ u32 mcclellan_build_strat::max_allowed_offset_accel() const { return ACCEL_DFA_MAX_OFFSET_DEPTH; } -escape_info mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx) +AccelScheme mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx) const { return find_mcclellan_escape_info(rdfa, this_idx, max_allowed_offset_accel()); @@ -197,33 +197,33 @@ escape_info mcclellan_build_strat::find_escape_strings(dstate_id_t this_idx) /** builds acceleration schemes for states */ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, - const escape_info &info, + const AccelScheme &info, void *accel_out) { AccelAux *accel = (AccelAux *)accel_out; DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset, - info.outs2_offset); + info.double_offset); accel->generic.offset = verify_u8(info.offset); - if (double_byte_ok(info) && info.outs2_single.none() - && info.outs2.size() == 1) { + if (double_byte_ok(info) && info.double_cr.none() + && info.double_byte.size() == 1) { accel->accel_type = ACCEL_DVERM; - accel->dverm.c1 = info.outs2.begin()->first; - accel->dverm.c2 = info.outs2.begin()->second; - accel->dverm.offset = verify_u8(info.outs2_offset); + accel->dverm.c1 = info.double_byte.begin()->first; + accel->dverm.c2 = info.double_byte.begin()->second; + accel->dverm.offset = verify_u8(info.double_offset); DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx); return; } - if (double_byte_ok(info) && info.outs2_single.none() - && (info.outs2.size() == 2 || info.outs2.size() == 4)) { + if (double_byte_ok(info) && info.double_cr.none() + && (info.double_byte.size() == 2 || info.double_byte.size() == 4)) { bool ok = true; - assert(!info.outs2.empty()); - u8 firstC = info.outs2.begin()->first & CASE_CLEAR; - u8 secondC = info.outs2.begin()->second & CASE_CLEAR; + assert(!info.double_byte.empty()); + u8 firstC = info.double_byte.begin()->first & CASE_CLEAR; + u8 secondC = info.double_byte.begin()->second & CASE_CLEAR; - for (const pair &p : info.outs2) { + for (const pair &p : info.double_byte) { if ((p.first & CASE_CLEAR) != firstC || (p.second & CASE_CLEAR) != secondC) { ok = false; @@ -235,18 +235,18 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, accel->accel_type = ACCEL_DVERM_NOCASE; accel->dverm.c1 = firstC; accel->dverm.c2 = secondC; - accel->dverm.offset = verify_u8(info.outs2_offset); + accel->dverm.offset = verify_u8(info.double_offset); DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx); return; } u8 m1; u8 m2; - if (buildDvermMask(info.outs2, &m1, &m2)) { + if (buildDvermMask(info.double_byte, &m1, &m2)) { accel->accel_type = ACCEL_DVERM_MASKED; - accel->dverm.offset = verify_u8(info.outs2_offset); - accel->dverm.c1 = info.outs2.begin()->first & m1; - accel->dverm.c2 = info.outs2.begin()->second & m2; + accel->dverm.offset = verify_u8(info.double_offset); + accel->dverm.c1 = info.double_byte.begin()->first & m1; + accel->dverm.c2 = info.double_byte.begin()->second & m2; accel->dverm.m1 = m1; accel->dverm.m2 = m2; DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", @@ -256,52 +256,52 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, } if (double_byte_ok(info) - && shuftiBuildDoubleMasks(info.outs2_single, info.outs2, + && shuftiBuildDoubleMasks(info.double_cr, info.double_byte, &accel->dshufti.lo1, &accel->dshufti.hi1, &accel->dshufti.lo2, &accel->dshufti.hi2)) { accel->accel_type = ACCEL_DSHUFTI; - accel->dshufti.offset = verify_u8(info.outs2_offset); + accel->dshufti.offset = verify_u8(info.double_offset); DEBUG_PRINTF("state %hu is double shufti\n", this_idx); return; } - if (info.outs.none()) { + if (info.cr.none()) { accel->accel_type = ACCEL_RED_TAPE; DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape" " from which there is no escape\n", this_idx); return; } - if (info.outs.count() == 1) { + if (info.cr.count() == 1) { accel->accel_type = ACCEL_VERM; - accel->verm.c = info.outs.find_first(); + accel->verm.c = info.cr.find_first(); DEBUG_PRINTF("state %hu is vermicelli\n", this_idx); return; } - if (info.outs.count() == 2 && info.outs.isCaselessChar()) { + if (info.cr.count() == 2 && info.cr.isCaselessChar()) { accel->accel_type = ACCEL_VERM_NOCASE; - accel->verm.c = info.outs.find_first() & CASE_CLEAR; + accel->verm.c = info.cr.find_first() & CASE_CLEAR; DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx); return; } - if (info.outs.count() > ACCEL_DFA_MAX_FLOATING_STOP_CHAR) { + if (info.cr.count() > ACCEL_DFA_MAX_FLOATING_STOP_CHAR) { accel->accel_type = ACCEL_NONE; DEBUG_PRINTF("state %hu is too broad\n", this_idx); return; } accel->accel_type = ACCEL_SHUFTI; - if (-1 != shuftiBuildMasks(info.outs, &accel->shufti.lo, + if (-1 != shuftiBuildMasks(info.cr, &accel->shufti.lo, &accel->shufti.hi)) { DEBUG_PRINTF("state %hu is shufti\n", this_idx); return; } - assert(!info.outs.none()); + assert(!info.cr.none()); accel->accel_type = ACCEL_TRUFFLE; - truffleBuildMasks(info.outs, &accel->truffle.mask1, &accel->truffle.mask2); + truffleBuildMasks(info.cr, &accel->truffle.mask1, &accel->truffle.mask2); DEBUG_PRINTF("state %hu is truffle\n", this_idx); } @@ -486,7 +486,7 @@ void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset, } static -void fillAccelOut(const map &accel_escape_info, +void fillAccelOut(const map &accel_escape_info, set *accel_states) { for (dstate_id_t i : accel_escape_info | map_keys) { accel_states->insert(i); @@ -581,7 +581,7 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, unique_ptr ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); - map accel_escape_info + map accel_escape_info = populateAccelerationInfo(info.raw, info.strat, cc.grey); size_t tran_size = (1 << info.getAlphaShift()) @@ -748,7 +748,7 @@ void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table, static void allocateFSN8(dfa_info &info, - const map &accel_escape_info, + const map &accel_escape_info, u16 *accel_limit, u16 *accept_limit) { info.states[0].impl_id = 0; /* dead is always 0 */ @@ -801,7 +801,7 @@ aligned_unique_ptr mcclellanCompile8(dfa_info &info, unique_ptr ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); - map accel_escape_info + map accel_escape_info = populateAccelerationInfo(info.raw, info.strat, cc.grey); size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size(); diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index 8dcc161b..781e262d 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -31,6 +31,7 @@ #include "rdfa.h" #include "ue2common.h" +#include "util/accel_scheme.h" #include "util/alloc.h" #include "util/charreach.h" #include "util/ue2_containers.h" @@ -54,15 +55,6 @@ struct raw_report_info { std::vector &ro /* out */) const = 0; }; -struct escape_info { - CharReach outs; - CharReach outs2_single; - flat_set> outs2; - bool outs2_broken = false; - u32 offset = 0; - u32 outs2_offset = 0; -}; - class dfa_build_strat { public: virtual ~dfa_build_strat(); @@ -72,9 +64,9 @@ public: std::vector &reports_eod /* out */, u8 *isSingleReport /* out */, ReportID *arbReport /* out */) const = 0; - virtual escape_info find_escape_strings(dstate_id_t this_idx) const = 0; + virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const = 0; virtual size_t accelSize(void) const = 0; - virtual void buildAccel(dstate_id_t this_idx, const escape_info &info, + virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out) = 0; }; @@ -87,9 +79,9 @@ public: std::vector &reports_eod /* out */, u8 *isSingleReport /* out */, ReportID *arbReport /* out */) const override; - escape_info find_escape_strings(dstate_id_t this_idx) const override; + AccelScheme find_escape_strings(dstate_id_t this_idx) const override; size_t accelSize(void) const override; - void buildAccel(dstate_id_t this_idx,const escape_info &info, + void buildAccel(dstate_id_t this_idx,const AccelScheme &info, void *accel_out) override; virtual u32 max_allowed_offset_accel() const; diff --git a/src/nfa/mcclellancompile_accel.cpp b/src/nfa/mcclellancompile_accel.cpp index 3e73d31d..432c035d 100644 --- a/src/nfa/mcclellancompile_accel.cpp +++ b/src/nfa/mcclellancompile_accel.cpp @@ -178,25 +178,14 @@ vector > generate_paths(const raw_dfa &rdfa, dstate_id_t base, } static -escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, +AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, u32 max_allowed_accel_offset) { DEBUG_PRINTF("looking for accel for %hu\n", base); vector > paths = generate_paths(rdfa, base, max_allowed_accel_offset + 1); AccelScheme as = findBestAccelScheme(paths, CharReach(), true); - escape_info rv; - rv.offset = as.offset; - rv.outs = as.cr; - if (!as.double_byte.empty()) { - rv.outs2_single = as.double_cr; - rv.outs2 = as.double_byte; - rv.outs2_offset = as.double_offset; - rv.outs2_broken = false; - } else { - rv.outs2_broken = true; - } DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset); - return rv; + return as; } static @@ -214,18 +203,18 @@ vector find_nonexit_symbols(const raw_dfa &rdfa, static set find_region(const raw_dfa &rdfa, dstate_id_t base, - const escape_info &ei) { + const AccelScheme &ei) { DEBUG_PRINTF("looking for region around %hu\n", base); set region = {base}; - if (!ei.outs2_broken) { + if (!ei.double_byte.empty()) { return region; } - DEBUG_PRINTF("accel %s+%u\n", describeClass(ei.outs).c_str(), ei.offset); + DEBUG_PRINTF("accel %s+%u\n", describeClass(ei.cr).c_str(), ei.offset); - const CharReach &escape = ei.outs; + const CharReach &escape = ei.cr; auto nonexit_symbols = find_nonexit_symbols(rdfa, escape); vector pending = {base}; @@ -248,16 +237,16 @@ set find_region(const raw_dfa &rdfa, dstate_id_t base, } static -bool better(const escape_info &a, const escape_info &b) { - if (!a.outs2_broken && b.outs2_broken) { +bool better(const AccelScheme &a, const AccelScheme &b) { + if (!a.double_byte.empty() && b.double_byte.empty()) { return true; } - if (!b.outs2_broken) { + if (!b.double_byte.empty()) { return false; } - return a.outs.count() < b.outs.count(); + return a.cr.count() < b.cr.count(); } static @@ -271,10 +260,10 @@ vector reverse_alpha_remapping(const raw_dfa &rdfa) { return rv; } -map populateAccelerationInfo(const raw_dfa &rdfa, +map populateAccelerationInfo(const raw_dfa &rdfa, const dfa_build_strat &strat, const Grey &grey) { - map rv; + map rv; if (!grey.accelerateDFA) { return rv; } @@ -283,7 +272,7 @@ map populateAccelerationInfo(const raw_dfa &rdfa, DEBUG_PRINTF("sds %hu\n", sds_proxy); for (size_t i = 0; i < rdfa.states.size(); i++) { - escape_info ei = strat.find_escape_strings(i); + AccelScheme ei = strat.find_escape_strings(i); if (i == DEAD_STATE) { continue; @@ -301,25 +290,25 @@ map populateAccelerationInfo(const raw_dfa &rdfa, : ACCEL_DFA_MAX_STOP_CHAR; DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit); - if (ei.outs.count() > single_limit) { + if (ei.cr.count() > single_limit) { DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i, - ei.outs.count()); + ei.cr.count()); continue; } DEBUG_PRINTF("state %zu should be accelerable %zu\n", - i, ei.outs.count()); + i, ei.cr.count()); rv[i] = ei; } /* provide accleration states to states in the region of sds */ if (contains(rv, sds_proxy)) { - escape_info sds_ei = rv[sds_proxy]; - sds_ei.outs2_broken = true; /* region based on single byte scheme + AccelScheme sds_ei = rv[sds_proxy]; + sds_ei.double_byte.clear(); /* region based on single byte scheme * may differ from double byte */ DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n", - sds_ei.outs.count()); + sds_ei.cr.count()); auto sds_region = find_region(rdfa, sds_proxy, sds_ei); for (auto s : sds_region) { if (!contains(rv, s) || better(sds_ei, rv[s])) { @@ -332,18 +321,20 @@ map populateAccelerationInfo(const raw_dfa &rdfa, } static -bool double_byte_ok(const escape_info &info) { - return !info.outs2_broken - && info.outs2_single.count() < info.outs2.size() - && info.outs2_single.count() <= 2 && !info.outs2.empty(); +bool double_byte_ok(const AccelScheme &info) { + return !info.double_byte.empty() + && info.double_cr.count() < info.double_byte.size() + && info.double_cr.count() <= 2 && !info.double_byte.empty(); } -escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, - dstate_id_t this_idx, +AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx, u32 max_allowed_accel_offset) { - escape_info rv; + AccelScheme rv; + rv.cr.clear(); + rv.offset = 0; const dstate &raw = rdfa.states[this_idx]; const vector rev_map = reverse_alpha_remapping(rdfa); + bool outs2_broken = false; for (u32 i = 0; i < rev_map.size(); i++) { if (raw.next[i] == this_idx) { @@ -352,17 +343,17 @@ escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, const CharReach &cr_i = rev_map.at(i); - rv.outs |= cr_i; + rv.cr |= cr_i; DEBUG_PRINTF("next is %hu\n", raw.next[i]); const dstate &raw_next = rdfa.states[raw.next[i]]; if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) { DEBUG_PRINTF("leads to report\n"); - rv.outs2_broken = true; /* cannot accelerate over reports */ + outs2_broken = true; /* cannot accelerate over reports */ } - if (rv.outs2_broken) { + if (outs2_broken) { continue; } @@ -378,35 +369,39 @@ escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, } if (cr_i.count() * cr_all_j.count() > 8) { - DEBUG_PRINTF("adding sym %u to outs2_single\n", i); - rv.outs2_single |= cr_i; + DEBUG_PRINTF("adding sym %u to double_cr\n", i); + rv.double_cr |= cr_i; } else { for (auto ii = cr_i.find_first(); ii != CharReach::npos; ii = cr_i.find_next(ii)) { for (auto jj = cr_all_j.find_first(); jj != CharReach::npos; jj = cr_all_j.find_next(jj)) { - rv.outs2.emplace((u8)ii, (u8)jj); + rv.double_byte.emplace((u8)ii, (u8)jj); } } } - if (rv.outs2.size() > 8) { + if (rv.double_byte.size() > 8) { DEBUG_PRINTF("outs2 too big\n"); - rv.outs2_broken = true; + outs2_broken = true; } } + if (outs2_broken) { + rv.double_byte.clear(); + } + DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa)); - DEBUG_PRINTF("broken %d\n", rv.outs2_broken); + DEBUG_PRINTF("broken %d\n", outs2_broken); if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) && this_idx == rdfa.start_floating && this_idx != DEAD_STATE) { DEBUG_PRINTF("looking for offset accel at %u\n", this_idx); auto offset = look_for_offset_accel(rdfa, this_idx, max_allowed_accel_offset); - DEBUG_PRINTF("width %zu vs %zu\n", offset.outs.count(), - rv.outs.count()); - if (double_byte_ok(offset) || offset.outs.count() < rv.outs.count()) { + DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(), + rv.cr.count()); + if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) { DEBUG_PRINTF("using offset accel\n"); rv = offset; } diff --git a/src/nfa/mcclellancompile_accel.h b/src/nfa/mcclellancompile_accel.h index aa1672b0..427267d7 100644 --- a/src/nfa/mcclellancompile_accel.h +++ b/src/nfa/mcclellancompile_accel.h @@ -48,11 +48,11 @@ struct Grey; * than normal states as accelerating sds is important. Matches NFA value */ #define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 -std::map populateAccelerationInfo(const raw_dfa &rdfa, +std::map populateAccelerationInfo(const raw_dfa &rdfa, const dfa_build_strat &strat, const Grey &grey); -escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, +AccelScheme find_mcclellan_escape_info(const raw_dfa &rdfa, dstate_id_t this_idx, u32 max_allowed_accel_offset); diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index c5f4e4bc..cb3d1210 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -37,6 +37,7 @@ #include "ng_misc_opt.h" #include "ue2common.h" #include "nfa/accelcompile.h" +#include "util/accel_scheme.h" #include "util/charreach.h" #include "util/order_check.h" #include "util/ue2_containers.h" @@ -47,7 +48,6 @@ namespace ue2 { /* compile time accel defs */ -#define MAX_ACCEL_DEPTH 4 #define MAX_MERGED_ACCEL_STOPS 200 #define ACCEL_MAX_STOP_CHAR 24 #define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */ @@ -65,14 +65,6 @@ void findAccelFriends(const NGHolder &g, NFAVertex v, #define DOUBLE_SHUFTI_LIMIT 20 -struct AccelScheme { - ue2::flat_set > double_byte; - CharReach cr = CharReach::dot(); - CharReach double_cr; - u32 offset = MAX_ACCEL_DEPTH + 1; - u32 double_offset = 0; -}; - NFAVertex get_sds_or_proxy(const NGHolder &g); AccelScheme nfaFindAccel(const NGHolder &g, const std::vector &verts, diff --git a/src/util/accel_scheme.h b/src/util/accel_scheme.h new file mode 100644 index 00000000..f524fe93 --- /dev/null +++ b/src/util/accel_scheme.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ACCEL_SCHEME_H +#define ACCEL_SCHEME_H + +#include "util/charreach.h" +#include "util/ue2_containers.h" + +#include + +namespace ue2 { + +#define MAX_ACCEL_DEPTH 4 + +struct AccelScheme { + flat_set > double_byte; + CharReach cr = CharReach::dot(); + CharReach double_cr; + u32 offset = MAX_ACCEL_DEPTH + 1; + u32 double_offset = 0; +}; + +} + +#endif