diff --git a/src/nfa/accelcompile.cpp b/src/nfa/accelcompile.cpp index 5739618a..6f3b6e8a 100644 --- a/src/nfa/accelcompile.cpp +++ b/src/nfa/accelcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -94,7 +94,6 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) { DEBUG_PRINTF("unable to accelerate case with %zu outs\n", outs); } -static bool isCaselessDouble(const flat_set> &stop) { // test for vector containing if (stop.size() != 4) { diff --git a/src/nfa/accelcompile.h b/src/nfa/accelcompile.h index 7bf7fe5d..d479a545 100644 --- a/src/nfa/accelcompile.h +++ b/src/nfa/accelcompile.h @@ -56,6 +56,8 @@ struct MultibyteAccelInfo { multiaccel_type type = MAT_NONE; }; +bool isCaselessDouble(const flat_set> &stop); + struct AccelInfo { AccelInfo() : single_offset(0U), double_offset(0U), single_stops(CharReach::dot()), diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 7fa01d8a..5d51feb9 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -566,12 +566,29 @@ bool containsBadSubset(const limex_accel_info &accel, } static -void doAccelCommon(NGHolder &g, - ue2::unordered_map &accel_map, - const ue2::unordered_map &state_ids, - const map &br_cyclic, - const u32 num_states, limex_accel_info *accel, - const CompileContext &cc) { +bool is_too_wide(const AccelScheme &as) { + return as.cr.count() > MAX_MERGED_ACCEL_STOPS; +} + +static +void fillAccelInfo(build_info &bi) { + if (!bi.do_accel) { + return; + } + + NGHolder &g = bi.h; + limex_accel_info &accel = bi.accel; + unordered_map &accel_map = accel.accel_map; + const map &br_cyclic = bi.br_cyclic; + const CompileContext &cc = bi.cc; + const unordered_map &state_ids = bi.state_ids; + const u32 num_states = bi.num_states; + + nfaFindAccelSchemes(g, br_cyclic, &accel_map); + filterAccelStates(g, bi.tops, &accel_map); + + assert(accel_map.size() <= NFA_MAX_ACCEL_STATES); + vector refined_cr = reduced_cr(g, br_cyclic); vector astates; @@ -602,7 +619,7 @@ void doAccelCommon(NGHolder &g, } } - if (containsBadSubset(*accel, state_set, effective_sds)) { + if (containsBadSubset(accel, state_set, effective_sds)) { DEBUG_PRINTF("accel %u has bad subset\n", i); continue; /* if a subset failed to build we would too */ } @@ -610,19 +627,20 @@ void doAccelCommon(NGHolder &g, const bool allow_wide = allow_wide_accel(states, g, sds_or_proxy); AccelScheme as = nfaFindAccel(g, states, refined_cr, br_cyclic, - allow_wide); - if (as.cr.count() > MAX_MERGED_ACCEL_STOPS) { + allow_wide, true); + if (is_too_wide(as)) { DEBUG_PRINTF("accel %u too wide (%zu, %d)\n", i, as.cr.count(), MAX_MERGED_ACCEL_STOPS); continue; } - DEBUG_PRINTF("accel %u ok with offset %u\n", i, as.offset); + DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset, + as.double_offset); // try multibyte acceleration first MultibyteAccelInfo mai = nfaCheckMultiAccel(g, states, cc); - precalcAccel &pa = accel->precalc[state_set]; + precalcAccel &pa = accel.precalc[state_set]; useful |= state_set; // if we successfully built a multibyte accel scheme, use that @@ -635,17 +653,11 @@ void doAccelCommon(NGHolder &g, pa.single_offset = as.offset; pa.single_cr = as.cr; - - if (states.size() == 1) { - DoubleAccelInfo b = findBestDoubleAccelInfo(g, states.front()); - if (pa.single_cr.count() > b.stop1.count()) { - /* insert this information into the precalc accel info as it is - * better than the single scheme */ - pa.double_offset = b.offset; - pa.double_lits = b.stop2; - pa.double_cr = b.stop1; - } - } + if (as.double_byte.size() != 0) { + pa.double_offset = as.double_offset; + pa.double_lits = as.double_byte; + pa.double_cr = as.double_cr; + }; } for (const auto &m : accel_map) { @@ -663,33 +675,21 @@ void doAccelCommon(NGHolder &g, state_set.set(state_id); bool is_multi = false; - auto p_it = accel->precalc.find(state_set); - if (p_it != accel->precalc.end()) { + auto p_it = accel.precalc.find(state_set); + if (p_it != accel.precalc.end()) { const precalcAccel &pa = p_it->second; offset = max(pa.double_offset, pa.single_offset); is_multi = pa.ma_info.type != MultibyteAccelInfo::MAT_NONE; assert(offset <= MAX_ACCEL_DEPTH); } - accel->accelerable.insert(v); - if (!is_multi) - findAccelFriends(g, v, br_cyclic, offset, &accel->friends[v]); + accel.accelerable.insert(v); + if (!is_multi) { + findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]); + } } } -static -void fillAccelInfo(build_info &bi) { - if (!bi.do_accel) { - return; - } - - nfaFindAccelSchemes(bi.h, bi.br_cyclic, &bi.accel.accel_map); - filterAccelStates(bi.h, bi.tops, &bi.accel.accel_map); - assert(bi.accel.accel_map.size() <= NFA_MAX_ACCEL_STATES); - doAccelCommon(bi.h, bi.accel.accel_map, bi.state_ids, bi.br_cyclic, - bi.num_states, &bi.accel, bi.cc); -} - /** The AccelAux structure has large alignment specified, and this makes some * compilers do odd things unless we specify a custom allocator. */ typedef vector > diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 9b21b8c4..279f454e 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -201,7 +201,8 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, void *accel_out) { AccelAux *accel = (AccelAux *)accel_out; - DEBUG_PRINTF("accelerations scheme has offset %u\n", info.offset); + DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset, + info.outs2_offset); accel->generic.offset = verify_u8(info.offset); if (double_byte_ok(info) && info.outs2_single.none() @@ -209,6 +210,7 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, accel->accel_type = ACCEL_DVERM; accel->dverm.c1 = info.outs2.begin()->first; accel->dverm.c2 = info.outs2.begin()->second; + accel->dverm.offset = verify_u8(info.outs2_offset); DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx); return; } @@ -233,6 +235,7 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, accel->accel_type = ACCEL_DVERM_NOCASE; accel->dverm.c1 = firstC; accel->dverm.c2 = secondC; + accel->dverm.offset = verify_u8(info.outs2_offset); DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx); return; } @@ -240,6 +243,7 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx, if (double_byte_ok(info)) { accel->accel_type = ACCEL_DSHUFTI; + accel->dshufti.offset = verify_u8(info.outs2_offset); shuftiBuildDoubleMasks(info.outs2_single, info.outs2, &accel->dshufti.lo1, &accel->dshufti.hi1, diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index d4b4325d..8dcc161b 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -60,6 +60,7 @@ struct escape_info { flat_set> outs2; bool outs2_broken = false; u32 offset = 0; + u32 outs2_offset = 0; }; class dfa_build_strat { diff --git a/src/nfa/mcclellancompile_accel.cpp b/src/nfa/mcclellancompile_accel.cpp index 67f4a39e..471d0d53 100644 --- a/src/nfa/mcclellancompile_accel.cpp +++ b/src/nfa/mcclellancompile_accel.cpp @@ -183,11 +183,18 @@ escape_info look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, DEBUG_PRINTF("looking for accel for %hu\n", base); vector > paths = generate_paths(rdfa, base, max_allowed_accel_offset + 1); - AccelScheme as = findBestAccelScheme(paths, CharReach()); + AccelScheme as = findBestAccelScheme(paths, CharReach(), true); escape_info rv; - rv.outs2_broken = true; rv.offset = as.offset; rv.outs = as.cr; + if (!as.double_byte.empty()) { + rv.outs2_single = as.double_cr; + rv.outs2 = as.double_byte; + rv.outs2_offset = as.double_offset; + rv.outs2_broken = false; + } else { + rv.outs2_broken = true; + } DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset); return rv; } @@ -308,10 +315,15 @@ map populateAccelerationInfo(const raw_dfa &rdfa, /* provide accleration states to states in the region of sds */ if (contains(rv, sds_proxy)) { - auto sds_region = find_region(rdfa, sds_proxy, rv[sds_proxy]); + escape_info sds_ei = rv[sds_proxy]; + sds_ei.outs2_broken = true; /* region based on single byte scheme + * may differ from double byte */ + DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n", + sds_ei.outs.count()); + auto sds_region = find_region(rdfa, sds_proxy, sds_ei); for (auto s : sds_region) { - if (!contains(rv, s) || better(rv[sds_proxy], rv[s])) { - rv[s] = rv[sds_proxy]; + if (!contains(rv, s) || better(sds_ei, rv[s])) { + rv[s] = sds_ei; } } } @@ -395,7 +407,7 @@ escape_info find_mcclellan_escape_info(const raw_dfa &rdfa, max_allowed_accel_offset); DEBUG_PRINTF("width %zu vs %zu\n", offset.outs.count(), rv.outs.count()); - if (offset.outs.count() < rv.outs.count()) { + if (double_byte_ok(offset) || offset.outs.count() < rv.outs.count()) { DEBUG_PRINTF("using offset accel\n"); rv = offset; } diff --git a/src/nfagraph/ng_limex_accel.cpp b/src/nfagraph/ng_limex_accel.cpp index 41eda35d..63ec546b 100644 --- a/src/nfagraph/ng_limex_accel.cpp +++ b/src/nfagraph/ng_limex_accel.cpp @@ -132,199 +132,6 @@ void findAccelFriends(const NGHolder &g, NFAVertex v, } } -static -void buildTwoByteStops(flat_set> &twobyte, const CharReach &cr1, - const CharReach &cr2) { - for (size_t c1 = cr1.find_first(); c1 != cr1.npos; c1 = cr1.find_next(c1)) { - for (size_t c2 = cr2.find_first(); c2 != cr2.npos; - c2 = cr2.find_next(c2)) { - twobyte.emplace((u8)c1, (u8)c2); - } - } -} - -static -void findStopLiteralsAtVertex(NFAVertex v, const NGHolder &g, - DoubleAccelInfo &build) { - DEBUG_PRINTF("state %u\n", g[v].index); - - // double-byte accel is possible: calculate all single- and double-byte - // accel literals. - const CharReach &cr1 = g[v].char_reach; - - if (edge(v, g.accept, g).second) { - // If this first byte is an accept state, it must contribute a - // single-byte escape. We can still go on and calculate additional - // double-byte ones, though. - /* TODO: fix for rose */ - build.stop1 |= cr1; - } - - flat_set> twobyte; // for just this starting state - bool single = false; - - for (auto w : adjacent_vertices_range(v, g)) { - if (w == g.accept || w == g.acceptEod) { - continue; - } - const CharReach &cr2 = g[w].char_reach; - size_t count = cr1.count() * cr2.count() + build.stop2.size(); - if (count > 0 && count <= 8) { // can't do more than 8 two-byte - buildTwoByteStops(twobyte, cr1, cr2); - } else { - // two many two-byte literals, add the first byte as single - single = true; - break; - } - } - - if (single || twobyte.empty()) { - assert(!cr1.none()); - build.stop1 |= cr1; - } else { - assert(!twobyte.empty()); - build.stop2.insert(twobyte.begin(), twobyte.end()); - } -} - -static -bool is_bit5_insensitive(const flat_set> &stop) { - if (stop.size() != 4) { - return false; - } - - const u8 a = stop.begin()->first & CASE_CLEAR; - const u8 b = stop.begin()->second & CASE_CLEAR; - - for (flat_set>::const_iterator it = stop.begin(); - it != stop.end(); ++it) { - if ((it->first & CASE_CLEAR) != a || (it->second & CASE_CLEAR) != b) { - return false; - } - } - - return true; -} - -static -bool is_dverm(const DoubleAccelInfo &a) { - if (a.stop1.any()) { - return false; - } - - if (a.stop2.size() == 1) { - return true; - } - - return is_bit5_insensitive(a.stop2); -} - -static -bool is_double_better(const DoubleAccelInfo &a, const DoubleAccelInfo &b) { - /* Note: this is not an operator< */ - - if (a.stop2.empty()) { - return false; - } - - if (b.stop2.empty()) { - return true; - } - - if (a.stop1.count() > b.stop1.count()) { - return false; - } - - if (a.stop1.count() < b.stop1.count()) { - return true; - } - - bool a_dvm = is_dverm(a); - bool b_dvm = is_dverm(b); - - if (b_dvm && !a_dvm) { - return false; - } - - if (!b_dvm && a_dvm) { - return true; - } - - if (a.stop2.size() > b.stop2.size()) { - return false; - } - - if (a.stop2.size() < b.stop2.size()) { - return true; - } - - return a.offset < b.offset; -} - -/** \brief Find the escape literals for a two byte accel at the given accel - * offset */ -static -void findDoubleAccel(const NGHolder &g, NFAVertex v, u32 accel_offset, - DoubleAccelInfo &build) { - DEBUG_PRINTF("find double accel +%u for vertex %u\n", accel_offset, - g[v].index); - build.offset = accel_offset; - - // Our accel state contributes single-byte escapes - build.stop1 |= ~g[v].char_reach; - - flat_set searchStates; // states that contribute stop literals - searchStates.insert(v); /* TODO: verify */ - - /* Note: We cannot search past an accepting state */ - /* TODO: remove restriction for non-callback generating */ - flat_set nextStates; - - insert(&nextStates, adjacent_vertices(v, g)); - nextStates.erase(v); - nextStates.erase(g.accept); - nextStates.erase(g.acceptEod); - - searchStates.swap(nextStates); - nextStates.clear(); - - // subsequent iterations are simpler, just follow all edges - for (u32 j = 1; j <= accel_offset; j++) { - for (auto u : searchStates) { - insert(&nextStates, adjacent_vertices(u, g)); - if (edge(u, g.accept, g).second) { - nextStates.clear(); - break; - } - nextStates.erase(g.accept); - nextStates.erase(g.acceptEod); - } - - searchStates.swap(nextStates); - nextStates.clear(); - } - - vector sorted; - insert(&sorted, sorted.end(), searchStates); - sort(sorted.begin(), sorted.end(), make_index_ordering(g)); - for (auto sv : sorted) { - findStopLiteralsAtVertex(sv, g, build); - } -} - -DoubleAccelInfo findBestDoubleAccelInfo(const NGHolder &g, NFAVertex v) { - DoubleAccelInfo rv; - for (u32 offset = 0; offset <= MAX_ACCEL_DEPTH; offset++) { - DoubleAccelInfo b_temp; - findDoubleAccel(g, v, offset, b_temp); - if (is_double_better(b_temp, rv)) { - rv = b_temp; - } - } - - return rv; -} - static void findPaths(const NGHolder &g, NFAVertex v, const vector &refined_cr, @@ -384,8 +191,13 @@ void findPaths(const NGHolder &g, NFAVertex v, } static -AccelScheme merge(const AccelScheme &a, const AccelScheme &b) { - return AccelScheme(a.cr | b.cr, MAX(a.offset, b.offset)); +AccelScheme merge(AccelScheme a, const AccelScheme &b) { + a.cr |= b.cr; + ENSURE_AT_LEAST(&a.offset, b.offset); + a.double_cr |= b.double_cr; + insert(&a.double_byte, b.double_byte); + ENSURE_AT_LEAST(&a.double_offset, b.double_offset); + return a; } static @@ -445,8 +257,106 @@ void findBest(vector >::const_iterator pb, } } -#ifdef DEBUG +static +AccelScheme make_double_accel(AccelScheme as, CharReach cr_1, + const CharReach &cr_2_in, u32 offset_in) { + cr_1 &= ~as.double_cr; + CharReach cr_2 = cr_2_in & ~as.double_cr; + u32 offset = offset_in; + if (cr_1.none()) { + DEBUG_PRINTF("empty first element\n"); + as.double_offset = offset; + return as; + } + + if (cr_2_in != cr_2 || cr_2.none()) { + offset = offset_in + 1; + } + + size_t two_count = cr_1.count() * cr_2.count(); + + DEBUG_PRINTF("will generate raw %zu pairs\n", two_count); + + if (!two_count) { + DEBUG_PRINTF("empty element\n"); + as.double_offset = offset; + return as; + } + + if (two_count > 8) { + if (cr_2.count() < cr_1.count()) { + as.double_cr |= cr_2; + offset = offset_in + 1; + } else { + as.double_cr |= cr_1; + } + } else { + for (auto i = cr_1.find_first(); i != CharReach::npos; + i = cr_1.find_next(i)) { + for (auto j = cr_2.find_first(); j != CharReach::npos; + j = cr_2.find_next(j)) { + as.double_byte.insert(make_pair(i, j)); + } + } + } + + as.double_offset = offset; + DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n", + as.double_byte.size(), as.double_cr.count(), as.offset); + return as; +} + +static +void findDoubleBest(vector >::const_iterator pb, + vector >::const_iterator pe, + const AccelScheme &curr, AccelScheme *best) { + assert(curr.offset <= MAX_ACCEL_DEPTH); + DEBUG_PRINTF("paths left %zu\n", pe - pb); + if (pb == pe) { + *best = curr; + return; + } + + DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); + + vector priority_path; + u32 i = 0; + for (vector::const_iterator p = pb->begin(); + p != pb->end() && next(p) != pb->end(); + ++p, i++) { + priority_path.push_back(make_double_accel(curr, *p, *next(p), i)); + } + + sort(priority_path.begin(), priority_path.end()); + + DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n", + best->double_byte.size(), best->double_cr.count(), + best->offset); + + for (vector::const_iterator it = priority_path.begin(); + it != priority_path.end(); ++it) { + + AccelScheme in = merge(curr, *it); + DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n", + in.double_byte.size(), in.double_cr.count(), in.offset); + + if (in > *best) { + DEBUG_PRINTF("worse\n"); + continue; + } + AccelScheme temp = *best; + findDoubleBest(pb + 1, pe, in, &temp); + if (temp < *best) { + *best = temp; + DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n", + best->double_byte.size(), best->double_cr.count(), + best->offset); + } + } +} + +#ifdef DEBUG static void dumpPaths(const vector > &paths) { for (vector >::const_iterator p = paths.begin(); @@ -526,13 +436,56 @@ void improvePaths(vector > &paths) { #endif } +#define MAX_DOUBLE_ACCEL_PATHS 10 + +static +AccelScheme findBestDoubleAccelScheme(vector > paths, + const CharReach &terminating) { + DEBUG_PRINTF("looking for double accel, %zu terminating symbols\n", + terminating.count()); + unifyPathsLastSegment(paths); + AccelScheme curr; + curr.double_cr = terminating; + curr.offset = 0; + /* if there are too many paths, shorten the paths to reduce the number of + * distinct paths we have to consider */ + while (paths.size() > MAX_DOUBLE_ACCEL_PATHS) { + for (auto &p : paths) { + if (p.empty()) { + return curr; + } + p.pop_back(); + } + unifyPathsLastSegment(paths); + } + + if (paths.empty()) { + return curr; + } + + AccelScheme best; + best.double_cr = CharReach::dot(); + findDoubleBest(paths.begin(), paths.end(), curr, &best); + curr = best; + DEBUG_PRINTF("da %zu pairs, %zu singles\n", curr.double_byte.size(), + curr.double_cr.count()); + return curr; +} + AccelScheme findBestAccelScheme(vector > paths, - const CharReach &terminating) { + const CharReach &terminating, + bool look_for_double_byte) { + AccelScheme da; + + if (look_for_double_byte) { + da = findBestDoubleAccelScheme(paths, terminating); + } + improvePaths(paths); DEBUG_PRINTF("we have %zu paths\n", paths.size()); if (paths.size() > 40) { - return AccelScheme(); /* too many paths to explore */ + return da; /* too many paths to explore */ } /* if we were smart we would do something netflowy on the paths to find the @@ -559,13 +512,21 @@ AccelScheme findBestAccelScheme(vector > paths, assert(offset <= best.offset); best.offset = offset; + /* merge best single and best double */ + if (!da.double_byte.empty() && da.double_byte.size() <= 8 + && da.double_cr.count() < best.cr.count()) { + best.double_byte = da.double_byte; + best.double_cr = da.double_cr; + best.double_offset = da.double_offset; + } + return best; } AccelScheme nfaFindAccel(const NGHolder &g, const vector &verts, const vector &refined_cr, const map &br_cyclic, - bool allow_wide) { + bool allow_wide, bool look_for_double_byte) { CharReach terminating; for (auto v : verts) { if (!hasSelfLoop(v, g)) { @@ -612,7 +573,8 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector &verts, reverse(it->begin(), it->end()); } - return findBestAccelScheme(std::move(paths), terminating); + return findBestAccelScheme(std::move(paths), terminating, + look_for_double_byte); } NFAVertex get_sds_or_proxy(const NGHolder &g) { @@ -903,9 +865,9 @@ depth_done: } } - // Look for one byte accel schemes verm/shufti; + // Look for offset accel schemes verm/shufti; vector verts(1, v); - *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide); + *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide, true); DEBUG_PRINTF("as width %zu\n", as->cr.count()); return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide; } diff --git a/src/nfagraph/ng_limex_accel.h b/src/nfagraph/ng_limex_accel.h index 113b216c..80b3f0ec 100644 --- a/src/nfagraph/ng_limex_accel.h +++ b/src/nfagraph/ng_limex_accel.h @@ -63,15 +63,6 @@ void findAccelFriends(const NGHolder &g, NFAVertex v, u32 offset, ue2::flat_set *friends); -struct DoubleAccelInfo { - DoubleAccelInfo() : offset(0) {} - u32 offset; //!< offset correction to apply - CharReach stop1; //!< single-byte accel stop literals - flat_set> stop2; //!< double-byte accel stop literals -}; - -DoubleAccelInfo findBestDoubleAccelInfo(const NGHolder &g, NFAVertex v); - struct AccelScheme { AccelScheme(const CharReach &cr_in, u32 offset_in) : cr(cr_in), offset(offset_in) { @@ -84,6 +75,36 @@ struct AccelScheme { // Don't use ORDER_CHECK as it will (stupidly) eval count() too many // times. + size_t a_dcount = double_cr.count(); + size_t b_dcount = b.double_cr.count(); + + bool feasible_double_a + = !a.double_byte.empty() && a.double_byte.size() <= 8; + bool feasible_double_b + = !b.double_byte.empty() && b.double_byte.size() <= 8; + + if (feasible_double_a != feasible_double_b) { + return feasible_double_a > feasible_double_b; + } + + if (feasible_double_a) { + if (a_dcount != b_dcount) { + return a_dcount < b_dcount; + } + + if ((a.double_byte.size() == 1) != (b.double_byte.size() == 1)) { + return a.double_byte.size() < b.double_byte.size(); + } + + bool cd_a = isCaselessDouble(a.double_byte); + bool cd_b = isCaselessDouble(b.double_byte); + if (cd_a != cd_b) { + return cd_a > cd_b; + } + ORDER_CHECK(double_byte.size()); + ORDER_CHECK(double_offset); + } + const size_t a_count = cr.count(), b_count = b.cr.count(); if (a_count != b_count) { return a_count < b_count; @@ -92,6 +113,9 @@ struct AccelScheme { /* TODO: give bonus if one is a 'caseless' character */ ORDER_CHECK(offset); ORDER_CHECK(cr); + ORDER_CHECK(double_byte); + ORDER_CHECK(double_cr); + ORDER_CHECK(double_offset); return false; } @@ -99,8 +123,11 @@ struct AccelScheme { return b < *this; } + ue2::flat_set > double_byte; CharReach cr; + CharReach double_cr; u32 offset; + u32 double_offset = 0; }; NFAVertex get_sds_or_proxy(const NGHolder &g); @@ -108,12 +135,15 @@ NFAVertex get_sds_or_proxy(const NGHolder &g); AccelScheme nfaFindAccel(const NGHolder &g, const std::vector &verts, const std::vector &refined_cr, const std::map &br_cyclic, - bool allow_wide); + bool allow_wide, bool look_for_double_byte = false); AccelScheme findBestAccelScheme(std::vector > paths, - const CharReach &terminating); + const CharReach &terminating, + bool look_for_double_byte = false); -/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). */ +/** \brief Check if vertex \a v is an accelerable state (for a limex NFA). If a + * single byte accel scheme is found it is placed into *as + */ bool nfaCheckAccel(const NGHolder &g, NFAVertex v, const std::vector &refined_cr, const std::map &br_cyclic,