diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index 11ae2000..b76078f9 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -978,11 +978,6 @@ unique_ptr makeHolder(const CastleProto &proto, auto g = ue2::make_unique(proto.kind); for (const auto &m : proto.repeats) { - if (m.first >= NFA_MAX_TOP_MASKS) { - DEBUG_PRINTF("top %u too big for an NFA\n", m.first); - return nullptr; - } - addToHolder(*g, m.first, m.second); } diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 53a003e3..2c164090 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -41,7 +41,6 @@ #include "nfagraph/ng_holder.h" #include "nfagraph/ng_limex_accel.h" #include "nfagraph/ng_repeat.h" -#include "nfagraph/ng_restructuring.h" #include "nfagraph/ng_squash.h" #include "nfagraph/ng_util.h" #include "ue2common.h" @@ -74,6 +73,12 @@ using boost::adaptors::map_values; namespace ue2 { +/** + * \brief Special state index value meaning that the vertex will not + * participate in an (NFA/DFA/etc) implementation. + */ +static constexpr u32 NO_STATE = ~0; + namespace { struct precalcAccel { @@ -91,7 +96,7 @@ struct precalcAccel { struct limex_accel_info { ue2::unordered_set accelerable; map precalc; - ue2::unordered_map > friends; + ue2::unordered_map> friends; ue2::unordered_map accel_map; }; @@ -134,7 +139,7 @@ struct build_info { const vector &ri, const map &rsmi, const map &smi, - const map &ti, const set &zi, + const map> &ti, const set &zi, bool dai, bool sci, const CompileContext &cci, u32 nsi) : h(hi), state_ids(states_in), repeats(ri), tops(ti), zombies(zi), @@ -160,7 +165,7 @@ struct build_info { map reportSquashMap; map squashMap; - const map &tops; + const map> &tops; ue2::unordered_set tugs; map br_cyclic; const set &zombies; @@ -522,20 +527,25 @@ struct fas_visitor : public boost::default_bfs_visitor { }; static -void filterAccelStates(NGHolder &g, const map &tops, +void filterAccelStates(NGHolder &g, const map> &tops, ue2::unordered_map *accel_map) { /* We want the NFA_MAX_ACCEL_STATES best acceleration states, everything * else should be ditched. We use a simple BFS to choose accel states near * the start. */ - // Temporarily wire start to each top for the BFS. - vector topEdges; - wireStartToTops(g, tops, topEdges); + vector tempEdges; + for (const auto &vv : tops | map_values) { + for (NFAVertex v : vv) { + if (!edge(g.start, v, g).second) { + tempEdges.push_back(add_edge(g.start, v, g).first); + } + } + } // Similarly, connect (start, startDs) if necessary. if (!edge(g.start, g.startDs, g).second) { auto e = add_edge(g.start, g.startDs, g).first; - topEdges.push_back(e); // Remove edge later. + tempEdges.push_back(e); // Remove edge later. } ue2::unordered_map out; @@ -551,7 +561,7 @@ void filterAccelStates(NGHolder &g, const map &tops, ; /* found max accel_states */ } - remove_edges(topEdges, g); + remove_edges(tempEdges, g); assert(out.size() <= NFA_MAX_ACCEL_STATES); accel_map->swap(out); @@ -705,7 +715,7 @@ void fillAccelInfo(build_info &bi) { /** The AccelAux structure has large alignment specified, and this makes some * compilers do odd things unless we specify a custom allocator. */ -typedef vector > +typedef vector> AccelAuxVector; #define IMPOSSIBLE_ACCEL_MASK (~0U) @@ -1122,19 +1132,20 @@ void buildTopMasks(const build_info &args, vector &topMasks) { u32 numMasks = args.tops.rbegin()->first + 1; // max mask index DEBUG_PRINTF("we have %u top masks\n", numMasks); - assert(numMasks <= NFA_MAX_TOP_MASKS); topMasks.assign(numMasks, NFAStateSet(args.num_states)); // all zeroes for (const auto &m : args.tops) { u32 mask_idx = m.first; - u32 state_id = args.state_ids.at(m.second); - DEBUG_PRINTF("state %u is in top mask %u\n", state_id, mask_idx); + for (NFAVertex v : m.second) { + u32 state_id = args.state_ids.at(v); + DEBUG_PRINTF("state %u is in top mask %u\n", state_id, mask_idx); - assert(mask_idx < numMasks); - assert(state_id != NO_STATE); + assert(mask_idx < numMasks); + assert(state_id != NO_STATE); - topMasks[mask_idx].set(state_id); + topMasks[mask_idx].set(state_id); + } } } @@ -2123,7 +2134,7 @@ struct Factory { u32 maxShift = findMaxVarShift(args, shiftCount); findExceptionalTransitions(args, exceptional, maxShift); - map > exceptionMap; + map> exceptionMap; vector reportList; u32 exceptionCount = buildExceptionMap(args, reports_cache, exceptional, @@ -2315,13 +2326,13 @@ MAKE_LIMEX_TRAITS(512) #ifndef NDEBUG // Some sanity tests, called by an assertion in generate(). static UNUSED -bool isSane(const NGHolder &h, const map &tops, +bool isSane(const NGHolder &h, const map> &tops, const ue2::unordered_map &state_ids, u32 num_states) { ue2::unordered_set seen; ue2::unordered_set top_starts; - for (const auto &m : tops) { - top_starts.insert(m.second); + for (const auto &vv : tops | map_values) { + insert(&top_starts, vv); } for (auto v : vertices_range(h)) { @@ -2385,7 +2396,7 @@ aligned_unique_ptr generate(NGHolder &h, const vector &repeats, const map &reportSquashMap, const map &squashMap, - const map &tops, + const map> &tops, const set &zombies, bool do_accel, bool stateCompression, @@ -2457,7 +2468,7 @@ u32 countAccelStates(NGHolder &h, const vector &repeats, const map &reportSquashMap, const map &squashMap, - const map &tops, + const map> &tops, const set &zombies, const CompileContext &cc) { const u32 num_states = max_state(states) + 1; diff --git a/src/nfa/limex_compile.h b/src/nfa/limex_compile.h index 62a07e10..21cb7608 100644 --- a/src/nfa/limex_compile.h +++ b/src/nfa/limex_compile.h @@ -71,7 +71,7 @@ aligned_unique_ptr generate(NGHolder &g, const std::vector &repeats, const std::map &reportSquashMap, const std::map &squashMap, - const std::map &tops, + const std::map> &tops, const std::set &zombies, bool do_accel, bool stateCompression, @@ -89,7 +89,7 @@ u32 countAccelStates(NGHolder &h, const std::vector &repeats, const std::map &reportSquashMap, const std::map &squashMap, - const std::map &tops, + const std::map> &tops, const std::set &zombies, const CompileContext &cc); diff --git a/src/nfa/limex_limits.h b/src/nfa/limex_limits.h index 9b35b115..f4df54a4 100644 --- a/src/nfa/limex_limits.h +++ b/src/nfa/limex_limits.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,6 +31,5 @@ #define NFA_MAX_STATES 512 /**< max states in an NFA */ #define NFA_MAX_ACCEL_STATES 8 /**< max accel states in a NFA */ -#define NFA_MAX_TOP_MASKS 32 /**< max number of MQE_TOP_N event types */ #endif diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index e70b7708..143dca16 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -35,7 +35,6 @@ #include "nfa/goughcompile.h" #include "ng_holder.h" #include "ng_mcclellan_internal.h" -#include "ng_restructuring.h" #include "ng_som_util.h" #include "ng_squash.h" #include "ng_util.h" @@ -118,11 +117,11 @@ public: using StateMap = typename Automaton_Traits::StateMap; protected: - Automaton_Base(const NGHolder &graph_in, - const flat_set &unused_in, som_type som, + Automaton_Base(const NGHolder &graph_in, som_type som, const vector> &triggers, bool unordered_som) - : graph(graph_in), numStates(num_vertices(graph)), unused(unused_in), + : graph(graph_in), numStates(num_vertices(graph)), + unused(getRedundantStarts(graph_in)), init(Automaton_Traits::init_states(numStates)), initDS(Automaton_Traits::init_states(numStates)), squash(Automaton_Traits::init_states(numStates)), @@ -210,7 +209,7 @@ public: const NGHolder &graph; const u32 numStates; - const flat_set &unused; + const flat_set unused; array alpha; array unalpha; @@ -251,10 +250,9 @@ struct Big_Traits { class Automaton_Big : public Automaton_Base { public: - Automaton_Big(const NGHolder &graph_in, - const flat_set &unused_in, som_type som, + Automaton_Big(const NGHolder &graph_in, som_type som, const vector> &triggers, bool unordered_som) - : Automaton_Base(graph_in, unused_in, som, triggers, unordered_som) {} + : Automaton_Base(graph_in, som, triggers, unordered_som) {} }; struct Graph_Traits { @@ -278,11 +276,10 @@ struct Graph_Traits { class Automaton_Graph : public Automaton_Base { public: - Automaton_Graph(const NGHolder &graph_in, - const flat_set &unused_in, som_type som, + Automaton_Graph(const NGHolder &graph_in, som_type som, const vector> &triggers, bool unordered_som) - : Automaton_Base(graph_in, unused_in, som, triggers, unordered_som) {} + : Automaton_Base(graph_in, som, triggers, unordered_som) {} }; class Automaton_Haig_Merge { @@ -512,15 +509,14 @@ void haig_note_starts(const NGHolder &g, map *out) { template static -bool doHaig(const NGHolder &g, - const flat_set &unused, - som_type som, const vector> &triggers, - bool unordered_som, raw_som_dfa *rdfa) { +bool doHaig(const NGHolder &g, som_type som, + const vector> &triggers, bool unordered_som, + raw_som_dfa *rdfa) { u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from a fight */ typedef typename Auto::StateSet StateSet; vector nfa_state_map; - Auto n(g, unused, som, triggers, unordered_som); + Auto n(g, som, triggers, unordered_som); try { if (determinise(n, rdfa->states, state_limit, &nfa_state_map)) { DEBUG_PRINTF("state limit exceeded\n"); @@ -550,9 +546,9 @@ bool doHaig(const NGHolder &g, haig_do_preds(g, source_states, n.v_by_index, rdfa->state_som.back().preds); - haig_do_report(g, unused, g.accept, source_states, n.v_by_index, + haig_do_report(g, n.unused, g.accept, source_states, n.v_by_index, rdfa->state_som.back().reports); - haig_do_report(g, unused, g.acceptEod, source_states, n.v_by_index, + haig_do_report(g, n.unused, g.acceptEod, source_states, n.v_by_index, rdfa->state_som.back().reports_eod); } @@ -577,8 +573,6 @@ attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, assert(allMatchStatesHaveReports(g)); assert(hasCorrectlyNumberedVertices(g)); - auto unused = findUnusedStates(g); - u32 numStates = num_vertices(g); if (numStates > HAIG_MAX_NFA_STATE) { DEBUG_PRINTF("giving up... looks too big\n"); @@ -592,12 +586,11 @@ attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, bool rv; if (numStates <= NFA_STATE_LIMIT) { /* fast path */ - rv = doHaig(g, unused, som, triggers, unordered_som, + rv = doHaig(g, som, triggers, unordered_som, rdfa.get()); } else { /* not the fast path */ - rv = doHaig(g, unused, som, triggers, unordered_som, - rdfa.get()); + rv = doHaig(g, som, triggers, unordered_som, rdfa.get()); } if (!rv) { diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index c6e4c24e..66494c77 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -54,10 +54,15 @@ #include "util/ue2_containers.h" #include "util/verify_types.h" +#include #include #include +#include + using namespace std; +using boost::adaptors::map_values; +using boost::adaptors::map_keys; namespace ue2 { @@ -146,78 +151,310 @@ void dropRedundantStartEdges(NGHolder &g) { } static -void makeTopStates(NGHolder &g, map &tops, - const map &top_reach) { - /* TODO: more intelligent creation of top states */ - map> top_succs; - for (const auto &e : out_edges_range(g.start, g)) { - NFAVertex v = target(e, g); - if (v == g.startDs) { - continue; - } - for (u32 t : g[e].tops) { - top_succs[t].push_back(v); - } - } - - for (const auto &top : top_succs) { - u32 t = top.first; - - CharReach top_cr; +CharReach calcTopVertexReach(const flat_set &tops, + const map &top_reach) { + CharReach top_cr; + for (u32 t : tops) { if (contains(top_reach, t)) { - top_cr = top_reach.at(t); + top_cr |= top_reach.at(t); } else { top_cr = CharReach::dot(); - } - - assert(!contains(tops, t)); - - NFAVertex s = NGHolder::null_vertex(); - flat_set succs; - insert(&succs, top.second); - - for (auto v : top.second) { - if (!top_cr.isSubsetOf(g[v].char_reach)) { - continue; - } - - flat_set vsuccs; - insert(&vsuccs, adjacent_vertices(v, g)); - - if (succs != vsuccs) { - continue; - } - - if (g[v].reports != g[g.start].reports) { - continue; - } - s = v; break; } + } + return top_cr; +} - if (!s) { - s = add_vertex(g[g.start], g); - g[s].char_reach = top_cr; - for (auto v : top.second) { - add_edge(s, v, g); +static +NFAVertex makeTopStartVertex(NGHolder &g, const flat_set &tops, + const flat_set &succs, + const map &top_reach) { + assert(!succs.empty()); + assert(!tops.empty()); + + bool reporter = false; + + NFAVertex u = add_vertex(g[g.start], g); + CharReach top_cr = calcTopVertexReach(tops, top_reach); + g[u].char_reach = top_cr; + for (auto v : succs) { + if (v == g.accept || v == g.acceptEod) { + reporter = true; + } + add_edge(u, v, g); + } + + // Only retain reports (which we copied on add_vertex above) for new top + // vertices connected to accepts. + if (!reporter) { + g[u].reports.clear(); + } + + return u; +} + +static +void pickNextTopStateToHandle(const map> &top_succs, + const map> &succ_tops, + flat_set *picked_tops, + flat_set *picked_succs) { + /* pick top or vertex we want to handle */ + if (top_succs.size() < succ_tops.size()) { + auto best = top_succs.end(); + for (auto it = top_succs.begin(); it != top_succs.end(); ++it) { + if (best == top_succs.end() + || it->second.size() < best->second.size()) { + best = it; } } - tops[t] = s; + assert(best != top_succs.end()); + assert(!best->second.empty()); /* should already been pruned */ + + *picked_tops = { best->first }; + *picked_succs = best->second; + } else { + auto best = succ_tops.end(); + for (auto it = succ_tops.begin(); it != succ_tops.end(); ++it) { + /* have to worry about determinism for this one */ + if (best == succ_tops.end() + || it->second.size() < best->second.size() + || (it->second.size() == best->second.size() + && it->second < best->second)) { + best = it; + } + } + assert(best != succ_tops.end()); + assert(!best->second.empty()); /* should already been pruned */ + + *picked_succs = { best->first }; + *picked_tops = best->second; } +} + +static +void expandCbsByTops(const map> &unhandled_top_succs, + const map> &top_succs, + const map> &succ_tops, + flat_set &picked_tops, + flat_set &picked_succs) { + NFAVertex v = *picked_succs.begin(); /* arbitrary successor - all equiv */ + const auto &cand_tops = succ_tops.at(v); + + for (u32 t : cand_tops) { + if (!contains(unhandled_top_succs, t)) { + continue; + } + if (!has_intersection(unhandled_top_succs.at(t), picked_succs)) { + continue; /* not adding any useful work that hasn't already been + * done */ + } + if (!is_subset_of(picked_succs, top_succs.at(t))) { + continue; /* will not form a cbs */ + } + picked_tops.insert(t); + } +} + +static +void expandCbsBySuccs(const map> &unhandled_succ_tops, + const map> &top_succs, + const map> &succ_tops, + flat_set &picked_tops, + flat_set &picked_succs) { + u32 t = *picked_tops.begin(); /* arbitrary top - all equiv */ + const auto &cand_succs = top_succs.at(t); + + for (NFAVertex v : cand_succs) { + if (!contains(unhandled_succ_tops, v)) { + continue; + } + if (!has_intersection(unhandled_succ_tops.at(v), picked_tops)) { + continue; /* not adding any useful work that hasn't already been + * done */ + } + if (!is_subset_of(picked_tops, succ_tops.at(v))) { + continue; /* will not form a cbs */ + } + picked_succs.insert(v); + } +} + +/* See if we can expand the complete bipartite subgraph (cbs) specified by the + * picked tops/succs by adding more to either of the tops or succs. + */ +static +void expandTopSuccCbs(const map> &top_succs, + const map> &succ_tops, + const map> &unhandled_top_succs, + const map> &unhandled_succ_tops, + flat_set &picked_tops, + flat_set &picked_succs) { + /* Note: all picked (tops|succs) are equivalent */ + + /* Try to expand first (as we are more likely to succeed) on the side + * with fewest remaining things to be handled */ + + if (unhandled_top_succs.size() < unhandled_succ_tops.size()) { + expandCbsByTops(unhandled_top_succs, top_succs, succ_tops, + picked_tops, picked_succs); + expandCbsBySuccs(unhandled_succ_tops, top_succs, succ_tops, + picked_tops, picked_succs); + } else { + expandCbsBySuccs(unhandled_succ_tops, top_succs, succ_tops, + picked_tops, picked_succs); + expandCbsByTops(unhandled_top_succs, top_succs, succ_tops, + picked_tops, picked_succs); + } +} + +static +void markTopSuccAsHandled(NFAVertex start_v, + const flat_set &handled_tops, + const flat_set &handled_succs, + map> &tops_out, + map> &unhandled_top_succs, + map> &unhandled_succ_tops) { + for (u32 t : handled_tops) { + tops_out[t].insert(start_v); + assert(contains(unhandled_top_succs, t)); + erase_all(&unhandled_top_succs[t], handled_succs); + if (unhandled_top_succs[t].empty()) { + unhandled_top_succs.erase(t); + } + } + + for (NFAVertex v : handled_succs) { + assert(contains(unhandled_succ_tops, v)); + erase_all(&unhandled_succ_tops[v], handled_tops); + if (unhandled_succ_tops[v].empty()) { + unhandled_succ_tops.erase(v); + } + } +} + +static +void attemptToUseAsStart(const NGHolder &g, NFAVertex u, + const map &top_reach, + map> &unhandled_top_succs, + map> &unhandled_succ_tops, + map> &tops_out) { + flat_set top_inter = unhandled_succ_tops.at(u); + flat_set succs; + for (NFAVertex v : adjacent_vertices_range(u, g)) { + if (!contains(unhandled_succ_tops, v)) { + return; + } + const flat_set &v_tops = unhandled_succ_tops.at(v); + flat_set new_inter; + auto ni_inserter = inserter(new_inter, new_inter.end()); + set_intersection(top_inter.begin(), top_inter.end(), + v_tops.begin(), v_tops.end(), ni_inserter); + top_inter = move(new_inter); + succs.insert(v); + } + + if (top_inter.empty()) { + return; + } + + auto top_cr = calcTopVertexReach(top_inter, top_reach); + if (!top_cr.isSubsetOf(g[u].char_reach)) { + return; + } + + markTopSuccAsHandled(u, top_inter, succs, tops_out, unhandled_top_succs, + unhandled_succ_tops); +} + +/* We may have cases where a top triggers something that starts with a .* (or + * similar state). In these cases we can make use of that state as a start + * state. + */ +static +void reusePredsAsStarts(const NGHolder &g, const map &top_reach, + map> &unhandled_top_succs, + map> &unhandled_succ_tops, + map> &tops_out) { + /* create list of candidates first, to avoid issues of iter invalidation + * and determinism */ + vector cand_starts; + for (NFAVertex u : unhandled_succ_tops | map_keys) { + if (hasSelfLoop(u, g)) { + cand_starts.push_back(u); + } + } + sort(cand_starts.begin(), cand_starts.end(), make_index_ordering(g)); + + for (NFAVertex u : cand_starts) { + if (!contains(unhandled_succ_tops, u)) { + continue; + } + attemptToUseAsStart(g, u, top_reach, unhandled_top_succs, + unhandled_succ_tops, tops_out); + } +} + +static +void makeTopStates(NGHolder &g, map> &tops_out, + const map &top_reach) { + /* Ideally, we want to add the smallest number of states to the graph for + * tops to turn on so that they can accurately trigger their successors. + * + * The relationships between tops and their successors forms a bipartite + * graph. Finding the optimal number of start states to add is equivalent to + * finding a minimal biclique coverings. Unfortunately, this is known to be + * NP-complete. + * + * Given this, we will just do something simple to avoid creating something + * truly wasteful: + * 1) Try to find any cyclic states which can act as their own start states + * 2) Pick a top or a succ to create a start state for and then try to find + * the largest complete bipartite subgraph that it is part of. + */ + + map> top_succs; + map> succ_tops; + for (const auto &e : out_edges_range(g.start, g)) { + NFAVertex v = target(e, g); + for (u32 t : g[e].tops) { + top_succs[t].insert(v); + succ_tops[v].insert(t); + } + } + + auto unhandled_top_succs = top_succs; + auto unhandled_succ_tops = succ_tops; + + reusePredsAsStarts(g, top_reach, unhandled_top_succs, unhandled_succ_tops, + tops_out); + + /* Note: there may be successors which are equivalent (in terms of + top-triggering), it may be more efficient to discover this and treat them + as a unit. TODO */ + + while (!unhandled_succ_tops.empty()) { + assert(!unhandled_top_succs.empty()); + flat_set u_tops; + flat_set u_succs; + pickNextTopStateToHandle(unhandled_top_succs, unhandled_succ_tops, + &u_tops, &u_succs); + + expandTopSuccCbs(top_succs, succ_tops, unhandled_top_succs, + unhandled_succ_tops, u_tops, u_succs); + + /* create start vertex to handle this top/succ combination */ + NFAVertex u = makeTopStartVertex(g, u_tops, u_succs, top_reach); + + /* update maps */ + markTopSuccAsHandled(u, u_tops, u_succs, tops_out, unhandled_top_succs, + unhandled_succ_tops); + } + assert(unhandled_top_succs.empty()); // We are completely replacing the start vertex, so clear its reports. clear_out_edges(g.start, g); add_edge(g.start, g.startDs, g); g[g.start].reports.clear(); - - // Only retain reports (which we copied on add_vertex above) for new top - // vertices connected to accepts. - for (const auto &m : tops) { - NFAVertex v = m.second; - if (!edge(v, g.accept, g).second && !edge(v, g.acceptEod, g).second) { - g[v].reports.clear(); - } - } } static @@ -325,7 +562,8 @@ prepareGraph(const NGHolder &h_in, const ReportManager *rm, const map>> &triggers, bool impl_test_only, const CompileContext &cc, ue2::unordered_map &state_ids, - vector &repeats, map &tops) { + vector &repeats, + map> &tops) { assert(is_triggered(h_in) || fixed_depth_tops.empty()); unique_ptr h = cloneHolder(h_in); @@ -335,15 +573,19 @@ prepareGraph(const NGHolder &h_in, const ReportManager *rm, impl_test_only, cc.grey); // If we're building a rose/suffix, do the top dance. + flat_set topVerts; if (is_triggered(*h)) { makeTopStates(*h, tops, findTopReach(triggers)); + + for (const auto &vv : tops | map_values) { + insert(&topVerts, vv); + } } dropRedundantStartEdges(*h); // Do state numbering - state_ids = numberStates(*h, tops); - dropUnusedStarts(*h, state_ids); + state_ids = numberStates(*h, topVerts); // In debugging, we sometimes like to reverse the state numbering to stress // the NFA construction code. @@ -389,14 +631,14 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, ue2::unordered_map state_ids; vector repeats; - map tops; + map> tops; unique_ptr h = prepareGraph(h_in, rm, fixed_depth_tops, triggers, impl_test_only, cc, state_ids, repeats, tops); // Quick exit: if we've got an embarrassment of riches, i.e. more states // than we can implement in our largest NFA model, bail here. - u32 numStates = countStates(*h, state_ids, false); + u32 numStates = countStates(state_ids); if (numStates > NFA_MAX_STATES) { DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates); return nullptr; @@ -469,13 +711,11 @@ aligned_unique_ptr constructReversedNFA_i(const NGHolder &h_in, u32 hint, assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */ // Do state numbering. - auto state_ids = numberStates(h); - - dropUnusedStarts(h, state_ids); + auto state_ids = numberStates(h, {}); // Quick exit: if we've got an embarrassment of riches, i.e. more states // than we can implement in our largest NFA model, bail here. - u32 numStates = countStates(h, state_ids, false); + u32 numStates = countStates(state_ids); if (numStates > NFA_MAX_STATES) { DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates); return nullptr; @@ -483,7 +723,7 @@ aligned_unique_ptr constructReversedNFA_i(const NGHolder &h_in, u32 hint, assert(sanityCheckGraph(h, state_ids)); - map tops; /* only the standards tops for nfas */ + map> tops; /* only the standards tops for nfas */ set zombies; vector repeats; map reportSquashMap; @@ -518,7 +758,7 @@ u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, // Quick check: we can always implement an NFA with less than NFA_MAX_STATES // states. Note that top masks can generate extra states, so we account for // those here too. - if (num_vertices(g) + NFA_MAX_TOP_MASKS < NFA_MAX_STATES) { + if (num_vertices(g) + getTops(g).size() < NFA_MAX_STATES) { return true; } @@ -539,12 +779,12 @@ u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm, ue2::unordered_map state_ids; vector repeats; - map tops; + map> tops; unique_ptr h = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc, state_ids, repeats, tops); assert(h); - u32 numStates = countStates(*h, state_ids, false); + u32 numStates = countStates(state_ids); if (numStates <= NFA_MAX_STATES) { return numStates; } @@ -586,12 +826,12 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm, ue2::unordered_map state_ids; vector repeats; - map tops; + map> tops; unique_ptr h = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc, state_ids, repeats, tops); - if (!h || countStates(*h, state_ids, false) > NFA_MAX_STATES) { + if (!h || countStates(state_ids) > NFA_MAX_STATES) { DEBUG_PRINTF("not constructible\n"); return NFA_MAX_ACCEL_STATES + 1; } diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index 39788570..71c9a05e 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -36,7 +36,6 @@ #include "nfa/rdfa.h" #include "ng_holder.h" #include "ng_mcclellan_internal.h" -#include "ng_restructuring.h" #include "ng_squash.h" #include "ng_util.h" #include "ue2common.h" @@ -348,10 +347,11 @@ public: using StateMap = typename Automaton_Traits::StateMap; Automaton_Base(const ReportManager *rm_in, const NGHolder &graph_in, - const flat_set &unused_in, bool single_trigger, + bool single_trigger, const vector> &triggers, bool prunable_in) : rm(rm_in), graph(graph_in), numStates(num_vertices(graph)), - unused(unused_in), init(Automaton_Traits::init_states(numStates)), + unused(getRedundantStarts(graph_in)), + init(Automaton_Traits::init_states(numStates)), initDS(Automaton_Traits::init_states(numStates)), squash(Automaton_Traits::init_states(numStates)), accept(Automaton_Traits::init_states(numStates)), @@ -444,7 +444,7 @@ private: public: const NGHolder &graph; u32 numStates; - const flat_set &unused; + const flat_set unused; vector v_by_index; vector cr_by_index; /* pre alpha'ed */ StateSet init; @@ -482,9 +482,9 @@ struct Big_Traits { class Automaton_Big : public Automaton_Base { public: Automaton_Big(const ReportManager *rm_in, const NGHolder &graph_in, - const flat_set &unused_in, bool single_trigger, + bool single_trigger, const vector> &triggers, bool prunable_in) - : Automaton_Base(rm_in, graph_in, unused_in, single_trigger, triggers, + : Automaton_Base(rm_in, graph_in, single_trigger, triggers, prunable_in) {} }; @@ -510,14 +510,36 @@ struct Graph_Traits { class Automaton_Graph : public Automaton_Base { public: Automaton_Graph(const ReportManager *rm_in, const NGHolder &graph_in, - const flat_set &unused_in, bool single_trigger, - const vector> &triggers, bool prunable_in) - : Automaton_Base(rm_in, graph_in, unused_in, single_trigger, triggers, + bool single_trigger, + const vector> &triggers, bool prunable_in) + : Automaton_Base(rm_in, graph_in, single_trigger, triggers, prunable_in) {} }; } // namespace +static +bool startIsRedundant(const NGHolder &g) { + set start; + set startDs; + + insert(&start, adjacent_vertices(g.start, g)); + insert(&startDs, adjacent_vertices(g.startDs, g)); + + return start == startDs; +} + +flat_set getRedundantStarts(const NGHolder &g) { + flat_set dead; + if (startIsRedundant(g)) { + dead.insert(g.start); + } + if (proper_out_degree(g.startDs, g) == 0) { + dead.insert(g.startDs); + } + return dead; +} + unique_ptr buildMcClellan(const NGHolder &graph, const ReportManager *rm, bool single_trigger, const vector> &triggers, @@ -526,8 +548,6 @@ unique_ptr buildMcClellan(const NGHolder &graph, return nullptr; } - auto unused = findUnusedStates(graph); - DEBUG_PRINTF("attempting to build ?%d? mcclellan\n", (int)graph.kind); assert(allMatchStatesHaveReports(graph)); @@ -553,8 +573,7 @@ unique_ptr buildMcClellan(const NGHolder &graph, if (numStates <= NFA_STATE_LIMIT) { /* Fast path. Automaton_Graph uses a bitfield internally to represent * states and is quicker than Automaton_Big. */ - Automaton_Graph n(rm, graph, unused, single_trigger, triggers, - prunable); + Automaton_Graph n(rm, graph, single_trigger, triggers, prunable); if (determinise(n, rdfa->states, state_limit)) { DEBUG_PRINTF("state limit exceeded\n"); return nullptr; /* over state limit */ @@ -566,7 +585,7 @@ unique_ptr buildMcClellan(const NGHolder &graph, rdfa->alpha_remap = n.alpha; } else { /* Slow path. Too many states to use Automaton_Graph. */ - Automaton_Big n(rm, graph, unused, single_trigger, triggers, prunable); + Automaton_Big n(rm, graph, single_trigger, triggers, prunable); if (determinise(n, rdfa->states, state_limit)) { DEBUG_PRINTF("state limit exceeded\n"); return nullptr; /* over state limit */ diff --git a/src/nfagraph/ng_mcclellan_internal.h b/src/nfagraph/ng_mcclellan_internal.h index 22fcf01e..b78dac3b 100644 --- a/src/nfagraph/ng_mcclellan_internal.h +++ b/src/nfagraph/ng_mcclellan_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,7 +36,6 @@ #include "ue2common.h" #include "nfa/mcclellancompile.h" #include "nfagraph/ng_holder.h" -#include "nfagraph/ng_restructuring.h" // for NO_STATE #include "util/charreach.h" #include "util/graph_range.h" #include "util/ue2_containers.h" @@ -69,6 +68,13 @@ void markToppableStarts(const NGHolder &g, const flat_set &unused, const std::vector> &triggers, boost::dynamic_bitset<> *out); +/** + * \brief Returns a set of start vertices that will not participate in an + * implementation of this graph. These are either starts with no successors or + * starts which are redundant with startDs. + */ +flat_set getRedundantStarts(const NGHolder &g); + template void transition_graph(autom &nfa, const std::vector &vByStateId, const typename autom::StateSet &in, diff --git a/src/nfagraph/ng_restructuring.cpp b/src/nfagraph/ng_restructuring.cpp index c85860c7..46990330 100644 --- a/src/nfagraph/ng_restructuring.cpp +++ b/src/nfagraph/ng_restructuring.cpp @@ -49,37 +49,71 @@ namespace ue2 { /** Connect the start vertex to each of the vertices in \p tops. This is useful * temporarily for when we need to run a graph algorithm that expects a single * source vertex. */ -void wireStartToTops(NGHolder &g, const map &tops, - vector &topEdges) { - for (const auto &top : tops) { - NFAVertex v = top.second; +static +void wireStartToTops(NGHolder &g, const flat_set &tops, + vector &tempEdges) { + for (NFAVertex v : tops) { assert(!isLeafNode(v, g)); const NFAEdge &e = add_edge(g.start, v, g).first; - topEdges.push_back(e); + tempEdges.push_back(e); } } +/** + * Returns true if start's successors (aside from startDs) are subset of + * startDs's proper successors or if start has no successors other than startDs. + */ static -void getStateOrdering(NGHolder &g, const map &tops, +bool startIsRedundant(const NGHolder &g) { + /* We ignore startDs as the self-loop may have been stripped as an + * optimisation for repeats (improveLeadingRepeats()). */ + set start; + insert(&start, adjacent_vertices_range(g.start, g)); + start.erase(g.startDs); + + // Trivial case: start has no successors other than startDs. + if (start.empty()) { + DEBUG_PRINTF("start has no out-edges other than to startDs\n"); + return true; + } + + set startDs; + insert(&startDs, adjacent_vertices_range(g.startDs, g)); + startDs.erase(g.startDs); + + if (!is_subset_of(start, startDs)) { + DEBUG_PRINTF("out-edges of start and startDs aren't equivalent\n"); + return false; + } + + return true; +} + +static +void getStateOrdering(NGHolder &g, const flat_set &tops, vector &ordering) { // First, wire up our "tops" to start so that we have a single source, // which will give a nicer topo order. - vector topEdges; - wireStartToTops(g, tops, topEdges); + vector tempEdges; + wireStartToTops(g, tops, tempEdges); renumberGraphVertices(g); vector temp = getTopoOrdering(g); - remove_edges(topEdges, g); + remove_edges(tempEdges, g); // Move {start, startDs} to the end, so they'll be first when we reverse - // the ordering. + // the ordering (if they are required). temp.erase(remove(temp.begin(), temp.end(), g.startDs)); temp.erase(remove(temp.begin(), temp.end(), g.start)); - temp.push_back(g.startDs); - temp.push_back(g.start); + if (proper_out_degree(g.startDs, g)) { + temp.push_back(g.startDs); + } + if (!startIsRedundant(g)) { + temp.push_back(g.start); + } // Walk ordering, remove vertices that shouldn't be participating in state // numbering, such as accepts. @@ -149,16 +183,15 @@ void optimiseTightLoops(const NGHolder &g, vector &ordering) { continue; } - DEBUG_PRINTF("moving vertex %u next to %u\n", - g[v].index, g[u].index); + DEBUG_PRINTF("moving vertex %u next to %u\n", g[v].index, g[u].index); ordering.erase(v_it); ordering.insert(++u_it, v); } } -ue2::unordered_map -numberStates(NGHolder &h, const map &tops) { +unordered_map +numberStates(NGHolder &h, const flat_set &tops) { DEBUG_PRINTF("numbering states for holder %p\n", &h); vector ordering; @@ -166,15 +199,10 @@ numberStates(NGHolder &h, const map &tops) { optimiseTightLoops(h, ordering); - ue2::unordered_map states = getStateIndices(h, ordering); - - return states; + return getStateIndices(h, ordering); } -u32 countStates(const NGHolder &g, - const ue2::unordered_map &state_ids, - bool addTops) { - /* TODO: smarter top state allocation, move to limex? */ +u32 countStates(const unordered_map &state_ids) { if (state_ids.empty()) { return 0; } @@ -185,168 +213,9 @@ u32 countStates(const NGHolder &g, max_state = max(m.second, max_state); } } - u32 num_states = max_state + 1; - assert(contains(state_ids, g.start)); - if (addTops && is_triggered(g) && state_ids.at(g.start) != NO_STATE) { - num_states--; - set tops; - for (auto e : out_edges_range(g.start, g)) { - insert(&tops, g[e].tops); - } - num_states += tops.size(); - } - return num_states; } -/** - * Returns true if start leads to all of startDs's proper successors or if - * start has no successors other than startDs. - */ -static -bool startIsRedundant(const NGHolder &g) { - set start, startDs; - - for (const auto &e : out_edges_range(g.start, g)) { - NFAVertex v = target(e, g); - if (v == g.startDs) { - continue; - } - start.insert(v); - } - - for (const auto &e : out_edges_range(g.startDs, g)) { - NFAVertex v = target(e, g); - if (v == g.startDs) { - continue; - } - startDs.insert(v); - } - - // Trivial case: start has no successors other than startDs. - if (start.empty()) { - DEBUG_PRINTF("start has no out-edges other than to startDs\n"); - return true; - } - - if (start != startDs) { - DEBUG_PRINTF("out-edges of start and startDs aren't equivalent\n"); - return false; - } - - return true; -} - -/** One final, FINAL optimisation. Drop either start or startDs if it's unused - * in this graph. We leave this until this late because having both vertices in - * the graph, with fixed state indices, is useful for merging and other - * analyses. */ -void dropUnusedStarts(NGHolder &g, ue2::unordered_map &states) { - u32 adj = 0; - - if (startIsRedundant(g)) { - DEBUG_PRINTF("dropping unused start\n"); - states[g.start] = NO_STATE; - adj++; - } - - if (proper_out_degree(g.startDs, g) == 0) { - DEBUG_PRINTF("dropping unused startDs\n"); - states[g.startDs] = NO_STATE; - adj++; - } - - if (!adj) { - DEBUG_PRINTF("both start and startDs must remain\n"); - return; - } - - // We have removed one or both of the starts. Walk the non-special vertices - // in the graph with state indices assigned to them and subtract - // adj from all of them. - for (auto v : vertices_range(g)) { - u32 &state = states[v]; // note ref - if (state == NO_STATE) { - continue; - } - if (is_any_start(v, g)) { - assert(state <= 1); - state = 0; // one start remains - } else { - assert(!is_special(v, g)); - assert(state >= adj); - state -= adj; - } - } -} - -flat_set findUnusedStates(const NGHolder &g) { - flat_set dead; - if (startIsRedundant(g)) { - dead.insert(g.start); - } - if (proper_out_degree(g.startDs, g) == 0) { - dead.insert(g.startDs); - } - return dead; -} - -/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to - * accepts. */ -void reverseHolder(const NGHolder &g_in, NGHolder &g) { - // Make the BGL do the grunt work. - ue2::unordered_map vertexMap; - boost::transpose_graph(g_in.g, g.g, - orig_to_copy(boost::make_assoc_property_map(vertexMap)). - vertex_index_map(get(&NFAGraphVertexProps::index, g_in.g))); - - // The transpose_graph operation will have created extra copies of our - // specials. We have to rewire their neighbours to the 'real' specials and - // delete them. - NFAVertex start = vertexMap[g_in.acceptEod]; - NFAVertex startDs = vertexMap[g_in.accept]; - NFAVertex accept = vertexMap[g_in.startDs]; - NFAVertex acceptEod = vertexMap[g_in.start]; - - // Successors of starts. - for (const auto &e : out_edges_range(start, g)) { - NFAVertex v = target(e, g); - add_edge(g.start, v, g[e], g); - } - for (const auto &e : out_edges_range(startDs, g)) { - NFAVertex v = target(e, g); - add_edge(g.startDs, v, g[e], g); - } - - // Predecessors of accepts. - for (const auto &e : in_edges_range(accept, g)) { - NFAVertex u = source(e, g); - add_edge(u, g.accept, g[e], g); - } - for (const auto &e : in_edges_range(acceptEod, g)) { - NFAVertex u = source(e, g); - add_edge(u, g.acceptEod, g[e], g); - } - - // Remove our impostors. - clear_vertex(start, g); - remove_vertex(start, g); - clear_vertex(startDs, g); - remove_vertex(startDs, g); - clear_vertex(accept, g); - remove_vertex(accept, g); - clear_vertex(acceptEod, g); - remove_vertex(acceptEod, g); - - // Renumber so that g's properties (number of vertices, edges) are - // accurate. - g.renumberVertices(); - g.renumberEdges(); - - assert(num_vertices(g) == num_vertices(g_in)); - assert(num_edges(g) == num_edges(g_in)); -} - } // namespace ue2 diff --git a/src/nfagraph/ng_restructuring.h b/src/nfagraph/ng_restructuring.h index 5e244bf6..bbd478d5 100644 --- a/src/nfagraph/ng_restructuring.h +++ b/src/nfagraph/ng_restructuring.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,23 +37,8 @@ #include "ue2common.h" #include "util/ue2_containers.h" -#include -#include - namespace ue2 { -class NGHolder; - -/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to - * accepts. */ -void reverseHolder(const NGHolder &g, NGHolder &out); - -/** Connect the start vertex to each of the vertices in \p tops. This is useful - * temporarily for when we need to run a graph algorithm that expects a single - * source vertex. */ -void wireStartToTops(NGHolder &g, const std::map &tops, - std::vector &topEdges); - /** * \brief Special state index value meaning that the vertex will not * participate in an (NFA/DFA/etc) implementation. @@ -63,30 +48,14 @@ static constexpr u32 NO_STATE = ~0; /** * \brief Gives each participating vertex in the graph a unique state index. */ -ue2::unordered_map -numberStates(NGHolder &h, - const std::map &tops = std::map{}); +unordered_map +numberStates(NGHolder &h, const flat_set &tops); /** * \brief Counts the number of states (vertices with state indices) in the * graph. - * - * If addTops is true, also accounts for states that will be constructed for - * each unique top. */ -u32 countStates(const NGHolder &g, - const ue2::unordered_map &state_ids, - bool addTops = true); - -/** Optimisation: drop unnecessary start states. */ -void dropUnusedStarts(NGHolder &g, ue2::unordered_map &states); - -/** - * \brief Returns a set of vertices that will not participate in an - * implementation (NFA, DFA etc) of this graph. For example, starts with no - * successors. - */ -flat_set findUnusedStates(const NGHolder &g); +u32 countStates(const unordered_map &state_ids); } // namespace ue2 diff --git a/src/nfagraph/ng_uncalc_components.cpp b/src/nfagraph/ng_uncalc_components.cpp index fd6dfc3e..3326d6f4 100644 --- a/src/nfagraph/ng_uncalc_components.cpp +++ b/src/nfagraph/ng_uncalc_components.cpp @@ -39,7 +39,6 @@ #include "ng_limex.h" #include "ng_redundancy.h" #include "ng_region.h" -#include "ng_restructuring.h" #include "ng_uncalc_components.h" #include "ng_util.h" #include "ue2common.h" @@ -55,42 +54,52 @@ #include #include +#include + using namespace std; +using boost::adaptors::map_values; namespace ue2 { static const u32 FAST_STATE_LIMIT = 256; /**< largest possible desirable NFA */ /** Sentinel value meaning no component has yet been selected. */ -static const u32 NO_COMPONENT = 0xffffffffu; +static const u32 NO_COMPONENT = ~0U; -static -vector getSortedVA(const NGHolder &g, - const ue2::unordered_map &state_ids) { - vector out; - out.reserve(num_vertices(g)); +static const u32 UNUSED_STATE = ~0U; - for (auto v : vertices_range(g)) { - assert(contains(state_ids, v)); - if (state_ids.at(v) == NO_STATE) { - continue; +namespace { +struct ranking_info { + explicit ranking_info(const NGHolder &h) : to_vertex(getTopoOrdering(h)) { + u32 rank = 0; + + reverse(to_vertex.begin(), to_vertex.end()); + + for (NFAVertex v : to_vertex) { + to_rank[v] = rank++; + } + + for (NFAVertex v : vertices_range(h)) { + if (!contains(to_rank, v)) { + to_rank[v] = UNUSED_STATE; + } } - out.push_back(v); } - // Order vertices by their state indices. - sort(begin(out), end(out), [&state_ids](NFAVertex a, NFAVertex b) { - return state_ids.at(a) < state_ids.at(b); - }); - -#ifndef NDEBUG - // State indices should match vector indices. - for (u32 i = 0; i < out.size(); i++) { - assert(state_ids.at(out.at(i)) == i); + NFAVertex at(u32 ranking) const { return to_vertex.at(ranking); } + u32 get(NFAVertex v) const { return to_rank.at(v); } + u32 size() const { return (u32)to_vertex.size(); } + u32 add_to_tail(NFAVertex v) { + u32 rank = size(); + to_rank[v] = rank; + to_vertex.push_back(v); + return rank; } -#endif - return out; +private: + vector to_vertex; + unordered_map to_rank; +}; } static never_inline @@ -122,9 +131,9 @@ bool cplVerticesMatch(const NGHolder &ga, NFAVertex va, } static never_inline -u32 cplCommonReachAndSimple(const NGHolder &ga, const vector &a, - const NGHolder &gb, const vector &b) { - u32 ml = min(a.size(), b.size()); +u32 cplCommonReachAndSimple(const NGHolder &ga, const ranking_info &a_ranking, + const NGHolder &gb, const ranking_info &b_ranking) { + u32 ml = min(a_ranking.size(), b_ranking.size()); if (ml > 65535) { ml = 65535; } @@ -133,7 +142,7 @@ u32 cplCommonReachAndSimple(const NGHolder &ga, const vector &a, // "startedness" properties. u32 max = 0; for (; max < ml; max++) { - if (!cplVerticesMatch(ga, a[max], gb, b[max])) { + if (!cplVerticesMatch(ga, a_ranking.at(max), gb, b_ranking.at(max))) { break; } } @@ -141,34 +150,30 @@ u32 cplCommonReachAndSimple(const NGHolder &ga, const vector &a, return max; } -u32 commonPrefixLength(const NGHolder &ga, - const ue2::unordered_map &a_state_ids, - const NGHolder &gb, - const ue2::unordered_map &b_state_ids) { - vector a = getSortedVA(ga, a_state_ids); - vector b = getSortedVA(gb, b_state_ids); - +static +u32 commonPrefixLength(const NGHolder &ga, const ranking_info &a_ranking, + const NGHolder &gb, const ranking_info &b_ranking) { /* upper bound on the common region based on local properties */ - u32 max = cplCommonReachAndSimple(ga, a, gb, b); + u32 max = cplCommonReachAndSimple(ga, a_ranking, gb, b_ranking); DEBUG_PRINTF("cpl upper bound %u\n", max); while (max > 0) { - bool ok = true; - /* shrink max region based on in-edges from outside the region */ for (size_t j = max; j > 0; j--) { - for (auto u : inv_adjacent_vertices_range(a[j - 1], ga)) { - u32 state_id = a_state_ids.at(u); - if (state_id != NO_STATE && state_id >= max) { + NFAVertex a_v = a_ranking.at(j - 1); + NFAVertex b_v = b_ranking.at(j - 1); + for (auto u : inv_adjacent_vertices_range(a_v, ga)) { + u32 state_id = a_ranking.get(u); + if (state_id != UNUSED_STATE && state_id >= max) { max = j - 1; DEBUG_PRINTF("lowering max to %u\n", max); goto next_vertex; } } - for (auto u : inv_adjacent_vertices_range(b[j - 1], gb)) { - u32 state_id = b_state_ids.at(u); - if (state_id != NO_STATE && state_id >= max) { + for (auto u : inv_adjacent_vertices_range(b_v, gb)) { + u32 state_id = b_ranking.get(u); + if (state_id != UNUSED_STATE && state_id >= max) { max = j - 1; DEBUG_PRINTF("lowering max to %u\n", max); goto next_vertex; @@ -180,14 +185,13 @@ u32 commonPrefixLength(const NGHolder &ga, /* Ensure that every pair of vertices has same out-edges to vertices in the region. */ - for (size_t i = 0; ok && i < max; i++) { + for (size_t i = 0; i < max; i++) { size_t a_count = 0; size_t b_count = 0; - NGHolder::out_edge_iterator ei, ee; - for (tie(ei, ee) = out_edges(a[i], ga); ok && ei != ee; ++ei) { - u32 sid = a_state_ids.at(target(*ei, ga)); - if (sid == NO_STATE || sid >= max) { + for (NFAEdge a_edge : out_edges_range(a_ranking.at(i), ga)) { + u32 sid = a_ranking.get(target(a_edge, ga)); + if (sid == UNUSED_STATE || sid >= max) { continue; } @@ -195,28 +199,26 @@ u32 commonPrefixLength(const NGHolder &ga, NFAEdge b_edge; bool has_b_edge; - tie(b_edge, has_b_edge) = edge(b[i], b[sid], gb); + tie(b_edge, has_b_edge) = edge(b_ranking.at(i), + b_ranking.at(sid), gb); if (!has_b_edge) { max = i; - ok = false; DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n", max, i, sid); - break; + goto try_smaller; } - if (ga[*ei].tops != gb[b_edge].tops) { + if (ga[a_edge].tops != gb[b_edge].tops) { max = i; - ok = false; DEBUG_PRINTF("tops don't match on edge %zu->%u\n", i, sid); + goto try_smaller; } } - NGHolder::adjacency_iterator ai, ae; - for (tie(ai, ae) = adjacent_vertices(b[i], gb); ok && ai != ae; - ++ai) { - u32 sid = b_state_ids.at(*ai); - if (sid == NO_STATE || sid >= max) { + for (NFAVertex b_v : adjacent_vertices_range(b_ranking.at(i), gb)) { + u32 sid = b_ranking.get(b_v); + if (sid == UNUSED_STATE || sid >= max) { continue; } @@ -225,28 +227,32 @@ u32 commonPrefixLength(const NGHolder &ga, if (a_count != b_count) { max = i; - DEBUG_PRINTF("lowering max to %u due to a,b count " - "(a_count=%zu, b_count=%zu)\n", max, a_count, - b_count); - ok = false; + DEBUG_PRINTF("lowering max to %u due to a,b count (a_count=%zu," + " b_count=%zu)\n", max, a_count, b_count); + goto try_smaller; } } - if (ok) { - DEBUG_PRINTF("survived checks, returning cpl %u\n", max); - return max; - } + DEBUG_PRINTF("survived checks, returning cpl %u\n", max); + return max; + try_smaller:; } DEBUG_PRINTF("failed to find any common region\n"); return 0; } +u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb) { + return commonPrefixLength(ga, ranking_info(ga), gb, ranking_info(gb)); +} + static never_inline -void mergeNfa(NGHolder &dest, vector &destStateMap, - ue2::unordered_map &dest_state_ids, - NGHolder &vic, vector &vicStateMap, - size_t common_len) { +void mergeNfaComponent(NGHolder &dest, const NGHolder &vic, size_t common_len) { + assert(&dest != &vic); + + auto dest_info = ranking_info(dest); + auto vic_info = ranking_info(vic); + map vmap; // vic -> dest vmap[vic.start] = dest.start; @@ -255,22 +261,20 @@ void mergeNfa(NGHolder &dest, vector &destStateMap, vmap[vic.acceptEod] = dest.acceptEod; vmap[nullptr] = nullptr; - u32 stateNum = countStates(dest, dest_state_ids); - // For vertices in the common len, add to vmap and merge in the reports, if // any. for (u32 i = 0; i < common_len; i++) { - NFAVertex v_old = vicStateMap[i], v = destStateMap[i]; + NFAVertex v_old = vic_info.at(i); + NFAVertex v = dest_info.at(i); vmap[v_old] = v; const auto &reports = vic[v_old].reports; dest[v].reports.insert(reports.begin(), reports.end()); } - // Add in vertices beyond the common len, giving them state numbers - // starting at stateNum. - for (u32 i = common_len; i < vicStateMap.size(); i++) { - NFAVertex v_old = vicStateMap[i]; + // Add in vertices beyond the common len + for (u32 i = common_len; i < vic_info.size(); i++) { + NFAVertex v_old = vic_info.at(i); if (is_special(v_old, vic)) { // Dest already has start vertices, just merge the reports. @@ -282,15 +286,17 @@ void mergeNfa(NGHolder &dest, vector &destStateMap, } NFAVertex v = add_vertex(vic[v_old], dest); - dest_state_ids[v] = stateNum++; + dest_info.add_to_tail(v); vmap[v_old] = v; } /* add edges */ DEBUG_PRINTF("common_len=%zu\n", common_len); for (const auto &e : edges_range(vic)) { - NFAVertex u_old = source(e, vic), v_old = target(e, vic); - NFAVertex u = vmap[u_old], v = vmap[v_old]; + NFAVertex u_old = source(e, vic); + NFAVertex v_old = target(e, vic); + NFAVertex u = vmap[u_old]; + NFAVertex v = vmap[v_old]; bool uspecial = is_special(u, dest); bool vspecial = is_special(v, dest); @@ -301,15 +307,14 @@ void mergeNfa(NGHolder &dest, vector &destStateMap, // We're in the common region if v's state ID is low enough, unless v // is a special (an accept), in which case we use u's state ID. - assert(contains(dest_state_ids, v)); - bool in_common_region = dest_state_ids.at(v) < common_len; - if (vspecial && dest_state_ids.at(u) < common_len) { + bool in_common_region = dest_info.get(v) < common_len; + if (vspecial && dest_info.get(u) < common_len) { in_common_region = true; } DEBUG_PRINTF("adding idx=%u (state %u) -> idx=%u (state %u)%s\n", - dest[u].index, dest_state_ids.at(u), - dest[v].index, dest_state_ids.at(v), + dest[u].index, dest_info.get(u), + dest[v].index, dest_info.get(v), in_common_region ? " [common]" : ""); if (in_common_region) { @@ -337,18 +342,6 @@ void mergeNfa(NGHolder &dest, vector &destStateMap, dest.renumberVertices(); } -static never_inline -void mergeNfaComponent(NGHolder &pholder, NGHolder &vholder, size_t cpl) { - assert(&pholder != &vholder); - - auto v_state_ids = numberStates(vholder); - auto p_state_ids = numberStates(pholder); - auto vhvmap = getSortedVA(vholder, v_state_ids); - auto phvmap = getSortedVA(pholder, p_state_ids); - - mergeNfa(pholder, phvmap, p_state_ids, vholder, vhvmap, cpl); -} - namespace { struct NfaMergeCandidateH { NfaMergeCandidateH(size_t cpl_in, NGHolder *first_in, NGHolder *second_in, @@ -373,14 +366,19 @@ struct NfaMergeCandidateH { /** Returns true if graphs \p h1 and \p h2 can (and should) be merged. */ static -bool shouldMerge(NGHolder &ha, - const ue2::unordered_map &a_state_ids, - NGHolder &hb, - const ue2::unordered_map &b_state_ids, - size_t cpl, const ReportManager *rm, - const CompileContext &cc) { - size_t combinedStateCount = - countStates(ha, a_state_ids) + countStates(hb, b_state_ids) - cpl; +bool shouldMerge(const NGHolder &ha, const NGHolder &hb, size_t cpl, + const ReportManager *rm, const CompileContext &cc) { + size_t combinedStateCount = num_vertices(ha) + num_vertices(hb) - cpl; + + combinedStateCount -= 2 * 2; /* discount accepts from both */ + + if (is_triggered(ha)) { + /* allow for a state for each top, ignore existing starts */ + combinedStateCount -= 2; /* for start, startDs */ + auto tops = getTops(ha); + insert(&tops, getTops(hb)); + combinedStateCount += tops.size(); + } if (combinedStateCount > FAST_STATE_LIMIT) { // More complex implementability check. @@ -423,11 +421,13 @@ void buildNfaMergeQueue(const vector &cluster, // First, make sure all holders have numbered states and collect their // counts. - vector> states_map(cs); + vector states_map; + states_map.reserve(cs); for (size_t i = 0; i < cs; i++) { assert(cluster[i]); - NGHolder &g = *(cluster[i]); - states_map[i] = numberStates(g); + assert(states_map.size() == i); + const NGHolder &g = *(cluster[i]); + states_map.emplace_back(g); } vector seen_cpl(cs * cs, 0); @@ -536,11 +536,9 @@ bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) { } /** Merge graph \p ga into graph \p gb. Returns false on failure. */ -bool mergeNfaPair(NGHolder &ga, NGHolder &gb, const ReportManager *rm, +bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm, const CompileContext &cc) { assert(ga.kind == gb.kind); - auto a_state_ids = numberStates(ga); - auto b_state_ids = numberStates(gb); // Vacuous NFAs require special checks on their starts to ensure that tops // match, and that reports match for mixed-accept cases. @@ -549,14 +547,13 @@ bool mergeNfaPair(NGHolder &ga, NGHolder &gb, const ReportManager *rm, return false; } - u32 cpl = commonPrefixLength(ga, a_state_ids, gb, b_state_ids); - if (!shouldMerge(gb, b_state_ids, ga, a_state_ids, cpl, rm, cc)) { + u32 cpl = commonPrefixLength(ga, gb); + if (!shouldMerge(gb, ga, cpl, rm, cc)) { return false; } mergeNfaComponent(gb, ga, cpl); reduceImplementableGraph(gb, SOM_NONE, rm, cc); - b_state_ids = numberStates(gb); return true; } diff --git a/src/nfagraph/ng_uncalc_components.h b/src/nfagraph/ng_uncalc_components.h index 5f341961..ddab8825 100644 --- a/src/nfagraph/ng_uncalc_components.h +++ b/src/nfagraph/ng_uncalc_components.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -52,10 +52,7 @@ class ReportManager; * The CPL is calculated based the topological ordering given by the state * indices for each graph. */ -u32 commonPrefixLength(const NGHolder &ga, - const ue2::unordered_map &a_state_ids, - const NGHolder &gb, - const ue2::unordered_map &b_state_ids); +u32 commonPrefixLength(const NGHolder &ga, const NGHolder &gb); /** * \brief Merge the group of graphs in \p cluster where possible. @@ -73,7 +70,7 @@ void mergeNfaCluster(const std::vector &cluster, * Returns false on failure. On success, \p gb is reduced via \ref * reduceImplementableGraph and renumbered. */ -bool mergeNfaPair(NGHolder &ga, NGHolder &gb, const ReportManager *rm, +bool mergeNfaPair(const NGHolder &ga, NGHolder &gb, const ReportManager *rm, const CompileContext &cc); } // namespace ue2 diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index da9c2438..71eef7eb 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -343,6 +343,47 @@ bool is_virtual_start(NFAVertex v, const NGHolder &g) { return g[v].assert_flags & POS_FLAG_VIRTUAL_START; } +static +void reorderSpecials(const NGHolder &g, vector &topoOrder) { + // Start is last element of reverse topo ordering. + auto it = find(topoOrder.begin(), topoOrder.end(), g.start); + if (it != topoOrder.end() - 1) { + DEBUG_PRINTF("repositioning start\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + topoOrder.insert(topoOrder.end(), g.start); + } + + // StartDs is second-to-last element of reverse topo ordering. + it = find(topoOrder.begin(), topoOrder.end(), g.startDs); + if (it != topoOrder.end() - 2) { + DEBUG_PRINTF("repositioning start ds\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + topoOrder.insert(topoOrder.end() - 1, g.startDs); + } + + // AcceptEOD is first element of reverse topo ordering. + it = find(topoOrder.begin(), topoOrder.end(), g.acceptEod); + if (it != topoOrder.begin()) { + DEBUG_PRINTF("repositioning accept\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + topoOrder.insert(topoOrder.begin(), g.acceptEod); + } + + // Accept is second element of reverse topo ordering, if it's connected. + it = find(topoOrder.begin(), topoOrder.end(), g.accept); + if (it != topoOrder.begin() + 1) { + DEBUG_PRINTF("repositioning accept\n"); + assert(it != topoOrder.end()); + topoOrder.erase(it); + if (in_degree(g.accept, g) != 0) { + topoOrder.insert(topoOrder.begin() + 1, g.accept); + } + } +} + vector getTopoOrdering(const NGHolder &g) { assert(hasCorrectlyNumberedVertices(g)); @@ -372,6 +413,8 @@ vector getTopoOrdering(const NGHolder &g) { color_map(make_iterator_property_map(colour.begin(), index_map)) .vertex_index_map(index_map)); + reorderSpecials(g, ordering); + return ordering; } @@ -629,6 +672,60 @@ unique_ptr cloneHolder(const NGHolder &in) { return h; } +void reverseHolder(const NGHolder &g_in, NGHolder &g) { + // Make the BGL do the grunt work. + ue2::unordered_map vertexMap; + boost::transpose_graph(g_in.g, g.g, + orig_to_copy(boost::make_assoc_property_map(vertexMap)). + vertex_index_map(get(&NFAGraphVertexProps::index, g_in.g))); + + // The transpose_graph operation will have created extra copies of our + // specials. We have to rewire their neighbours to the 'real' specials and + // delete them. + NFAVertex start = vertexMap[g_in.acceptEod]; + NFAVertex startDs = vertexMap[g_in.accept]; + NFAVertex accept = vertexMap[g_in.startDs]; + NFAVertex acceptEod = vertexMap[g_in.start]; + + // Successors of starts. + for (const auto &e : out_edges_range(start, g)) { + NFAVertex v = target(e, g); + add_edge(g.start, v, g[e], g); + } + for (const auto &e : out_edges_range(startDs, g)) { + NFAVertex v = target(e, g); + add_edge(g.startDs, v, g[e], g); + } + + // Predecessors of accepts. + for (const auto &e : in_edges_range(accept, g)) { + NFAVertex u = source(e, g); + add_edge(u, g.accept, g[e], g); + } + for (const auto &e : in_edges_range(acceptEod, g)) { + NFAVertex u = source(e, g); + add_edge(u, g.acceptEod, g[e], g); + } + + // Remove our impostors. + clear_vertex(start, g); + remove_vertex(start, g); + clear_vertex(startDs, g); + remove_vertex(startDs, g); + clear_vertex(accept, g); + remove_vertex(accept, g); + clear_vertex(acceptEod, g); + remove_vertex(acceptEod, g); + + // Renumber so that g's properties (number of vertices, edges) are + // accurate. + g.renumberVertices(); + g.renumberEdges(); + + assert(num_vertices(g) == num_vertices(g_in)); + assert(num_edges(g) == num_edges(g_in)); +} + #ifndef NDEBUG bool allMatchStatesHaveReports(const NGHolder &g) { diff --git a/src/nfagraph/ng_util.h b/src/nfagraph/ng_util.h index 1c6dd461..6c6907a3 100644 --- a/src/nfagraph/ng_util.h +++ b/src/nfagraph/ng_util.h @@ -174,7 +174,11 @@ bool is_match_vertex(NFAVertex v, const GraphT &g) { } /** Generate a reverse topological ordering for a back-edge filtered version of - * our graph (as it must be a DAG and correctly numbered) */ + * our graph (as it must be a DAG and correctly numbered). + * + * Note: we ensure that we produce a topo ordering that begins with acceptEod + * and accept (if present) and ends with startDs followed by start. + */ std::vector getTopoOrdering(const NGHolder &g); /** Comparison functor used to sort by vertex_index. */ @@ -300,6 +304,10 @@ void clearReports(NGHolder &g); * r_old. */ void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new); +/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to + * accepts. */ +void reverseHolder(const NGHolder &g, NGHolder &out); + #ifndef NDEBUG // Assertions: only available in internal builds. diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 6b19549b..38c488be 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -47,6 +47,7 @@ #include "nfagraph/ng_is_equal.h" #include "nfagraph/ng_limex.h" #include "nfagraph/ng_mcclellan.h" +#include "nfagraph/ng_prune.h" #include "nfagraph/ng_repeat.h" #include "nfagraph/ng_reports.h" #include "nfagraph/ng_stop.h" @@ -788,19 +789,230 @@ void RoseBuildImpl::findTransientLeftfixes(void) { /** Find all the different roses and their associated literals. */ static -map> findLeftSucc(RoseBuildImpl &tbi) { +map> findLeftSucc(const RoseBuildImpl &build) { map> leftfixes; - for (auto v : vertices_range(tbi.g)) { - if (tbi.g[v].left) { - const LeftEngInfo &lei = tbi.g[v].left; + for (auto v : vertices_range(build.g)) { + if (build.g[v].left) { + const LeftEngInfo &lei = build.g[v].left; leftfixes[lei].push_back(v); } } return leftfixes; } +namespace { +struct infix_info { + set preds; + set succs; +}; +} + static -bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left, +map findInfixGraphInfo(const RoseBuildImpl &build) { + map rv; + + for (auto v : vertices_range(build.g)) { + if (!build.g[v].left) { + continue; + } + + if (build.isRootSuccessor(v)) { + DEBUG_PRINTF("a prefix is never an infix\n"); + continue; + } + + /* ensure only proper nfas */ + const LeftEngInfo &lei = build.g[v].left; + if (!lei.graph) { + continue; + } + if (lei.haig || lei.dfa) { + continue; + } + assert(!lei.castle); + infix_info &info = rv[lei.graph.get()]; + insert(&info.preds, inv_adjacent_vertices_range(v, build.g)); + info.succs.insert(v); + } + + return rv; +} + +static +map> getTopInfo(const NGHolder &h) { + map> rv; + for (NFAEdge e : out_edges_range(h.start, h)) { + for (u32 t : h[e].tops) { + rv[t].insert(e); + } + } + return rv; +} + +static +u32 findUnusedTop(const map> &tops) { + u32 i = 0; + while (contains(tops, i)) { + i++; + } + return i; +} + +static +bool reduceTopTriggerLoad(RoseBuildImpl &build, NGHolder &h, RoseVertex u) { + RoseGraph &g = build.g; + + set tops; /* tops triggered by u */ + for (RoseEdge e : out_edges_range(u, g)) { + RoseVertex v = target(e, g); + if (g[v].left.graph.get() != &h) { + continue; + } + tops.insert(g[e].rose_top); + } + + assert(!tops.empty()); + if (tops.size() <= 1) { + return false; + } + DEBUG_PRINTF("%zu triggers %zu tops for %p\n", build.g[u].idx, tops.size(), + &h); + + auto h_top_info = getTopInfo(h); + flat_set edges_to_trigger; + for (u32 t : tops) { + insert(&edges_to_trigger, h_top_info[t]); + } + + u32 new_top = ~0U; + /* check if there is already a top with the right the successor set */ + for (const auto &elem : h_top_info) { + if (elem.second == edges_to_trigger) { + new_top = elem.first; + break; + } + } + + /* if no existing suitable top, add a new top for us */ + if (new_top == ~0U) { + new_top = findUnusedTop(h_top_info); + + /* add top to edges out of start */ + for (NFAEdge e : out_edges_range(h.start, h)) { + if (has_intersection(tops, h[e].tops)) { + h[e].tops.insert(new_top); + } + } + + /* check still implementable if we add a new top */ + if (!isImplementableNFA(h, nullptr, build.cc)) { + DEBUG_PRINTF("unable to add new top\n"); + for (NFAEdge e : out_edges_range(h.start, h)) { + h[e].tops.erase(new_top); + } + /* we should be back to the original graph */ + assert(isImplementableNFA(h, nullptr, build.cc)); + return false; + } + } + + DEBUG_PRINTF("using new merged top %u\n", new_top); + assert(new_top != ~0U); + for (RoseEdge e: out_edges_range(u, g)) { + RoseVertex v = target(e, g); + if (g[v].left.graph.get() != &h) { + continue; + } + g[e].rose_top = new_top; + } + + return true; +} + +static +void packInfixTops(NGHolder &h, RoseGraph &g, + const set &verts) { + if (!is_triggered(h)) { + DEBUG_PRINTF("not triggered, no tops\n"); + return; + } + assert(isCorrectlyTopped(h)); + DEBUG_PRINTF("pruning unused tops\n"); + flat_set used_tops; + for (auto v : verts) { + assert(g[v].left.graph.get() == &h); + + for (const auto &e : in_edges_range(v, g)) { + u32 top = g[e].rose_top; + used_tops.insert(top); + } + } + + map top_mapping; + for (u32 t : used_tops) { + u32 new_top = top_mapping.size(); + top_mapping[t] = new_top; + } + + for (auto v : verts) { + assert(g[v].left.graph.get() == &h); + + for (const auto &e : in_edges_range(v, g)) { + g[e].rose_top = top_mapping.at(g[e].rose_top); + } + } + + vector dead; + for (const auto &e : out_edges_range(h.start, h)) { + NFAVertex v = target(e, h); + if (v == h.startDs) { + continue; // stylised edge, leave it alone. + } + flat_set updated_tops; + for (u32 t : h[e].tops) { + if (contains(top_mapping, t)) { + updated_tops.insert(top_mapping.at(t)); + } + } + h[e].tops = move(updated_tops); + if (h[e].tops.empty()) { + DEBUG_PRINTF("edge (start,%u) has only unused tops\n", h[v].index); + dead.push_back(e); + } + } + + if (dead.empty()) { + return; + } + + remove_edges(dead, h); + pruneUseless(h); + clearReports(h); // As we may have removed vacuous edges. +} + +static +void reduceTopTriggerLoad(RoseBuildImpl &build) { + auto infixes = findInfixGraphInfo(build); + + for (auto &p : infixes) { + if (onlyOneTop(*p.first)) { + continue; + } + + bool changed = false; + for (RoseVertex v : p.second.preds) { + changed |= reduceTopTriggerLoad(build, *p.first, v); + } + + if (changed) { + packInfixTops(*p.first, build.g, p.second.succs); + reduceImplementableGraph(*p.first, SOM_NONE, nullptr, build.cc); + } + } +} + +static +bool triggerKillsRoseGraph(const RoseBuildImpl &build, const left_id &left, const set &all_lits, const RoseEdge &e) { assert(left.graph()); @@ -816,8 +1028,8 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left, /* check each pred literal to see if they all kill previous graph * state */ - for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) { - const rose_literal_id &pred_lit = tbi.literals.right.at(lit_id); + for (u32 lit_id : build.g[source(e, build.g)].literals) { + const rose_literal_id &pred_lit = build.literals.right.at(lit_id); const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s); DEBUG_PRINTF("running graph %zu\n", states.size()); @@ -833,7 +1045,7 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left, } static -bool triggerKillsRose(const RoseBuildImpl &tbi, const left_id &left, +bool triggerKillsRose(const RoseBuildImpl &build, const left_id &left, const set &all_lits, const RoseEdge &e) { if (left.haig()) { /* TODO: To allow this for som-based engines we would also need to @@ -843,32 +1055,30 @@ bool triggerKillsRose(const RoseBuildImpl &tbi, const left_id &left, } if (left.graph()) { - return triggerKillsRoseGraph(tbi, left, all_lits, e); + return triggerKillsRoseGraph(build, left, all_lits, e); } if (left.castle()) { - return triggerKillsRoseCastle(tbi, left, all_lits, e); + return triggerKillsRoseCastle(build, left, all_lits, e); } return false; } +/* Sometimes the arrival of a top for a rose infix can ensure that the nfa would + * be dead at that time. In the case of multiple trigger literals, we can only + * base our decision on that portion of literal after any overlapping literals. + */ static -void inspectRoseTops(RoseBuildImpl &tbi) { - /* Sometimes the arrival of a top for a rose infix can ensure that the nfa - * would be dead at that time. In the case of multiple trigger literals we - * can only base our decision on that portion of literal after any - * overlapping literals */ +void findTopTriggerCancels(RoseBuildImpl &build) { + auto left_succ = findLeftSucc(build); /* leftfixes -> succ verts */ - map> roses = - findLeftSucc(tbi); /* rose -> succ verts */ - - for (const auto &r : roses) { + for (const auto &r : left_succ) { const left_id &left = r.first; const vector &succs = r.second; assert(!succs.empty()); - if (tbi.isRootSuccessor(*succs.begin())) { + if (build.isRootSuccessor(*succs.begin())) { /* a prefix is never an infix */ continue; } @@ -878,10 +1088,10 @@ void inspectRoseTops(RoseBuildImpl &tbi) { set pred_lit_ids; for (auto v : succs) { - for (const auto &e : in_edges_range(v, tbi.g)) { - RoseVertex u = source(e, tbi.g); - tops_seen.insert(tbi.g[e].rose_top); - insert(&pred_lit_ids, tbi.g[u].literals); + for (const auto &e : in_edges_range(v, build.g)) { + RoseVertex u = source(e, build.g); + tops_seen.insert(build.g[e].rose_top); + insert(&pred_lit_ids, build.g[u].literals); rose_edges.insert(e); } } @@ -893,7 +1103,7 @@ void inspectRoseTops(RoseBuildImpl &tbi) { } for (u32 lit_id : pred_lit_ids) { - const rose_literal_id &p_lit = tbi.literals.right.at(lit_id); + const rose_literal_id &p_lit = build.literals.right.at(lit_id); if (p_lit.delay || p_lit.table == ROSE_ANCHORED) { goto next_rose; } @@ -905,15 +1115,22 @@ void inspectRoseTops(RoseBuildImpl &tbi) { all_lits.size(), rose_edges.size()); for (const auto &e : rose_edges) { - if (triggerKillsRose(tbi, left, all_lits, e)) { + if (triggerKillsRose(build, left, all_lits, e)) { DEBUG_PRINTF("top will override previous rose state\n"); - tbi.g[e].rose_cancel_prev_top = true; + build.g[e].rose_cancel_prev_top = true; } } next_rose:; } } +static +void optimiseRoseTops(RoseBuildImpl &build) { + reduceTopTriggerLoad(build); + /* prune unused tops ? */ + findTopTriggerCancels(build); +} + static void buildRoseSquashMasks(RoseBuildImpl &tbi) { /* Rose nfa squash masks are applied to the groups when the nfa can no @@ -1492,7 +1709,7 @@ aligned_unique_ptr RoseBuildImpl::buildRose(u32 minWidth) { /* final prep work */ remapCastleTops(*this); - inspectRoseTops(*this); + optimiseRoseTops(*this); buildRoseSquashMasks(*this); rm.assignDkeys(this); diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 01134736..054dd12f 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -53,7 +53,6 @@ #include "nfagraph/ng_redundancy.h" #include "nfagraph/ng_repeat.h" #include "nfagraph/ng_reports.h" -#include "nfagraph/ng_restructuring.h" #include "nfagraph/ng_stop.h" #include "nfagraph/ng_uncalc_components.h" #include "nfagraph/ng_util.h" @@ -1457,11 +1456,7 @@ bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) { static u32 commonPrefixLength(left_id &r1, left_id &r2) { if (r1.graph() && r2.graph()) { - auto &g1 = *r1.graph(); - auto &g2 = *r2.graph(); - auto state_ids_1 = numberStates(g1); - auto state_ids_2 = numberStates(g2); - return commonPrefixLength(g1, state_ids_1, g2, state_ids_2); + return commonPrefixLength(*r1.graph(), *r2.graph()); } else if (r1.castle() && r2.castle()) { return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle())); } @@ -1750,7 +1745,6 @@ u32 findUnusedTop(const ue2::flat_set &tops) { while (contains(tops, i)) { i++; } - assert(i < NFA_MAX_TOP_MASKS); return i; } @@ -1779,11 +1773,6 @@ bool setDistinctTops(NGHolder &h1, const NGHolder &h2, DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n", tops1.size(), tops2.size()); - if (tops1.size() + tops2.size() > NFA_MAX_TOP_MASKS) { - DEBUG_PRINTF("too many tops!\n"); - return false; - } - // If our tops don't intersect, we're OK to merge with no changes. if (!has_intersection(tops1, tops2)) { DEBUG_PRINTF("tops don't intersect\n"); @@ -1856,11 +1845,6 @@ bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, return true; } -static -bool hasMaxTops(const NGHolder &h) { - return getTops(h).size() == NFA_MAX_TOP_MASKS; -} - /** \brief Estimate the number of accel states in the given graph when built as * an NFA. * @@ -1899,11 +1883,6 @@ void mergeNfaLeftfixes(RoseBuildImpl &tbi, RoseBouquet &roses) { "with %p (%zu verts)\n", r1.graph(), verts1.size(), r2.graph(), verts2.size()); - if (hasMaxTops(*r1.graph())) { - DEBUG_PRINTF("h1 has hit max tops\n"); - break; // next h1 - } - u32 accel1 = accel_count[r1]; if (accel1 >= NFA_MAX_ACCEL_STATES) { DEBUG_PRINTF("h1 has hit max accel\n"); @@ -2203,11 +2182,6 @@ void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes, const deque &verts2 = suffixes.vertices(s2); assert(s2.graph() && s2.graph()->kind == NFA_SUFFIX); - if (hasMaxTops(*s1.graph())) { - DEBUG_PRINTF("h1 has hit max tops\n"); - break; // next h1 - } - if (!acyclic) { u32 accel1 = accel_count[s1]; if (accel1 >= NFA_MAX_ACCEL_STATES) {