diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index d06083bd..4409924b 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -69,17 +69,15 @@ struct haig_too_wide { template static -void populateInit(const NGHolder &g, - const ue2::unordered_map &state_ids, +void populateInit(const NGHolder &g, const flat_set &unused, stateset *init, stateset *initDS, vector *v_by_index) { DEBUG_PRINTF("graph kind: %u\n", (int)g.kind); for (auto v : vertices_range(g)) { - u32 v_index = g[v].index; - if (state_ids.at(v) == NO_STATE) { + if (contains(unused, v)) { continue; } - + u32 v_index = g[v].index; if (is_any_start(v, g)) { init->set(v_index); if (hasSelfLoop(v, g) || is_triggered(g)) { @@ -116,8 +114,8 @@ void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) { class Automaton_Base { protected: Automaton_Base(const NGHolder &graph_in, - const ue2::unordered_map &state_ids_in) - : graph(graph_in), state_ids(state_ids_in) { + const flat_set &unused_in) + : graph(graph_in), unused(unused_in) { calculateAlphabet(graph, alpha, unalpha, &alphasize); assert(alphasize <= ALPHABET_SIZE); } @@ -126,7 +124,7 @@ public: static bool canPrune(const flat_set &) { return false; } const NGHolder &graph; - const ue2::unordered_map &state_ids; + const flat_set &unused; array alpha; array unalpha; @@ -145,14 +143,13 @@ public: typedef map StateMap; Automaton_Big(const NGHolder &graph_in, - const ue2::unordered_map &state_ids_in, - som_type som, const vector> &triggers, - bool unordered_som) - : Automaton_Base(graph_in, state_ids_in), numStates(num_vertices(graph)), + const flat_set &unused_in, som_type som, + const vector> &triggers, bool unordered_som) + : Automaton_Base(graph_in, unused_in), numStates(num_vertices(graph)), init(numStates), initDS(numStates), squash(numStates), accept(numStates), acceptEod(numStates), toppable(numStates), dead(numStates) { - populateInit(graph, state_ids, &init, &initDS, &v_by_index); + populateInit(graph, unused, &init, &initDS, &v_by_index); populateAccepts(graph, &accept, &acceptEod); start_anchored = DEAD_STATE + 1; @@ -175,7 +172,7 @@ public: cr_by_index = populateCR(graph, v_by_index, alpha); if (is_triggered(graph)) { - markToppableStarts(graph, state_ids, false, triggers, &toppable); + markToppableStarts(graph, unused, false, triggers, &toppable); } } @@ -244,11 +241,11 @@ public: typedef ue2::unordered_map StateMap; Automaton_Graph(const NGHolder &graph_in, - const ue2::unordered_map &state_ids_in, + const flat_set &unused_in, som_type som, const vector> &triggers, bool unordered_som) - : Automaton_Base(graph_in, state_ids_in) { - populateInit(graph, state_ids, &init, &initDS, &v_by_index); + : Automaton_Base(graph_in, unused_in) { + populateInit(graph, unused, &init, &initDS, &v_by_index); populateAccepts(graph, &accept, &acceptEod); start_anchored = DEAD_STATE + 1; @@ -272,7 +269,7 @@ public: cr_by_index = populateCR(graph, v_by_index, alpha); if (is_triggered(graph)) { dynamic_bitset<> temp(NFA_STATE_LIMIT); - markToppableStarts(graph, state_ids, false, triggers, &temp); + markToppableStarts(graph, unused, false, triggers, &temp); toppable = bitfield(temp); } } @@ -481,11 +478,10 @@ bool is_any_start_inc_virtual(NFAVertex v, const NGHolder &g) { } static -s32 getSlotID(const NGHolder &g, - UNUSED const ue2::unordered_map &state_ids, +s32 getSlotID(const NGHolder &g, UNUSED const flat_set &unused, NFAVertex v) { if (is_triggered(g) && v == g.start) { - assert(state_ids.at(v) != NO_STATE); + assert(!contains(unused, v)); } else if (is_any_start_inc_virtual(v, g)) { return CREATE_NEW_SOM; } @@ -516,8 +512,7 @@ void haig_do_preds(const NGHolder &g, const stateset &nfa_states, template static -void haig_do_report(const NGHolder &g, - const ue2::unordered_map &state_ids, +void haig_do_report(const NGHolder &g, const flat_set &unused, NFAVertex accept_v, const stateset &source_nfa_states, const vector &state_mapping, set &out) { @@ -528,7 +523,7 @@ void haig_do_report(const NGHolder &g, continue; } for (ReportID report_id : g[v].reports) { - out.insert(som_report(report_id, getSlotID(g, state_ids, v))); + out.insert(som_report(report_id, getSlotID(g, unused, v))); } } } @@ -565,14 +560,14 @@ void haig_note_starts(const NGHolder &g, map *out) { template static bool doHaig(const NGHolder &g, - const ue2::unordered_map &state_ids, + const flat_set &unused, som_type som, const vector> &triggers, bool unordered_som, raw_som_dfa *rdfa) { u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from a fight */ typedef typename Auto::StateSet StateSet; vector nfa_state_map; - Auto n(g, state_ids, som, triggers, unordered_som); + Auto n(g, unused, som, triggers, unordered_som); try { if (determinise(n, rdfa->states, state_limit, &nfa_state_map)) { DEBUG_PRINTF("state limit exceeded\n"); @@ -602,9 +597,9 @@ bool doHaig(const NGHolder &g, haig_do_preds(g, source_states, n.v_by_index, rdfa->state_som.back().preds); - haig_do_report(g, state_ids, g.accept, source_states, n.v_by_index, + haig_do_report(g, unused, g.accept, source_states, n.v_by_index, rdfa->state_som.back().reports); - haig_do_report(g, state_ids, g.acceptEod, source_states, n.v_by_index, + haig_do_report(g, unused, g.acceptEod, source_states, n.v_by_index, rdfa->state_som.back().reports_eod); } @@ -613,10 +608,10 @@ bool doHaig(const NGHolder &g, return true; } -unique_ptr attemptToBuildHaig(NGHolder &g, som_type som, - u32 somPrecision, - const vector > &triggers, - const Grey &grey, bool unordered_som) { +unique_ptr +attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, + const vector> &triggers, const Grey &grey, + bool unordered_som) { assert(is_triggered(g) != triggers.empty()); assert(!unordered_som || is_triggered(g)); @@ -625,13 +620,12 @@ unique_ptr attemptToBuildHaig(NGHolder &g, som_type som, return nullptr; } - auto state_ids = numberStates(g); - dropUnusedStarts(g, state_ids); - DEBUG_PRINTF("attempting to build haig \n"); assert(allMatchStatesHaveReports(g)); assert(hasCorrectlyNumberedVertices(g)); + auto unused = findUnusedStates(g); + u32 numStates = num_vertices(g); if (numStates > HAIG_MAX_NFA_STATE) { DEBUG_PRINTF("giving up... looks too big\n"); @@ -645,11 +639,11 @@ unique_ptr attemptToBuildHaig(NGHolder &g, som_type som, bool rv; if (numStates <= NFA_STATE_LIMIT) { /* fast path */ - rv = doHaig(g, state_ids, som, triggers, unordered_som, + rv = doHaig(g, unused, som, triggers, unordered_som, rdfa.get()); } else { /* not the fast path */ - rv = doHaig(g, state_ids, som, triggers, unordered_som, + rv = doHaig(g, unused, som, triggers, unordered_som, rdfa.get()); } diff --git a/src/nfagraph/ng_haig.h b/src/nfagraph/ng_haig.h index 1df5e2f0..baff2f58 100644 --- a/src/nfagraph/ng_haig.h +++ b/src/nfagraph/ng_haig.h @@ -54,10 +54,10 @@ struct raw_som_dfa; * between) */ -std::unique_ptr attemptToBuildHaig(NGHolder &g, som_type som, - u32 somPrecision, - const std::vector > &triggers, - const Grey &grey, bool unordered_som_triggers = false); +std::unique_ptr +attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision, + const std::vector> &triggers, + const Grey &grey, bool unordered_som_triggers = false); std::unique_ptr attemptToMergeHaig(const std::vector &dfas, diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index 245dbefe..89716287 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -152,12 +152,11 @@ void getFullTransitionFromState(const raw_dfa &n, dstate_id_t state, template static -void populateInit(const NGHolder &g, - const ue2::unordered_map &state_ids, +void populateInit(const NGHolder &g, const flat_set &unused, stateset *init, stateset *init_deep, vector *v_by_index) { for (auto v : vertices_range(g)) { - if (state_ids.at(v) == NO_STATE) { + if (contains(unused, v)) { continue; } @@ -188,21 +187,22 @@ void populateInit(const NGHolder &g, } template -void populateAccepts(const NGHolder &g, - const ue2::unordered_map &state_ids, +void populateAccepts(const NGHolder &g, const flat_set &unused, StateSet *accept, StateSet *acceptEod) { for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - if (state_ids.at(v) != NO_STATE) { - accept->set(g[v].index); + if (contains(unused, v)) { + continue; } + accept->set(g[v].index); } for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { if (v == g.accept) { continue; } - if (state_ids.at(v) != NO_STATE) { - acceptEod->set(g[v].index); + if (contains(unused, v)) { + continue; } + acceptEod->set(g[v].index); } } @@ -315,8 +315,7 @@ bool triggerAllowed(const NGHolder &g, const NFAVertex v, return true; } -void markToppableStarts(const NGHolder &g, - const ue2::unordered_map &state_ids, +void markToppableStarts(const NGHolder &g, const flat_set &unused, bool single_trigger, const vector> &triggers, dynamic_bitset<> *out) { @@ -325,14 +324,13 @@ void markToppableStarts(const NGHolder &g, } for (auto v : vertices_range(g)) { - if (state_ids.at(v) == NO_STATE) { + if (contains(unused, v)) { continue; } - u32 vert_id = g[v].index; for (const auto &trigger : triggers) { if (triggerAllowed(g, v, triggers, trigger)) { - DEBUG_PRINTF("idx %u is valid location for top\n", vert_id); - out->set(vert_id); + DEBUG_PRINTF("idx %u is valid location for top\n", g[v].index); + out->set(g[v].index); break; } } @@ -349,15 +347,14 @@ public: typedef map StateMap; Automaton_Big(const ReportManager *rm_in, const NGHolder &graph_in, - const ue2::unordered_map &state_ids_in, - bool single_trigger, + const flat_set &unused_in, bool single_trigger, const vector> &triggers, bool prunable_in) - : rm(rm_in), graph(graph_in), state_ids(state_ids_in), - numStates(num_vertices(graph)), init(numStates), initDS(numStates), + : rm(rm_in), graph(graph_in), numStates(num_vertices(graph)), + unused(unused_in), init(numStates), initDS(numStates), squash(numStates), accept(numStates), acceptEod(numStates), toppable(numStates), prunable(prunable_in), dead(numStates) { - populateInit(graph, state_ids, &init, &initDS, &v_by_index); - populateAccepts(graph, state_ids, &accept, &acceptEod); + populateInit(graph, unused, &init, &initDS, &v_by_index); + populateAccepts(graph, unused, &accept, &acceptEod); start_anchored = DEAD_STATE + 1; if (initDS == init) { @@ -379,7 +376,7 @@ public: cr_by_index = populateCR(graph, v_by_index, alpha); if (is_triggered(graph)) { - markToppableStarts(graph, state_ids, single_trigger, triggers, + markToppableStarts(graph, unused, single_trigger, triggers, &toppable); } } @@ -438,8 +435,8 @@ private: const ReportManager *rm; public: const NGHolder &graph; - const ue2::unordered_map &state_ids; u32 numStates; + const flat_set &unused; vector v_by_index; vector cr_by_index; /* pre alpha'ed */ StateSet init; @@ -466,13 +463,11 @@ public: typedef ue2::unordered_map StateMap; Automaton_Graph(const ReportManager *rm_in, const NGHolder &graph_in, - const ue2::unordered_map &state_ids_in, - bool single_trigger, + const flat_set &unused_in, bool single_trigger, const vector> &triggers, bool prunable_in) - : rm(rm_in), graph(graph_in), state_ids(state_ids_in), - prunable(prunable_in) { - populateInit(graph, state_ids, &init, &initDS, &v_by_index); - populateAccepts(graph, state_ids, &accept, &acceptEod); + : rm(rm_in), graph(graph_in), unused(unused_in), prunable(prunable_in) { + populateInit(graph, unused, &init, &initDS, &v_by_index); + populateAccepts(graph, unused, &accept, &acceptEod); start_anchored = DEAD_STATE + 1; if (initDS == init) { @@ -496,8 +491,7 @@ public: cr_by_index = populateCR(graph, v_by_index, alpha); if (is_triggered(graph)) { dynamic_bitset<> temp(NFA_STATE_LIMIT); - markToppableStarts(graph, state_ids, single_trigger, triggers, - &temp); + markToppableStarts(graph, unused, single_trigger, triggers, &temp); toppable = bitfield(temp); } } @@ -557,7 +551,7 @@ private: const ReportManager *rm; public: const NGHolder &graph; - const ue2::unordered_map &state_ids; + const flat_set &unused; vector v_by_index; vector cr_by_index; /* pre alpha'ed */ StateSet init; @@ -580,20 +574,15 @@ public: } // namespace -unique_ptr buildMcClellan(const NGHolder &g, const ReportManager *rm, - bool single_trigger, +unique_ptr buildMcClellan(const NGHolder &graph, + const ReportManager *rm, bool single_trigger, const vector> &triggers, const Grey &grey, bool finalChance) { if (!grey.allowMcClellan) { return nullptr; } - // Construct a mutable copy of the graph so that we can drop unused starts. - auto g_copy = cloneHolder(g); - NGHolder &graph = *g_copy; - - auto state_ids = numberStates(graph); - dropUnusedStarts(graph, state_ids); + auto unused = findUnusedStates(graph); DEBUG_PRINTF("attempting to build ?%d? mcclellan\n", (int)graph.kind); assert(allMatchStatesHaveReports(graph)); @@ -620,7 +609,7 @@ unique_ptr buildMcClellan(const NGHolder &g, const ReportManager *rm, if (numStates <= NFA_STATE_LIMIT) { /* Fast path. Automaton_Graph uses a bitfield internally to represent * states and is quicker than Automaton_Big. */ - Automaton_Graph n(rm, graph, state_ids, single_trigger, triggers, + Automaton_Graph n(rm, graph, unused, single_trigger, triggers, prunable); if (determinise(n, rdfa->states, state_limit)) { DEBUG_PRINTF("state limit exceeded\n"); @@ -633,8 +622,7 @@ unique_ptr buildMcClellan(const NGHolder &g, const ReportManager *rm, rdfa->alpha_remap = n.alpha; } else { /* Slow path. Too many states to use Automaton_Graph. */ - Automaton_Big n(rm, graph, state_ids, single_trigger, triggers, - prunable); + Automaton_Big n(rm, graph, unused, single_trigger, triggers, prunable); if (determinise(n, rdfa->states, state_limit)) { DEBUG_PRINTF("state limit exceeded\n"); return nullptr; /* over state limit */ diff --git a/src/nfagraph/ng_mcclellan_internal.h b/src/nfagraph/ng_mcclellan_internal.h index 13d32aa4..22fcf01e 100644 --- a/src/nfagraph/ng_mcclellan_internal.h +++ b/src/nfagraph/ng_mcclellan_internal.h @@ -64,8 +64,7 @@ void getFullTransitionFromState(const raw_dfa &n, u16 state, u16 *out_table); /** produce a map of states on which it is valid to receive tops */ -void markToppableStarts(const NGHolder &g, - const ue2::unordered_map &state_ids, +void markToppableStarts(const NGHolder &g, const flat_set &unused, bool single_trigger, const std::vector> &triggers, boost::dynamic_bitset<> *out); @@ -76,7 +75,7 @@ void transition_graph(autom &nfa, const std::vector &vByStateId, typename autom::StateSet *next) { typedef typename autom::StateSet StateSet; const NGHolder &graph = nfa.graph; - const auto &state_ids = nfa.state_ids; + const auto &unused = nfa.unused; const auto &alpha = nfa.alpha; const StateSet &squash = nfa.squash; const std::map &squash_mask = nfa.squash_mask; @@ -94,7 +93,7 @@ void transition_graph(autom &nfa, const std::vector &vByStateId, NFAVertex u = vByStateId[i]; for (const auto &v : adjacent_vertices_range(u, graph)) { - if (state_ids.at(v) == NO_STATE) { + if (contains(unused, v)) { continue; } succ.set(graph[v].index); diff --git a/src/nfagraph/ng_restructuring.cpp b/src/nfagraph/ng_restructuring.cpp index 70344356..09abf775 100644 --- a/src/nfagraph/ng_restructuring.cpp +++ b/src/nfagraph/ng_restructuring.cpp @@ -281,6 +281,17 @@ void dropUnusedStarts(NGHolder &g, ue2::unordered_map &states) { } } +flat_set findUnusedStates(const NGHolder &g) { + flat_set dead; + if (startIsRedundant(g)) { + dead.insert(g.start); + } + if (proper_out_degree(g.startDs, g) == 0) { + dead.insert(g.startDs); + } + return dead; +} + /** Construct a reversed copy of an arbitrary NGHolder, mapping starts to * accepts. */ void reverseHolder(const NGHolder &g_in, NGHolder &g) { diff --git a/src/nfagraph/ng_restructuring.h b/src/nfagraph/ng_restructuring.h index ce95b96c..5e244bf6 100644 --- a/src/nfagraph/ng_restructuring.h +++ b/src/nfagraph/ng_restructuring.h @@ -81,6 +81,13 @@ u32 countStates(const NGHolder &g, /** Optimisation: drop unnecessary start states. */ void dropUnusedStarts(NGHolder &g, ue2::unordered_map &states); +/** + * \brief Returns a set of vertices that will not participate in an + * implementation (NFA, DFA etc) of this graph. For example, starts with no + * successors. + */ +flat_set findUnusedStates(const NGHolder &g); + } // namespace ue2 #endif