From 10f52346ca4d0e5749a844a73942545fe3695381 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Tue, 11 Apr 2017 13:56:51 +1000
Subject: [PATCH] smallwrite: bfs ordering, refine daddy selection

---
 src/nfa/mcclellancompile.cpp        | 14 +++++++++++---
 src/smallwrite/smallwrite_build.cpp |  7 +++++++
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp
index e2466000..e875477b 100644
--- a/src/nfa/mcclellancompile.cpp
+++ b/src/nfa/mcclellancompile.cpp
@@ -841,9 +841,17 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
 
     flat_set<dstate_id_t> hinted;
     if (trust_daddy_states) {
-        hinted.insert(currState.daddy);
-        addIfEarlier(hinted, info.raw.start_floating, curr_id);
-        addIfEarlier(hinted, info.raw.start_anchored, curr_id);
+        // Use the daddy already set for this state so long as it isn't already
+        // a Sherman state.
+        if (!info.is_sherman(currState.daddy)) {
+            hinted.insert(currState.daddy);
+        } else {
+            // Fall back to granddaddy, which has already been processed (due
+            // to BFS ordering) and cannot be a Sherman state.
+            dstate_id_t granddaddy = info.states[currState.daddy].daddy;
+            assert(!info.is_sherman(granddaddy));
+            hinted.insert(granddaddy);
+        }
     } else {
         hinted = find_daddy_candidates(info, curr_id);
     }
diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp
index fac8d012..ce3315e8 100644
--- a/src/smallwrite/smallwrite_build.cpp
+++ b/src/smallwrite/smallwrite_build.cpp
@@ -452,6 +452,13 @@ void buildAutomaton(LitTrie &trie,
     ACVisitor ac_vis(trie, failure_map, ordering);
     boost::breadth_first_search(trie, trie.root, visitor(ac_vis));
 
+    // Renumber with BFS ordering, which is assumed by other DFA construction
+    // code (i.e. Sherman state computation).
+    size_t idx = 0;
+    for (auto v : ordering) {
+        trie[v].index = idx++;
+    }
+
     // Compute missing edges from failure map.
     for (auto v : ordering) {
         DEBUG_PRINTF("vertex %zu\n", trie[v].index);