From beac58fcb42123106e7624509a9b8ab187ded090 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 11 Apr 2017 12:44:20 +1000 Subject: [PATCH] dfa: allow smwr to avoid lengthy daddy recalc --- src/nfa/mcclellancompile.cpp | 19 +++++++++++++------ src/nfa/mcclellancompile.h | 3 ++- src/smallwrite/smallwrite_build.cpp | 19 +++++++++++-------- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 044e38c5..29642dde 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -802,9 +802,9 @@ flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info, #define MAX_SHERMAN_SELF_LOOP 20 static -void find_better_daddy(dfa_info &info, dstate_id_t curr_id, - bool using8bit, bool any_cyclic_near_anchored_state, - const Grey &grey) { +void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, + bool any_cyclic_near_anchored_state, + bool trust_daddy_states, const Grey &grey) { if (!grey.allowShermanStates) { return; } @@ -839,7 +839,12 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, dstate_id_t best_daddy = 0; dstate &currState = info.states[curr_id]; - const auto hinted = find_daddy_candidates(info, curr_id); + flat_set<dstate_id_t> hinted; + if (trust_daddy_states) { + hinted.insert(currState.daddy); + } else { + hinted = find_daddy_candidates(info, curr_id); + } for (const dstate_id_t &donor : hinted) { assert(donor < curr_id); @@ -947,6 +952,7 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, const CompileContext &cc, + bool trust_daddy_states, set<dstate_id_t> *accel_states) { u16 total_daddy = 0; dfa_info info(strat); @@ -963,7 +969,7 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, for (u32 i = 0; i < info.size(); i++) { find_better_daddy(info, i, using8bit, any_cyclic_near_anchored_state, - cc.grey); + trust_daddy_states, cc.grey); total_daddy += 
info.extra[i].daddytaken; } @@ -989,9 +995,10 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, bytecode_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm, bool only_accel_init, + bool trust_daddy_states, set<dstate_id_t> *accel_states) { mcclellan_build_strat mbs(raw, rm, only_accel_init); - return mcclellanCompile_i(raw, mbs, cc, accel_states); + return mcclellanCompile_i(raw, mbs, cc, trust_daddy_states, accel_states); } size_t mcclellan_build_strat::accelSize(void) const { diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index c204e03c..a176db28 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -71,12 +71,13 @@ private: bytecode_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm, bool only_accel_init, + bool trust_daddy_states = false, std::set<dstate_id_t> *accel_states = nullptr); /* used internally by mcclellan/haig/gough compile process */ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, - const CompileContext &cc, + const CompileContext &cc, bool trust_daddy_states = false, std::set<dstate_id_t> *accel_states = nullptr); /** diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index ffd3fe0f..829c72e5 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -692,18 +692,20 @@ bool is_slow(const raw_dfa &rdfa, const set<dstate_id_t> &accel, static bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, const CompileContext &cc, - const ReportManager &rm, bool has_literals, + const ReportManager &rm, bool has_non_literals, set<dstate_id_t> &accel_states) { - // If we determinised literals, then we only need to consider the init + // If we determinised only literals, then we only need to consider the init // states for acceleration. 
- bool only_accel_init = has_literals; + bool only_accel_init = !has_non_literals; + bool trust_daddy_states = !has_non_literals; bytecode_ptr<NFA> dfa = nullptr; if (cc.grey.allowSmallWriteSheng) { dfa = shengCompile(rdfa, cc, rm, only_accel_init, &accel_states); } if (!dfa) { - dfa = mcclellanCompile(rdfa, cc, rm, only_accel_init, &accel_states); + dfa = mcclellanCompile(rdfa, cc, rm, only_accel_init, + trust_daddy_states, &accel_states); } return dfa; } @@ -711,14 +713,14 @@ bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, const CompileContext &cc, static bytecode_ptr<NFA> prepEngine(raw_dfa &rdfa, u32 roseQuality, const CompileContext &cc, const ReportManager &rm, - bool has_literals, u32 *start_offset, + bool has_non_literals, u32 *start_offset, u32 *small_region) { *start_offset = remove_leading_dots(rdfa); // Unleash the McClellan! set<dstate_id_t> accel_states; - auto nfa = getDfa(rdfa, cc, rm, has_literals, accel_states); + auto nfa = getDfa(rdfa, cc, rm, has_non_literals, accel_states); if (!nfa) { DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n"); return nullptr; @@ -737,7 +739,7 @@ bytecode_ptr<NFA> prepEngine(raw_dfa &rdfa, u32 roseQuality, return nullptr; } - nfa = getDfa(rdfa, cc, rm, has_literals, accel_states); + nfa = getDfa(rdfa, cc, rm, has_non_literals, accel_states); if (!nfa) { DEBUG_PRINTF("DFA compile failed for smallwrite NFA\n"); assert(0); /* able to build orig dfa but not the trimmed? 
*/ @@ -768,6 +770,7 @@ unique_ptr<SmallWriteBuild> makeSmallWriteBuilder(size_t num_patterns, bytecode_ptr<SmallWriteEngine> SmallWriteBuildImpl::build(u32 roseQuality) { const bool has_literals = !is_empty(lit_trie) || !is_empty(lit_trie_nocase); + const bool has_non_literals = rdfa != nullptr; if (!rdfa && !has_literals) { DEBUG_PRINTF("no smallwrite engine\n"); poisoned = true; @@ -788,7 +791,7 @@ bytecode_ptr<SmallWriteEngine> SmallWriteBuildImpl::build(u32 roseQuality) { u32 start_offset; u32 small_region; - auto nfa = prepEngine(*rdfa, roseQuality, cc, rm, has_literals, + auto nfa = prepEngine(*rdfa, roseQuality, cc, rm, has_non_literals, &start_offset, &small_region); if (!nfa) { DEBUG_PRINTF("some smallwrite outfix could not be prepped\n");