From 340773481ef59aa9d7ff5bfb58c76b4a62e5f338 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Thu, 20 Jul 2017 15:11:50 +1000
Subject: [PATCH] smallwrite: batch dfa merge to reduce compile time

---
 src/grey.cpp                        |   2 +
 src/grey.h                          |   1 +
 src/smallwrite/smallwrite_build.cpp | 141 +++++++++++++---------------
 3 files changed, 66 insertions(+), 78 deletions(-)

diff --git a/src/grey.cpp b/src/grey.cpp
index 24140c05..3762a497 100644
--- a/src/grey.cpp
+++ b/src/grey.cpp
@@ -139,6 +139,7 @@ Grey::Grey(void) :
                    limitSmallWriteOutfixSize(1048576), // 1 MB
                    smallWriteMaxPatterns(10000),
                    smallWriteMaxLiterals(10000),
+                   smallWriteMergeBatchSize(20),
                    allowTamarama(true), // Tamarama engine
                    tamaChunkSize(100),
                    dumpFlags(0),
@@ -302,6 +303,7 @@ void applyGreyOverrides(Grey *g, const string &s) {
         G_UPDATE(limitSmallWriteOutfixSize);
         G_UPDATE(smallWriteMaxPatterns);
         G_UPDATE(smallWriteMaxLiterals);
+        G_UPDATE(smallWriteMergeBatchSize);
         G_UPDATE(allowTamarama);
         G_UPDATE(tamaChunkSize);
         G_UPDATE(limitPatternCount);
diff --git a/src/grey.h b/src/grey.h
index 50519418..34c62918 100644
--- a/src/grey.h
+++ b/src/grey.h
@@ -157,6 +157,7 @@ struct Grey {
     u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
     u32 smallWriteMaxPatterns; // only try small writes if fewer patterns
     u32 smallWriteMaxLiterals; // only try small writes if fewer literals
+    u32 smallWriteMergeBatchSize; // number of DFAs to merge in a batch
 
     // Tamarama engine
     bool allowTamarama;
diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp
index bb933cbe..c041155b 100644
--- a/src/smallwrite/smallwrite_build.cpp
+++ b/src/smallwrite/smallwrite_build.cpp
@@ -132,12 +132,10 @@ public:
 
     set<ReportID> all_reports() const override;
 
-    bool determiniseLiterals();
-
     const ReportManager &rm;
     const CompileContext &cc;
 
-    unique_ptr<raw_dfa> rdfa;
+    vector<unique_ptr<raw_dfa>> dfas;
     LitTrie lit_trie;
     LitTrie lit_trie_nocase;
     size_t num_literals = 0;
@@ -226,6 +224,40 @@ bool pruneOverlong(NGHolder &g, const depth &max_depth,
     return modified;
 }
 
+/**
+ * \brief Attempt to merge the set of DFAs given down into a single raw_dfa.
+ * Returns false on failure.
+ */
+static
+bool mergeDfas(vector<unique_ptr<raw_dfa>> &dfas, const ReportManager &rm,
+               const CompileContext &cc) {
+    assert(!dfas.empty());
+
+    if (dfas.size() == 1) {
+        return true;
+    }
+
+    DEBUG_PRINTF("attempting to merge %zu DFAs\n", dfas.size());
+
+    vector<const raw_dfa *> dfa_ptrs;
+    dfa_ptrs.reserve(dfas.size());
+    for (auto &d : dfas) {
+        dfa_ptrs.push_back(d.get());
+    }
+
+    auto merged = mergeAllDfas(dfa_ptrs, DFA_MERGE_MAX_STATES, &rm, cc.grey);
+    if (!merged) {
+        DEBUG_PRINTF("merge failed\n");
+        return false;
+    }
+
+    DEBUG_PRINTF("merge succeeded, result has %zu states\n",
+                 merged->states.size());
+    dfas.clear();
+    dfas.push_back(std::move(merged));
+    return true;
+}
+
 void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) {
     // If the graph is poisoned (i.e. we can't build a SmallWrite version),
     // we don't even try.
@@ -283,19 +315,14 @@ void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) {
         minimize_hopcroft(*r, cc.grey);
     }
 
-    if (rdfa) {
-        // do a merge of the new dfa with the existing dfa
-        auto merged = mergeTwoDfas(rdfa.get(), r.get(), DFA_MERGE_MAX_STATES,
-                                   &rm, cc.grey);
-        if (!merged) {
-            DEBUG_PRINTF("merge failed\n");
+    dfas.push_back(std::move(r));
+
+    if (dfas.size() >= cc.grey.smallWriteMergeBatchSize) {
+        if (!mergeDfas(dfas, rm, cc)) {
+            dfas.clear();
             poisoned = true;
             return;
         }
-        DEBUG_PRINTF("merge succeeded, built %p\n", merged.get());
-        rdfa = move(merged);
-    } else {
-        rdfa = move(r);
     }
 }
 
@@ -710,64 +737,6 @@ unique_ptr<raw_dfa> buildDfa(LitTrie &trie, bool nocase) {
     return rdfa;
 }
 
-bool SmallWriteBuildImpl::determiniseLiterals() {
-    DEBUG_PRINTF("handling literals\n");
-    assert(!poisoned);
-    assert(num_literals <= cc.grey.smallWriteMaxLiterals);
-
-    if (is_empty(lit_trie) && is_empty(lit_trie_nocase)) {
-        DEBUG_PRINTF("no literals\n");
-        return true; /* nothing to do */
-    }
-
-    vector<unique_ptr<raw_dfa>> dfas;
-
-    if (!is_empty(lit_trie)) {
-        dfas.push_back(buildDfa(lit_trie, false));
-        DEBUG_PRINTF("caseful literal dfa with %zu states\n",
-                     dfas.back()->states.size());
-    }
-    if (!is_empty(lit_trie_nocase)) {
-        dfas.push_back(buildDfa(lit_trie_nocase, true));
-        DEBUG_PRINTF("nocase literal dfa with %zu states\n",
-                     dfas.back()->states.size());
-    }
-
-    if (rdfa) {
-        dfas.push_back(move(rdfa));
-        DEBUG_PRINTF("general dfa with %zu states\n",
-                     dfas.back()->states.size());
-    }
-
-    // If we only have one DFA, no merging is necessary.
-    if (dfas.size() == 1) {
-        DEBUG_PRINTF("only one dfa\n");
-        rdfa = move(dfas.front());
-        return true;
-    }
-
-    // Merge all DFAs.
-    vector<const raw_dfa *> to_merge;
-    for (const auto &d : dfas) {
-        to_merge.push_back(d.get());
-    }
-
-    auto merged = mergeAllDfas(to_merge, DFA_MERGE_MAX_STATES, &rm, cc.grey);
-
-    if (!merged) {
-        DEBUG_PRINTF("merge failed\n");
-        poisoned = true;
-        return false;
-    }
-
-    DEBUG_PRINTF("merge succeeded, built dfa with %zu states\n",
-                 merged->states.size());
-
-    // Replace our only DFA with the merged one.
-    rdfa = move(merged);
-    return true;
-}
-
 #define MAX_GOOD_ACCEL_DEPTH 4
 
 static
@@ -890,8 +859,8 @@ unique_ptr<SmallWriteBuild> makeSmallWriteBuilder(size_t num_patterns,
 
 bytecode_ptr<SmallWrite> SmallWriteBuildImpl::build(u32 roseQuality) {
     const bool has_literals = !is_empty(lit_trie) || !is_empty(lit_trie_nocase);
-    const bool has_non_literals = rdfa != nullptr;
-    if (!rdfa && !has_literals) {
+    const bool has_non_literals = !dfas.empty();
+    if (dfas.empty() && !has_literals) {
         DEBUG_PRINTF("no smallwrite engine\n");
         poisoned = true;
         return nullptr;
@@ -914,16 +883,31 @@ bytecode_ptr<SmallWrite> SmallWriteBuildImpl::build(u32 roseQuality) {
         }
     }
 
-    if (!determiniseLiterals()) {
-        DEBUG_PRINTF("some literal could not be made into a smallwrite dfa\n");
-        return nullptr;
+    if (!is_empty(lit_trie)) {
+        dfas.push_back(buildDfa(lit_trie, false));
+        DEBUG_PRINTF("caseful literal dfa with %zu states\n",
+                     dfas.back()->states.size());
+    }
+    if (!is_empty(lit_trie_nocase)) {
+        dfas.push_back(buildDfa(lit_trie_nocase, true));
+        DEBUG_PRINTF("nocase literal dfa with %zu states\n",
+                     dfas.back()->states.size());
     }
 
-    if (!rdfa) {
+    if (dfas.empty()) {
         DEBUG_PRINTF("no dfa, pruned everything away\n");
         return nullptr;
     }
 
+    if (!mergeDfas(dfas, rm, cc)) {
+        dfas.clear();
+        return nullptr;
+    }
+
+    assert(dfas.size() == 1);
+    auto rdfa = std::move(dfas.front());
+    dfas.clear();
+
     DEBUG_PRINTF("building rdfa %p\n", rdfa.get());
 
     u32 start_offset;
@@ -957,7 +941,8 @@ set<ReportID> SmallWriteBuildImpl::all_reports() const {
     if (poisoned) {
         return reports;
     }
-    if (rdfa) {
+
+    for (const auto &rdfa : dfas) {
         insert(&reports, ::ue2::all_reports(*rdfa));
     }
 