mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
smallwrite: batch dfa merge to reduce compile time
This commit is contained in:
parent
67a8f43355
commit
340773481e
@ -139,6 +139,7 @@ Grey::Grey(void) :
|
|||||||
limitSmallWriteOutfixSize(1048576), // 1 MB
|
limitSmallWriteOutfixSize(1048576), // 1 MB
|
||||||
smallWriteMaxPatterns(10000),
|
smallWriteMaxPatterns(10000),
|
||||||
smallWriteMaxLiterals(10000),
|
smallWriteMaxLiterals(10000),
|
||||||
|
smallWriteMergeBatchSize(20),
|
||||||
allowTamarama(true), // Tamarama engine
|
allowTamarama(true), // Tamarama engine
|
||||||
tamaChunkSize(100),
|
tamaChunkSize(100),
|
||||||
dumpFlags(0),
|
dumpFlags(0),
|
||||||
@ -302,6 +303,7 @@ void applyGreyOverrides(Grey *g, const string &s) {
|
|||||||
G_UPDATE(limitSmallWriteOutfixSize);
|
G_UPDATE(limitSmallWriteOutfixSize);
|
||||||
G_UPDATE(smallWriteMaxPatterns);
|
G_UPDATE(smallWriteMaxPatterns);
|
||||||
G_UPDATE(smallWriteMaxLiterals);
|
G_UPDATE(smallWriteMaxLiterals);
|
||||||
|
G_UPDATE(smallWriteMergeBatchSize);
|
||||||
G_UPDATE(allowTamarama);
|
G_UPDATE(allowTamarama);
|
||||||
G_UPDATE(tamaChunkSize);
|
G_UPDATE(tamaChunkSize);
|
||||||
G_UPDATE(limitPatternCount);
|
G_UPDATE(limitPatternCount);
|
||||||
|
@ -157,6 +157,7 @@ struct Grey {
|
|||||||
u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
|
u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
|
||||||
u32 smallWriteMaxPatterns; // only try small writes if fewer patterns
|
u32 smallWriteMaxPatterns; // only try small writes if fewer patterns
|
||||||
u32 smallWriteMaxLiterals; // only try small writes if fewer literals
|
u32 smallWriteMaxLiterals; // only try small writes if fewer literals
|
||||||
|
u32 smallWriteMergeBatchSize; // number of DFAs to merge in a batch
|
||||||
|
|
||||||
// Tamarama engine
|
// Tamarama engine
|
||||||
bool allowTamarama;
|
bool allowTamarama;
|
||||||
|
@ -132,12 +132,10 @@ public:
|
|||||||
|
|
||||||
set<ReportID> all_reports() const override;
|
set<ReportID> all_reports() const override;
|
||||||
|
|
||||||
bool determiniseLiterals();
|
|
||||||
|
|
||||||
const ReportManager &rm;
|
const ReportManager &rm;
|
||||||
const CompileContext &cc;
|
const CompileContext &cc;
|
||||||
|
|
||||||
unique_ptr<raw_dfa> rdfa;
|
vector<unique_ptr<raw_dfa>> dfas;
|
||||||
LitTrie lit_trie;
|
LitTrie lit_trie;
|
||||||
LitTrie lit_trie_nocase;
|
LitTrie lit_trie_nocase;
|
||||||
size_t num_literals = 0;
|
size_t num_literals = 0;
|
||||||
@ -226,6 +224,40 @@ bool pruneOverlong(NGHolder &g, const depth &max_depth,
|
|||||||
return modified;
|
return modified;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Attempt to merge the set of DFAs given down into a single raw_dfa.
|
||||||
|
* Returns false on failure.
|
||||||
|
*/
|
||||||
|
static
|
||||||
|
bool mergeDfas(vector<unique_ptr<raw_dfa>> &dfas, const ReportManager &rm,
|
||||||
|
const CompileContext &cc) {
|
||||||
|
assert(!dfas.empty());
|
||||||
|
|
||||||
|
if (dfas.size() == 1) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("attempting to merge %zu DFAs\n", dfas.size());
|
||||||
|
|
||||||
|
vector<const raw_dfa *> dfa_ptrs;
|
||||||
|
dfa_ptrs.reserve(dfas.size());
|
||||||
|
for (auto &d : dfas) {
|
||||||
|
dfa_ptrs.push_back(d.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
auto merged = mergeAllDfas(dfa_ptrs, DFA_MERGE_MAX_STATES, &rm, cc.grey);
|
||||||
|
if (!merged) {
|
||||||
|
DEBUG_PRINTF("merge failed\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("merge succeeded, result has %zu states\n",
|
||||||
|
merged->states.size());
|
||||||
|
dfas.clear();
|
||||||
|
dfas.push_back(std::move(merged));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) {
|
void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) {
|
||||||
// If the graph is poisoned (i.e. we can't build a SmallWrite version),
|
// If the graph is poisoned (i.e. we can't build a SmallWrite version),
|
||||||
// we don't even try.
|
// we don't even try.
|
||||||
@ -283,19 +315,14 @@ void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) {
|
|||||||
minimize_hopcroft(*r, cc.grey);
|
minimize_hopcroft(*r, cc.grey);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rdfa) {
|
dfas.push_back(std::move(r));
|
||||||
// do a merge of the new dfa with the existing dfa
|
|
||||||
auto merged = mergeTwoDfas(rdfa.get(), r.get(), DFA_MERGE_MAX_STATES,
|
if (dfas.size() >= cc.grey.smallWriteMergeBatchSize) {
|
||||||
&rm, cc.grey);
|
if (!mergeDfas(dfas, rm, cc)) {
|
||||||
if (!merged) {
|
dfas.clear();
|
||||||
DEBUG_PRINTF("merge failed\n");
|
|
||||||
poisoned = true;
|
poisoned = true;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
DEBUG_PRINTF("merge succeeded, built %p\n", merged.get());
|
|
||||||
rdfa = move(merged);
|
|
||||||
} else {
|
|
||||||
rdfa = move(r);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -710,64 +737,6 @@ unique_ptr<raw_dfa> buildDfa(LitTrie &trie, bool nocase) {
|
|||||||
return rdfa;
|
return rdfa;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SmallWriteBuildImpl::determiniseLiterals() {
|
|
||||||
DEBUG_PRINTF("handling literals\n");
|
|
||||||
assert(!poisoned);
|
|
||||||
assert(num_literals <= cc.grey.smallWriteMaxLiterals);
|
|
||||||
|
|
||||||
if (is_empty(lit_trie) && is_empty(lit_trie_nocase)) {
|
|
||||||
DEBUG_PRINTF("no literals\n");
|
|
||||||
return true; /* nothing to do */
|
|
||||||
}
|
|
||||||
|
|
||||||
vector<unique_ptr<raw_dfa>> dfas;
|
|
||||||
|
|
||||||
if (!is_empty(lit_trie)) {
|
|
||||||
dfas.push_back(buildDfa(lit_trie, false));
|
|
||||||
DEBUG_PRINTF("caseful literal dfa with %zu states\n",
|
|
||||||
dfas.back()->states.size());
|
|
||||||
}
|
|
||||||
if (!is_empty(lit_trie_nocase)) {
|
|
||||||
dfas.push_back(buildDfa(lit_trie_nocase, true));
|
|
||||||
DEBUG_PRINTF("nocase literal dfa with %zu states\n",
|
|
||||||
dfas.back()->states.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rdfa) {
|
|
||||||
dfas.push_back(move(rdfa));
|
|
||||||
DEBUG_PRINTF("general dfa with %zu states\n",
|
|
||||||
dfas.back()->states.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we only have one DFA, no merging is necessary.
|
|
||||||
if (dfas.size() == 1) {
|
|
||||||
DEBUG_PRINTF("only one dfa\n");
|
|
||||||
rdfa = move(dfas.front());
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Merge all DFAs.
|
|
||||||
vector<const raw_dfa *> to_merge;
|
|
||||||
for (const auto &d : dfas) {
|
|
||||||
to_merge.push_back(d.get());
|
|
||||||
}
|
|
||||||
|
|
||||||
auto merged = mergeAllDfas(to_merge, DFA_MERGE_MAX_STATES, &rm, cc.grey);
|
|
||||||
|
|
||||||
if (!merged) {
|
|
||||||
DEBUG_PRINTF("merge failed\n");
|
|
||||||
poisoned = true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG_PRINTF("merge succeeded, built dfa with %zu states\n",
|
|
||||||
merged->states.size());
|
|
||||||
|
|
||||||
// Replace our only DFA with the merged one.
|
|
||||||
rdfa = move(merged);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define MAX_GOOD_ACCEL_DEPTH 4
|
#define MAX_GOOD_ACCEL_DEPTH 4
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -890,8 +859,8 @@ unique_ptr<SmallWriteBuild> makeSmallWriteBuilder(size_t num_patterns,
|
|||||||
|
|
||||||
bytecode_ptr<SmallWriteEngine> SmallWriteBuildImpl::build(u32 roseQuality) {
|
bytecode_ptr<SmallWriteEngine> SmallWriteBuildImpl::build(u32 roseQuality) {
|
||||||
const bool has_literals = !is_empty(lit_trie) || !is_empty(lit_trie_nocase);
|
const bool has_literals = !is_empty(lit_trie) || !is_empty(lit_trie_nocase);
|
||||||
const bool has_non_literals = rdfa != nullptr;
|
const bool has_non_literals = !dfas.empty();
|
||||||
if (!rdfa && !has_literals) {
|
if (dfas.empty() && !has_literals) {
|
||||||
DEBUG_PRINTF("no smallwrite engine\n");
|
DEBUG_PRINTF("no smallwrite engine\n");
|
||||||
poisoned = true;
|
poisoned = true;
|
||||||
return nullptr;
|
return nullptr;
|
||||||
@ -914,16 +883,31 @@ bytecode_ptr<SmallWriteEngine> SmallWriteBuildImpl::build(u32 roseQuality) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!determiniseLiterals()) {
|
if (!is_empty(lit_trie)) {
|
||||||
DEBUG_PRINTF("some literal could not be made into a smallwrite dfa\n");
|
dfas.push_back(buildDfa(lit_trie, false));
|
||||||
return nullptr;
|
DEBUG_PRINTF("caseful literal dfa with %zu states\n",
|
||||||
|
dfas.back()->states.size());
|
||||||
|
}
|
||||||
|
if (!is_empty(lit_trie_nocase)) {
|
||||||
|
dfas.push_back(buildDfa(lit_trie_nocase, true));
|
||||||
|
DEBUG_PRINTF("nocase literal dfa with %zu states\n",
|
||||||
|
dfas.back()->states.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!rdfa) {
|
if (dfas.empty()) {
|
||||||
DEBUG_PRINTF("no dfa, pruned everything away\n");
|
DEBUG_PRINTF("no dfa, pruned everything away\n");
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!mergeDfas(dfas, rm, cc)) {
|
||||||
|
dfas.clear();
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(dfas.size() == 1);
|
||||||
|
auto rdfa = std::move(dfas.front());
|
||||||
|
dfas.clear();
|
||||||
|
|
||||||
DEBUG_PRINTF("building rdfa %p\n", rdfa.get());
|
DEBUG_PRINTF("building rdfa %p\n", rdfa.get());
|
||||||
|
|
||||||
u32 start_offset;
|
u32 start_offset;
|
||||||
@ -957,7 +941,8 @@ set<ReportID> SmallWriteBuildImpl::all_reports() const {
|
|||||||
if (poisoned) {
|
if (poisoned) {
|
||||||
return reports;
|
return reports;
|
||||||
}
|
}
|
||||||
if (rdfa) {
|
|
||||||
|
for (const auto &rdfa : dfas) {
|
||||||
insert(&reports, ::ue2::all_reports(*rdfa));
|
insert(&reports, ::ue2::all_reports(*rdfa));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user