smallwrite: simple trie experiment

This commit is contained in:
Justin Viiret 2017-03-02 11:09:27 +11:00 committed by Matthew Barr
parent eec2b8233d
commit b75b169b49

View File

@ -51,6 +51,7 @@
#include "util/compile_context.h" #include "util/compile_context.h"
#include "util/container.h" #include "util/container.h"
#include "util/make_unique.h" #include "util/make_unique.h"
#include "util/ue2_graph.h"
#include "util/ue2string.h" #include "util/ue2string.h"
#include "util/verify_types.h" #include "util/verify_types.h"
@ -65,9 +66,30 @@ namespace ue2 {
#define LITERAL_MERGE_CHUNK_SIZE 25 #define LITERAL_MERGE_CHUNK_SIZE 25
#define DFA_MERGE_MAX_STATES 8000 #define DFA_MERGE_MAX_STATES 8000
// Upper bound on the number of vertices a literal trie may hold; add_to_trie()
// returns false once a trie has grown past this limit.
#define MAX_TRIE_VERTICES 8000
namespace { // unnamed namespace { // unnamed
/**
 * \brief Properties attached to each vertex of a literal trie.
 *
 * Every member carries an in-class initializer so that a props object built
 * by either constructor never holds an indeterminate value when it is copied
 * into the graph.
 */
struct LitTrieVertexProps {
    LitTrieVertexProps() = default;

    /** \brief Construct the properties for a vertex labelled with \a c_in. */
    explicit LitTrieVertexProps(char c_in) : c(c_in) {}

    char c = 0; // character labelling this vertex; 0 unless set explicitly
    size_t index = 0; // managed by ue2_graph
};
/**
 * \brief Properties attached to each edge of a literal trie.
 *
 * The only member is the index slot; it is zero-initialized so that a
 * default-initialized props object is never copied with an indeterminate
 * value.
 */
struct LitTrieEdgeProps {
    LitTrieEdgeProps() = default;

    size_t index = 0; // managed by ue2_graph
};
/**
 * \brief Literal trie, represented as a ue2_graph whose vertices carry
 * LitTrieVertexProps and whose edges carry LitTrieEdgeProps.
 *
 * Construction adds a single vertex and records it as \ref root.
 */
struct LitTrie
    : public ue2_graph<LitTrie, LitTrieVertexProps, LitTrieEdgeProps> {
    /** \brief Vertex added at construction time; never reassigned. */
    const vertex_descriptor root;

    /** \brief Build a trie containing only the root vertex. */
    LitTrie()
        : root{add_vertex(*this)} {
    }
};
// Concrete impl class // Concrete impl class
class SmallWriteBuildImpl : public SmallWriteBuild { class SmallWriteBuildImpl : public SmallWriteBuild {
public: public:
@ -89,6 +111,8 @@ public:
unique_ptr<raw_dfa> rdfa; unique_ptr<raw_dfa> rdfa;
vector<pair<ue2_literal, ReportID> > cand_literals; vector<pair<ue2_literal, ReportID> > cand_literals;
LitTrie lit_trie;
LitTrie lit_trie_nocase;
bool poisoned; bool poisoned;
}; };
@ -247,6 +271,29 @@ void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) {
} }
} }
/**
 * \brief Insert \a literal into \a trie, one vertex per character.
 *
 * Starting at the trie root, each character of the literal follows an
 * existing child vertex carrying the same character if there is one;
 * otherwise a new vertex is added and linked from the current vertex.
 *
 * \param literal Literal whose characters are walked in order.
 * \param trie Trie to extend in place.
 * \return true if the trie holds at most MAX_TRIE_VERTICES vertices after
 *         the insertion, false if it has grown beyond that limit.
 */
static
bool add_to_trie(const ue2_literal &literal, LitTrie &trie) {
    auto cur = trie.root;

    for (const auto &elem : literal) {
        // Search the children of the current vertex for one already
        // labelled with this character.
        auto child = LitTrie::null_vertex();
        for (auto candidate : adjacent_vertices_range(cur, trie)) {
            if (trie[candidate].c != elem.c) {
                continue;
            }
            child = candidate;
            break;
        }

        // No such child: extend the trie with a fresh vertex.
        if (child == LitTrie::null_vertex()) {
            child = add_vertex(LitTrieVertexProps(elem.c), trie);
            add_edge(cur, child, trie);
        }

        cur = child;
    }

    const auto total = num_vertices(trie);
    DEBUG_PRINTF("added '%s' to trie, now %zu vertices\n",
                 escapeString(literal).c_str(), total);

    // The caller treats a false return as "trie too large".
    return total <= MAX_TRIE_VERTICES;
}
void SmallWriteBuildImpl::add(const ue2_literal &literal, ReportID r) { void SmallWriteBuildImpl::add(const ue2_literal &literal, ReportID r) {
// If the graph is poisoned (i.e. we can't build a SmallWrite version), // If the graph is poisoned (i.e. we can't build a SmallWrite version),
// we don't even try. // we don't even try.
@ -260,6 +307,12 @@ void SmallWriteBuildImpl::add(const ue2_literal &literal, ReportID r) {
cand_literals.push_back(make_pair(literal, r)); cand_literals.push_back(make_pair(literal, r));
if (!add_to_trie(literal,
literal.any_nocase() ? lit_trie_nocase : lit_trie)) {
poisoned = true;
return;
}
if (cand_literals.size() > cc.grey.smallWriteMaxLiterals) { if (cand_literals.size() > cc.grey.smallWriteMaxLiterals) {
poisoned = true; poisoned = true;
} }