mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
smallwrite: simple trie experiment
This commit is contained in:
parent
eec2b8233d
commit
b75b169b49
@ -51,6 +51,7 @@
|
|||||||
#include "util/compile_context.h"
|
#include "util/compile_context.h"
|
||||||
#include "util/container.h"
|
#include "util/container.h"
|
||||||
#include "util/make_unique.h"
|
#include "util/make_unique.h"
|
||||||
|
#include "util/ue2_graph.h"
|
||||||
#include "util/ue2string.h"
|
#include "util/ue2string.h"
|
||||||
#include "util/verify_types.h"
|
#include "util/verify_types.h"
|
||||||
|
|
||||||
@ -65,9 +66,30 @@ namespace ue2 {
|
|||||||
|
|
||||||
#define LITERAL_MERGE_CHUNK_SIZE 25
|
#define LITERAL_MERGE_CHUNK_SIZE 25
|
||||||
#define DFA_MERGE_MAX_STATES 8000
|
#define DFA_MERGE_MAX_STATES 8000
|
||||||
|
#define MAX_TRIE_VERTICES 8000
|
||||||
|
|
||||||
namespace { // unnamed
|
namespace { // unnamed
|
||||||
|
|
||||||
|
/**
 * \brief Per-vertex properties for LitTrie.
 *
 * Holds the character that labels the vertex, plus the index slot that the
 * graph container fills in.
 */
struct LitTrieVertexProps {
    LitTrieVertexProps() = default;
    explicit LitTrieVertexProps(char c_in) : c(c_in) {}
    char c = 0; //!< character labelling this vertex (0 when default-built)
    // Managed by ue2_graph. Zero-initialised so that a props object built
    // outside the graph (e.g. a temporary handed to add_vertex) never carries
    // an indeterminate value when it is copied.
    size_t index = 0;
};
|
||||||
|
|
||||||
|
/**
 * \brief Per-edge properties for LitTrie.
 *
 * Edges carry no payload of their own; the only member is the index slot that
 * the graph container fills in.
 */
struct LitTrieEdgeProps {
    LitTrieEdgeProps() = default;
    // Managed by ue2_graph. Zero-initialised so that copying a freshly built
    // props object never reads an indeterminate value.
    size_t index = 0;
};
|
||||||
|
|
||||||
|
struct LitTrie
|
||||||
|
: public ue2_graph<LitTrie, LitTrieVertexProps, LitTrieEdgeProps> {
|
||||||
|
|
||||||
|
LitTrie() : root(add_vertex(*this)) {}
|
||||||
|
|
||||||
|
const vertex_descriptor root;
|
||||||
|
};
|
||||||
|
|
||||||
// Concrete impl class
|
// Concrete impl class
|
||||||
class SmallWriteBuildImpl : public SmallWriteBuild {
|
class SmallWriteBuildImpl : public SmallWriteBuild {
|
||||||
public:
|
public:
|
||||||
@ -89,6 +111,8 @@ public:
|
|||||||
|
|
||||||
unique_ptr<raw_dfa> rdfa;
|
unique_ptr<raw_dfa> rdfa;
|
||||||
vector<pair<ue2_literal, ReportID> > cand_literals;
|
vector<pair<ue2_literal, ReportID> > cand_literals;
|
||||||
|
LitTrie lit_trie;
|
||||||
|
LitTrie lit_trie_nocase;
|
||||||
bool poisoned;
|
bool poisoned;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -247,6 +271,29 @@ void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bool add_to_trie(const ue2_literal &literal, LitTrie &trie) {
|
||||||
|
auto u = trie.root;
|
||||||
|
for (auto &c : literal) {
|
||||||
|
auto next = LitTrie::null_vertex();
|
||||||
|
for (auto v : adjacent_vertices_range(u, trie)) {
|
||||||
|
if (trie[v].c == c.c) {
|
||||||
|
next = v;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (next == LitTrie::null_vertex()) {
|
||||||
|
next = add_vertex(LitTrieVertexProps(c.c), trie);
|
||||||
|
add_edge(u, next, trie);
|
||||||
|
}
|
||||||
|
u = next;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("added '%s' to trie, now %zu vertices\n",
|
||||||
|
escapeString(literal).c_str(), num_vertices(trie));
|
||||||
|
return num_vertices(trie) <= MAX_TRIE_VERTICES;
|
||||||
|
}
|
||||||
|
|
||||||
void SmallWriteBuildImpl::add(const ue2_literal &literal, ReportID r) {
|
void SmallWriteBuildImpl::add(const ue2_literal &literal, ReportID r) {
|
||||||
// If the graph is poisoned (i.e. we can't build a SmallWrite version),
|
// If the graph is poisoned (i.e. we can't build a SmallWrite version),
|
||||||
// we don't even try.
|
// we don't even try.
|
||||||
@ -260,6 +307,12 @@ void SmallWriteBuildImpl::add(const ue2_literal &literal, ReportID r) {
|
|||||||
|
|
||||||
cand_literals.push_back(make_pair(literal, r));
|
cand_literals.push_back(make_pair(literal, r));
|
||||||
|
|
||||||
|
if (!add_to_trie(literal,
|
||||||
|
literal.any_nocase() ? lit_trie_nocase : lit_trie)) {
|
||||||
|
poisoned = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (cand_literals.size() > cc.grey.smallWriteMaxLiterals) {
|
if (cand_literals.size() > cc.grey.smallWriteMaxLiterals) {
|
||||||
poisoned = true;
|
poisoned = true;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user