mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 19:24:25 +03:00
violet: initial implementation
This commit is contained in:
@@ -651,26 +651,94 @@ floating:
|
||||
}
|
||||
|
||||
static
|
||||
unique_ptr<NGHolder> makeRoseEodPrefix(const NGHolder &h,
|
||||
ReportID prefix_report) {
|
||||
unique_ptr<NGHolder> makeRoseEodPrefix(const NGHolder &h, RoseBuildImpl &build,
|
||||
map<flat_set<ReportID>, ReportID> &remap) {
|
||||
assert(generates_callbacks(h));
|
||||
auto g = cloneHolder(h);
|
||||
g->kind = is_triggered(h) ? NFA_INFIX : NFA_PREFIX;
|
||||
setReportId(*g, prefix_report);
|
||||
assert(!in_degree(h.accept, h));
|
||||
auto gg = cloneHolder(h);
|
||||
NGHolder &g = *gg;
|
||||
g.kind = is_triggered(h) ? NFA_INFIX : NFA_PREFIX;
|
||||
|
||||
// Move acceptEod edges over to accept.
|
||||
vector<NFAEdge> dead;
|
||||
for (const auto &e : in_edges_range(g->acceptEod, *g)) {
|
||||
NFAVertex u = source(e, *g);
|
||||
if (u == g->accept) {
|
||||
for (const auto &e : in_edges_range(g.acceptEod, g)) {
|
||||
NFAVertex u = source(e, g);
|
||||
if (u == g.accept) {
|
||||
continue;
|
||||
}
|
||||
add_edge_if_not_present(u, g->accept, *g);
|
||||
add_edge_if_not_present(u, g.accept, g);
|
||||
dead.push_back(e);
|
||||
|
||||
if (!contains(remap, g[u].reports)) {
|
||||
remap[g[u].reports] = build.getNewNfaReport();
|
||||
}
|
||||
|
||||
g[u].reports = { remap[g[u].reports] };
|
||||
}
|
||||
|
||||
remove_edges(dead, *g);
|
||||
return g;
|
||||
remove_edges(dead, g);
|
||||
return gg;
|
||||
}
|
||||
|
||||
static
|
||||
u32 getEodEventID(RoseBuildImpl &build) {
|
||||
// Allocate the EOD event if it hasn't been already.
|
||||
if (build.eod_event_literal_id == MO_INVALID_IDX) {
|
||||
build.eod_event_literal_id = build.getLiteralId({}, 0, ROSE_EVENT);
|
||||
}
|
||||
|
||||
return build.eod_event_literal_id;
|
||||
}
|
||||
|
||||
static
|
||||
void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u,
|
||||
const NGHolder &h) {
|
||||
assert(!build.isInETable(u));
|
||||
|
||||
RoseGraph &g = build.g;
|
||||
map<flat_set<ReportID>, ReportID> report_remap;
|
||||
shared_ptr<NGHolder> eod_leftfix
|
||||
= makeRoseEodPrefix(h, build, report_remap);
|
||||
|
||||
u32 eod_event = getEodEventID(build);
|
||||
|
||||
for (const auto &report_mapping : report_remap) {
|
||||
RoseVertex v = add_vertex(g);
|
||||
g[v].idx = build.vertexIndex++;
|
||||
g[v].literals.insert(eod_event);
|
||||
build.literal_info[eod_event].vertices.insert(v);
|
||||
|
||||
map<u32, set<ReportID> > report_remap;
|
||||
g[v].left.graph = eod_leftfix;
|
||||
g[v].left.leftfix_report = report_mapping.second;
|
||||
g[v].left.lag = 0;
|
||||
RoseEdge e1 = add_edge(u, v, g).first;
|
||||
g[e1].minBound = 0;
|
||||
g[e1].maxBound = ROSE_BOUND_INF;
|
||||
g[v].min_offset = add_rose_depth(g[u].min_offset,
|
||||
findMinWidth(*g[v].left.graph));
|
||||
g[v].max_offset = ROSE_BOUND_INF;
|
||||
|
||||
depth max_width = findMaxWidth(*g[v].left.graph);
|
||||
if (u != build.root && max_width.is_finite()
|
||||
&& (!build.isAnyStart(u) || isPureAnchored(*g[v].left.graph))) {
|
||||
g[e1].maxBound = max_width;
|
||||
g[v].max_offset = add_rose_depth(g[u].max_offset, max_width);
|
||||
}
|
||||
|
||||
g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix
|
||||
RoseVertex w = add_vertex(g);
|
||||
g[w].idx = build.vertexIndex++;
|
||||
g[w].eod_accept = true;
|
||||
g[w].reports = report_mapping.first;
|
||||
g[w].min_offset = g[v].min_offset;
|
||||
g[w].max_offset = g[v].max_offset;
|
||||
RoseEdge e = add_edge(v, w, g).first;
|
||||
g[e].minBound = 0;
|
||||
g[e].maxBound = 0;
|
||||
g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE;
|
||||
DEBUG_PRINTF("accept eod vertex (idx=%zu)\n", g[w].idx);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
@@ -686,8 +754,20 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi,
|
||||
RoseVertex u = pv.first;
|
||||
const RoseInEdgeProps &edge_props = bd.ig[pv.second];
|
||||
|
||||
/* We need to duplicate the parent vertices if:
|
||||
*
|
||||
* 1) It already has a suffix, etc as we are going to add the specified
|
||||
* suffix, etc to the parents and we do not want to overwrite the
|
||||
* existing information.
|
||||
*
|
||||
* 2) We are making an EOD accept and the vertex already has other
|
||||
* out-edges - The LAST_BYTE history used for EOD accepts is
|
||||
* incompatible with normal successors. As accepts are processed last we
|
||||
* do not need to worry about other normal successors being added later.
|
||||
*/
|
||||
if (g[u].suffix || !g[u].reports.empty()
|
||||
/* also poss accept eod edge: TODO check properly */
|
||||
|| (ig[iv].type == RIV_ACCEPT_EOD && out_degree(u, g)
|
||||
&& !edge_props.graph)
|
||||
|| (!isLeafNode(u, g) && !tbi->isAnyStart(u))) {
|
||||
DEBUG_PRINTF("duplicating for parent %zu\n", g[u].idx);
|
||||
assert(!tbi->isAnyStart(u));
|
||||
@@ -719,74 +799,37 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi,
|
||||
}
|
||||
} else {
|
||||
assert(ig[iv].type == RIV_ACCEPT_EOD);
|
||||
assert(!edge_props.haig);
|
||||
|
||||
if (edge_props.graph && tbi->isInETable(u)) {
|
||||
if (!edge_props.graph) {
|
||||
RoseVertex w = add_vertex(g);
|
||||
g[w].idx = tbi->vertexIndex++;
|
||||
g[w].eod_accept = true;
|
||||
g[w].reports = ig[iv].reports;
|
||||
g[w].min_offset = g[u].min_offset;
|
||||
g[w].max_offset = g[u].max_offset;
|
||||
RoseEdge e = add_edge(u, w, g).first;
|
||||
g[e].minBound = 0;
|
||||
g[e].maxBound = 0;
|
||||
g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE;
|
||||
DEBUG_PRINTF("accept eod vertex (idx=%zu)\n", g[w].idx);
|
||||
continue;
|
||||
}
|
||||
|
||||
const NGHolder &h = *edge_props.graph;
|
||||
assert(!in_degree(h.accept, h));
|
||||
assert(generates_callbacks(h));
|
||||
|
||||
if (tbi->isInETable(u)) {
|
||||
assert(h.kind == NFA_SUFFIX);
|
||||
assert(!tbi->isAnyStart(u));
|
||||
/* etable can't/shouldn't use eod event */
|
||||
DEBUG_PRINTF("adding suffix to i%zu\n", g[u].idx);
|
||||
g[u].suffix.graph = edge_props.graph;
|
||||
assert(g[u].suffix.graph->kind == NFA_SUFFIX);
|
||||
dumpHolder(*g[u].suffix.graph, 98, "eod_suffix", tbi->cc.grey);
|
||||
assert(!in_degree(g[u].suffix.graph->accept,
|
||||
*g[u].suffix.graph));
|
||||
set<ReportID> reports = all_reports(*g[u].suffix.graph);
|
||||
tbi->rm.getReport(*reports.begin());
|
||||
assert(reports.size() == 1);
|
||||
/* TODO: set dfa_(min|max)_width */
|
||||
continue;
|
||||
} else if (edge_props.graph) {
|
||||
assert(!edge_props.haig);
|
||||
assert(!tbi->isInETable(u));
|
||||
|
||||
// Allocate the EOD event if it hasn't been already.
|
||||
if (tbi->eod_event_literal_id == MO_INVALID_IDX) {
|
||||
tbi->eod_event_literal_id =
|
||||
tbi->getLiteralId(ue2_literal(), 0, ROSE_EVENT);
|
||||
}
|
||||
|
||||
RoseVertex v = add_vertex(g);
|
||||
g[v].idx = tbi->vertexIndex++;
|
||||
g[v].literals.insert(tbi->eod_event_literal_id);
|
||||
tbi->literal_info[tbi->eod_event_literal_id].vertices.insert(v);
|
||||
|
||||
ReportID prefix_report = tbi->getNewNfaReport();
|
||||
g[v].left.graph
|
||||
= makeRoseEodPrefix(*edge_props.graph, prefix_report);
|
||||
g[v].left.leftfix_report = prefix_report;
|
||||
g[v].left.lag = 0;
|
||||
RoseEdge e1 = add_edge(u, v, g).first;
|
||||
g[e1].minBound = 0;
|
||||
g[e1].maxBound = ROSE_BOUND_INF;
|
||||
g[v].min_offset = add_rose_depth(
|
||||
g[u].min_offset, findMinWidth(*g[v].left.graph));
|
||||
g[v].max_offset = ROSE_BOUND_INF;
|
||||
|
||||
DEBUG_PRINTF("hi\n");
|
||||
depth max_width = findMaxWidth(*g[v].left.graph);
|
||||
if (u != tbi->root
|
||||
&& max_width.is_finite()
|
||||
&& (!tbi->isAnyStart(u)
|
||||
|| isPureAnchored(*g[v].left.graph))) {
|
||||
g[e1].maxBound = max_width;
|
||||
g[v].max_offset = add_rose_depth(g[u].max_offset, max_width);
|
||||
}
|
||||
|
||||
g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix
|
||||
u = v;
|
||||
}
|
||||
assert(!edge_props.haig);
|
||||
|
||||
RoseVertex w = add_vertex(g);
|
||||
g[w].idx = tbi->vertexIndex++;
|
||||
g[w].eod_accept = true;
|
||||
g[w].reports = ig[iv].reports;
|
||||
g[w].min_offset = g[u].min_offset;
|
||||
g[w].max_offset = g[u].max_offset;
|
||||
RoseEdge e = add_edge(u, w, g).first;
|
||||
g[e].minBound = 0;
|
||||
g[e].maxBound = 0;
|
||||
g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE;
|
||||
DEBUG_PRINTF("accept eod vertex (idx=%zu)\n", g[w].idx);
|
||||
makeEodEventLeftfix(*tbi, u, h);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -887,7 +930,8 @@ bool suitableForEod(const RoseInGraph &ig, vector<RoseInVertex> topo,
|
||||
ENSURE_AT_LEAST(&v_depth, (u32)max_width);
|
||||
}
|
||||
|
||||
if (v_depth == ROSE_BOUND_INF || v_depth > cc.grey.maxHistoryAvailable) {
|
||||
if (v_depth == ROSE_BOUND_INF
|
||||
|| v_depth > cc.grey.maxHistoryAvailable) {
|
||||
DEBUG_PRINTF("not suitable for eod table %u\n", v_depth);
|
||||
return false;
|
||||
}
|
||||
@@ -900,6 +944,13 @@ bool suitableForEod(const RoseInGraph &ig, vector<RoseInVertex> topo,
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
void shift_accepts_to_end(const RoseInGraph &ig,
|
||||
vector<RoseInVertex> &topo_order) {
|
||||
stable_partition(begin(topo_order), end(topo_order),
|
||||
[&](RoseInVertex v){ return !is_any_accept(v, ig); });
|
||||
}
|
||||
|
||||
static
|
||||
void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) {
|
||||
const RoseInGraph &ig = bd.ig;
|
||||
@@ -912,6 +963,7 @@ void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) {
|
||||
map<RoseInVertex, vector<RoseVertex> > vertex_map;
|
||||
|
||||
vector<RoseInVertex> v_order = topo_order(ig);
|
||||
shift_accepts_to_end(ig, v_order);
|
||||
|
||||
u32 eod_space_required;
|
||||
bool use_eod_table = suitableForEod(ig, v_order, &eod_space_required,
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -336,7 +336,8 @@ void buildLiteralMask(const vector<CharReach> &mask, vector<u8> &msk,
|
||||
}
|
||||
|
||||
static
|
||||
bool validateTransientMask(const vector<CharReach> &mask, bool eod, const Grey &grey) {
|
||||
bool validateTransientMask(const vector<CharReach> &mask, bool anchored,
|
||||
bool eod, const Grey &grey) {
|
||||
assert(!mask.empty());
|
||||
|
||||
// An EOD anchored mask requires that everything fit into history, while an
|
||||
@@ -348,6 +349,12 @@ bool validateTransientMask(const vector<CharReach> &mask, bool eod, const Grey &
|
||||
return false;
|
||||
}
|
||||
|
||||
/* although anchored masks cannot be transient, short masks may be placed
|
||||
* into the atable. */
|
||||
if (anchored && mask.size() > grey.maxAnchoredRegion) {
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<ue2_literal> lits;
|
||||
u32 lit_minBound; /* minBound of each literal in lit */
|
||||
u32 lit_length; /* length of each literal in lit */
|
||||
@@ -703,7 +710,7 @@ bool checkAllowMask(const vector<CharReach> &mask, ue2_literal *lit,
|
||||
|
||||
bool RoseBuildImpl::add(bool anchored, const vector<CharReach> &mask,
|
||||
const ue2::flat_set<ReportID> &reports) {
|
||||
if (validateTransientMask(mask, false, cc.grey)) {
|
||||
if (validateTransientMask(mask, anchored, false, cc.grey)) {
|
||||
bool eod = false;
|
||||
addTransientMask(*this, mask, reports, anchored, eod);
|
||||
return true;
|
||||
@@ -726,8 +733,8 @@ bool RoseBuildImpl::add(bool anchored, const vector<CharReach> &mask,
|
||||
|
||||
bool RoseBuildImpl::validateMask(const vector<CharReach> &mask,
|
||||
UNUSED const ue2::flat_set<ReportID> &reports,
|
||||
UNUSED bool anchored, bool eod) const {
|
||||
return validateTransientMask(mask, eod, cc.grey);
|
||||
bool anchored, bool eod) const {
|
||||
return validateTransientMask(mask, anchored, eod, cc.grey);
|
||||
}
|
||||
|
||||
static
|
||||
|
@@ -433,6 +433,9 @@ RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) {
|
||||
|
||||
// If the bounds are {0,0}, this role can only match precisely at EOD.
|
||||
if (minBound == 0 && maxBound == 0) {
|
||||
/* last byte history will squash the state byte so cannot have other
|
||||
* succ */
|
||||
assert(out_degree(u, g) == 1);
|
||||
return ROSE_ROLE_HISTORY_LAST_BYTE;
|
||||
}
|
||||
|
||||
@@ -915,19 +918,32 @@ void RoseBuildImpl::findTransientLeftfixes(void) {
|
||||
continue;
|
||||
}
|
||||
|
||||
u32 his = g[v].left.lag + max_width;
|
||||
if (cc.streaming) {
|
||||
/* STREAMING: transient prefixes must be able to run using history
|
||||
* rather than storing state. */
|
||||
u32 his = g[v].left.lag + max_width;
|
||||
|
||||
// If this vertex has an event literal, we need to add one to cope
|
||||
// with it.
|
||||
if (hasLiteralInTable(v, ROSE_EVENT)) {
|
||||
his++;
|
||||
}
|
||||
// If this vertex has an event literal, we need to add one to cope
|
||||
// with it.
|
||||
if (hasLiteralInTable(v, ROSE_EVENT)) {
|
||||
his++;
|
||||
}
|
||||
|
||||
/* +1 as trigger must appear in main buffer and no byte is needed to
|
||||
* decompress the state */
|
||||
if (his <= cc.grey.maxHistoryAvailable + 1) {
|
||||
transient.insert(left);
|
||||
DEBUG_PRINTF("a transient leftfix has been spotted his=%u\n", his);
|
||||
/* +1 as trigger must appear in main buffer and no byte is needed to
|
||||
* decompress the state */
|
||||
if (his <= cc.grey.maxHistoryAvailable + 1) {
|
||||
transient.insert(left);
|
||||
DEBUG_PRINTF("a transient leftfix spotted his=%u\n", his);
|
||||
}
|
||||
} else {
|
||||
/* BLOCK: transientness is less important and more fuzzy, ideally
|
||||
* it should be quick to calculate the state. No need to worry about
|
||||
* history (and hence lag). */
|
||||
if (max_width < depth(ROSE_BLOCK_TRANSIENT_MAX_WIDTH)) {
|
||||
transient.insert(left);
|
||||
DEBUG_PRINTF("a transient block leftfix spotted [%u]\n",
|
||||
(u32)max_width);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -61,28 +61,6 @@ using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
string to_string(nfa_kind k) {
|
||||
switch (k) {
|
||||
case NFA_PREFIX:
|
||||
return "PREFIX";
|
||||
case NFA_INFIX:
|
||||
return "INFIX";
|
||||
case NFA_SUFFIX:
|
||||
return "SUFFIX";
|
||||
case NFA_OUTFIX:
|
||||
return "OUTFIX";
|
||||
case NFA_REV_PREFIX:
|
||||
return "REV_PREFIX";
|
||||
case NFA_OUTFIX_RAW:
|
||||
return "OUTFIX_RAW";
|
||||
case NFA_EAGER_PREFIX:
|
||||
return "EAGER_PREFIX";
|
||||
}
|
||||
assert(0);
|
||||
return "?";
|
||||
}
|
||||
|
||||
/** \brief Return the kind of a left_id or a suffix_id. */
|
||||
template<class Graph>
|
||||
string render_kind(const Graph &g) {
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -684,6 +684,10 @@ bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
|
||||
|
||||
lookaround.reserve(look.size());
|
||||
for (const auto &m : look) {
|
||||
if (m.first < -128 || m.first > 127) {
|
||||
DEBUG_PRINTF("range too big\n");
|
||||
return false;
|
||||
}
|
||||
s8 offset = verify_s8(m.first);
|
||||
lookaround.emplace_back(offset, m.second);
|
||||
}
|
||||
|
@@ -36,6 +36,9 @@
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** Max allowed width for transient graphs in block mode */
|
||||
#define ROSE_BLOCK_TRANSIENT_MAX_WIDTH 255U
|
||||
|
||||
// Comparator for vertices using their index property.
|
||||
struct VertexIndexComp {
|
||||
VertexIndexComp(const RoseGraph &gg) : g(gg) {}
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -107,7 +107,8 @@ void dumpPreRoseGraph(const RoseInGraph &ig, const Grey &grey,
|
||||
size_t id = graph_ids.size();
|
||||
graph_ids[&*ig[e].graph] = id;
|
||||
}
|
||||
fprintf(f, "graph %zu", graph_ids[&*ig[e].graph]);
|
||||
fprintf(f, "graph %zu\n%s", graph_ids[&*ig[e].graph],
|
||||
to_string(ig[e].graph->kind).c_str());
|
||||
}
|
||||
if (ig[e].haig) {
|
||||
fprintf(f, "haig ");
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -106,6 +106,12 @@ public:
|
||||
ROSE_BOUND_INF);
|
||||
}
|
||||
|
||||
/* for when there is a suffix graph which handles the reports */
|
||||
static RoseInVertexProps makeAcceptEod() {
|
||||
return RoseInVertexProps(RIV_ACCEPT_EOD, ue2_literal(), 0,
|
||||
ROSE_BOUND_INF);
|
||||
}
|
||||
|
||||
static RoseInVertexProps makeStart(bool anchored) {
|
||||
DEBUG_PRINTF("making %s\n", anchored ? "anchored start" : "start");
|
||||
if (anchored) {
|
||||
|
@@ -46,6 +46,11 @@ void calcVertexOffsets(RoseInGraph &ig);
|
||||
enum nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e);
|
||||
void pruneUseless(RoseInGraph &g);
|
||||
|
||||
inline
|
||||
bool is_any_accept(RoseInVertex v, const RoseInGraph &g) {
|
||||
return g[v].type == RIV_ACCEPT || g[v].type == RIV_ACCEPT_EOD;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -510,7 +510,8 @@ void runEagerPrefixesStream(const struct RoseEngine *t,
|
||||
}
|
||||
|
||||
void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
|
||||
DEBUG_PRINTF("OH HAI\n");
|
||||
DEBUG_PRINTF("OH HAI [%llu, %llu)\n", scratch->core_info.buf_offset,
|
||||
scratch->core_info.buf_offset + (u64a)scratch->core_info.len);
|
||||
assert(t);
|
||||
assert(scratch->core_info.hbuf);
|
||||
assert(scratch->core_info.buf);
|
||||
|
Reference in New Issue
Block a user