mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-11-19 10:34:25 +03:00
ng: split NGWrapper into NGHolder, ExpressionInfo
We now use NGHolder for all graph information, while other expression properties (report, flag information, etc) go in new class ExpressionInfo.
This commit is contained in:
committed by
Matthew Barr
parent
fadfab6d8c
commit
5dfae12a62
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -47,6 +47,7 @@
|
||||
#include "ng_prune.h"
|
||||
#include "ng_redundancy.h"
|
||||
#include "ng_util.h"
|
||||
#include "compiler/compiler.h"
|
||||
#include "parser/position.h" // for POS flags
|
||||
#include "util/bitutils.h" // for findAndClearLSB_32
|
||||
#include "util/boundary_reports.h"
|
||||
@@ -184,43 +185,45 @@ void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts,
|
||||
}
|
||||
|
||||
static
|
||||
void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) {
|
||||
void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
|
||||
NFAVertex v, s32 adj) {
|
||||
// Don't try and set the report ID of a special vertex.
|
||||
assert(!is_special(v, g));
|
||||
|
||||
// If there's a report set already, we're replacing it.
|
||||
g[v].reports.clear();
|
||||
|
||||
Report ir = rm.getBasicInternalReport(g, adj);
|
||||
Report ir = rm.getBasicInternalReport(expr, adj);
|
||||
|
||||
g[v].reports.insert(rm.getInternalId(ir));
|
||||
DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
|
||||
}
|
||||
|
||||
static
|
||||
NFAVertex makeClone(ReportManager &rm, NGWrapper &g, NFAVertex v,
|
||||
const CharReach &cr_mask) {
|
||||
NFAVertex makeClone(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
|
||||
NFAVertex v, const CharReach &cr_mask) {
|
||||
NFAVertex clone = clone_vertex(g, v);
|
||||
g[clone].char_reach &= cr_mask;
|
||||
clone_out_edges(g, v, clone);
|
||||
clone_in_edges(g, v, clone);
|
||||
|
||||
if (v == g.startDs) {
|
||||
if (g.utf8) {
|
||||
if (expr.utf8) {
|
||||
g[clone].char_reach &= ~UTF_START_CR;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("marked as virt\n");
|
||||
g[clone].assert_flags = POS_FLAG_VIRTUAL_START;
|
||||
|
||||
setReportId(rm, g, clone, 0);
|
||||
setReportId(rm, g, expr, clone, 0);
|
||||
}
|
||||
|
||||
return clone;
|
||||
}
|
||||
|
||||
static
|
||||
void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
|
||||
void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
|
||||
NFAVertex v, bool ucp) {
|
||||
assert(v != g.start);
|
||||
assert(v != g.accept);
|
||||
assert(v != g.acceptEod);
|
||||
@@ -232,14 +235,14 @@ void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
|
||||
auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; };
|
||||
|
||||
// Split v into word/nonword vertices with only asserting out-edges.
|
||||
NFAVertex w_out = makeClone(rm, g, v, cr_word);
|
||||
NFAVertex nw_out = makeClone(rm, g, v, cr_nonword);
|
||||
NFAVertex w_out = makeClone(rm, g, expr, v, cr_word);
|
||||
NFAVertex nw_out = makeClone(rm, g, expr, v, cr_nonword);
|
||||
remove_out_edge_if(w_out, has_no_assert, g);
|
||||
remove_out_edge_if(nw_out, has_no_assert, g);
|
||||
|
||||
// Split v into word/nonword vertices with only asserting in-edges.
|
||||
NFAVertex w_in = makeClone(rm, g, v, cr_word);
|
||||
NFAVertex nw_in = makeClone(rm, g, v, cr_nonword);
|
||||
NFAVertex w_in = makeClone(rm, g, expr, v, cr_word);
|
||||
NFAVertex nw_in = makeClone(rm, g, expr, v, cr_nonword);
|
||||
remove_in_edge_if(w_in, has_no_assert, g);
|
||||
remove_in_edge_if(nw_in, has_no_assert, g);
|
||||
|
||||
@@ -250,7 +253,8 @@ void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
|
||||
}
|
||||
|
||||
static
|
||||
void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
|
||||
void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
|
||||
set<NFAEdge> *dead) {
|
||||
for (const auto &e : edges_range(g)) {
|
||||
u32 flags = g[e].assert_flags;
|
||||
if (!flags) {
|
||||
@@ -363,7 +367,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
|
||||
} else if (v_w) {
|
||||
/* need to add a word byte */
|
||||
NFAVertex vv = add_vertex(g);
|
||||
setReportId(rm, g, vv, -1);
|
||||
setReportId(rm, g, expr, vv, -1);
|
||||
g[vv].char_reach = CHARREACH_WORD;
|
||||
add_edge(vv, g.accept, g);
|
||||
g[e].assert_flags = 0;
|
||||
@@ -372,7 +376,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
|
||||
} else {
|
||||
/* need to add a non word byte or see eod */
|
||||
NFAVertex vv = add_vertex(g);
|
||||
setReportId(rm, g, vv, -1);
|
||||
setReportId(rm, g, expr, vv, -1);
|
||||
g[vv].char_reach = CHARREACH_NONWORD;
|
||||
add_edge(vv, g.accept, g);
|
||||
g[e].assert_flags = 0;
|
||||
@@ -416,7 +420,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
|
||||
} else if (v_w) {
|
||||
/* need to add a word byte */
|
||||
NFAVertex vv = add_vertex(g);
|
||||
setReportId(rm, g, vv, -1);
|
||||
setReportId(rm, g, expr, vv, -1);
|
||||
g[vv].char_reach = CHARREACH_WORD_UCP_PRE;
|
||||
add_edge(vv, g.accept, g);
|
||||
g[e].assert_flags = 0;
|
||||
@@ -425,7 +429,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
|
||||
} else {
|
||||
/* need to add a non word byte or see eod */
|
||||
NFAVertex vv = add_vertex(g);
|
||||
setReportId(rm, g, vv, -1);
|
||||
setReportId(rm, g, expr, vv, -1);
|
||||
g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE;
|
||||
add_edge(vv, g.accept, g);
|
||||
g[e].assert_flags = 0;
|
||||
@@ -450,7 +454,8 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
|
||||
}
|
||||
}
|
||||
|
||||
void resolveAsserts(ReportManager &rm, NGWrapper &g) {
|
||||
void resolveAsserts(ReportManager &rm, NGHolder &g,
|
||||
const ExpressionInfo &expr) {
|
||||
vector<NFAEdge> asserts = getAsserts(g);
|
||||
if (asserts.empty()) {
|
||||
return;
|
||||
@@ -460,20 +465,20 @@ void resolveAsserts(ReportManager &rm, NGWrapper &g) {
|
||||
map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */
|
||||
findSplitters(g, asserts, &to_split, &to_split_ucp);
|
||||
if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) {
|
||||
throw CompileError(g.expressionIndex, "Pattern is too large.");
|
||||
throw CompileError(expr.index, "Pattern is too large.");
|
||||
}
|
||||
|
||||
for (const auto &m : to_split) {
|
||||
assert(!contains(to_split_ucp, m.first));
|
||||
splitVertex(rm, g, m.second, false);
|
||||
splitVertex(rm, g, expr, m.second, false);
|
||||
}
|
||||
|
||||
for (const auto &m : to_split_ucp) {
|
||||
splitVertex(rm, g, m.second, true);
|
||||
splitVertex(rm, g, expr, m.second, true);
|
||||
}
|
||||
|
||||
set<NFAEdge> dead;
|
||||
resolveEdges(rm, g, &dead);
|
||||
resolveEdges(rm, g, expr, &dead);
|
||||
|
||||
remove_edges(dead, g);
|
||||
renumber_vertices(g);
|
||||
@@ -485,15 +490,16 @@ void resolveAsserts(ReportManager &rm, NGWrapper &g) {
|
||||
clearReports(g);
|
||||
}
|
||||
|
||||
void ensureCodePointStart(ReportManager &rm, NGWrapper &g) {
|
||||
void ensureCodePointStart(ReportManager &rm, NGHolder &g,
|
||||
const ExpressionInfo &expr) {
|
||||
/* In utf8 mode there is an implicit assertion that we start at codepoint
|
||||
* boundaries. Assert resolution handles the badness coming from asserts.
|
||||
* The only other source of trouble is startDs->accept connections.
|
||||
*/
|
||||
NFAEdge orig = edge(g.startDs, g.accept, g);
|
||||
if (g.utf8 && orig) {
|
||||
DEBUG_PRINTF("rectifying %u\n", g.reportId);
|
||||
Report ir = rm.getBasicInternalReport(g);
|
||||
if (expr.utf8 && orig) {
|
||||
DEBUG_PRINTF("rectifying %u\n", expr.report);
|
||||
Report ir = rm.getBasicInternalReport(expr);
|
||||
ReportID rep = rm.getInternalId(ir);
|
||||
|
||||
NFAVertex v_a = add_vertex(g);
|
||||
|
||||
Reference in New Issue
Block a user