mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 19:24:25 +03:00
ng: split NGWrapper into NGHolder, ExpressionInfo
We now use NGHolder for all graph information, while other expression properties (report, flag information, etc.) go in the new class ExpressionInfo.
This commit is contained in:
committed by
Matthew Barr
parent
fadfab6d8c
commit
5dfae12a62
@@ -27,10 +27,11 @@
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief NG, NGHolder, NGWrapper and graph handling.
|
||||
* \brief NG and graph handling.
|
||||
*/
|
||||
#include "grey.h"
|
||||
#include "ng.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "ng_anchored_acyclic.h"
|
||||
#include "ng_anchored_dots.h"
|
||||
#include "ng_asserts.h"
|
||||
@@ -62,6 +63,7 @@
|
||||
#include "ng_util.h"
|
||||
#include "ng_width.h"
|
||||
#include "ue2common.h"
|
||||
#include "compiler/compiler.h"
|
||||
#include "nfa/goughcompile.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "smallwrite/smallwrite_build.h"
|
||||
@@ -100,16 +102,16 @@ NG::~NG() {
|
||||
* \throw CompileError if SOM cannot be supported for the component.
|
||||
*/
|
||||
static
|
||||
bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
|
||||
bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
|
||||
const som_type som, const u32 comp_id) {
|
||||
DEBUG_PRINTF("doing som\n");
|
||||
dumpComponent(g, "03_presom", w.expressionIndex, comp_id, ng.cc.grey);
|
||||
dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey);
|
||||
assert(hasCorrectlyNumberedVertices(g));
|
||||
assert(allMatchStatesHaveReports(w));
|
||||
assert(allMatchStatesHaveReports(g));
|
||||
|
||||
// First, we try the "SOM chain" support in ng_som.cpp.
|
||||
|
||||
sombe_rv rv = doSom(ng, g, w, comp_id, som);
|
||||
sombe_rv rv = doSom(ng, g, expr, comp_id, som);
|
||||
if (rv == SOMBE_HANDLED_INTERNAL) {
|
||||
return false;
|
||||
} else if (rv == SOMBE_HANDLED_ALL) {
|
||||
@@ -118,7 +120,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
|
||||
assert(rv == SOMBE_FAIL);
|
||||
|
||||
/* Next, Sombe style approaches */
|
||||
rv = doSomWithHaig(ng, g, w, comp_id, som);
|
||||
rv = doSomWithHaig(ng, g, expr, comp_id, som);
|
||||
if (rv == SOMBE_HANDLED_INTERNAL) {
|
||||
return false;
|
||||
} else if (rv == SOMBE_HANDLED_ALL) {
|
||||
@@ -132,7 +134,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
|
||||
vector<vector<CharReach> > triggers; /* empty for outfix */
|
||||
|
||||
assert(g.kind == NFA_OUTFIX);
|
||||
dumpComponent(g, "haig", w.expressionIndex, comp_id, ng.cc.grey);
|
||||
dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey);
|
||||
makeReportsSomPass(ng.rm, g);
|
||||
auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers,
|
||||
ng.cc.grey);
|
||||
@@ -145,7 +147,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
|
||||
/* Our various strategies for supporting SOM for this pattern have failed.
|
||||
* Provide a generic pattern not supported/too large return value as it is
|
||||
* unclear what the meaning of a specific SOM error would be */
|
||||
throw CompileError(w.expressionIndex, "Pattern is too large.");
|
||||
throw CompileError(expr.index, "Pattern is too large.");
|
||||
|
||||
assert(0); // unreachable
|
||||
return false;
|
||||
@@ -200,21 +202,21 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8,
|
||||
}
|
||||
|
||||
static
|
||||
bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
|
||||
const u32 comp_id) {
|
||||
bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
|
||||
const som_type som, const u32 comp_id) {
|
||||
const CompileContext &cc = ng.cc;
|
||||
assert(hasCorrectlyNumberedVertices(g));
|
||||
|
||||
DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n",
|
||||
w.expressionIndex, comp_id, num_vertices(g), num_edges(g));
|
||||
expr.index, comp_id, num_vertices(g), num_edges(g));
|
||||
|
||||
dumpComponent(g, "01_begin", w.expressionIndex, comp_id, ng.cc.grey);
|
||||
dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey);
|
||||
|
||||
assert(allMatchStatesHaveReports(w));
|
||||
assert(allMatchStatesHaveReports(g));
|
||||
|
||||
reduceGraph(g, som, w.utf8, cc);
|
||||
reduceGraph(g, som, expr.utf8, cc);
|
||||
|
||||
dumpComponent(g, "02_reduced", w.expressionIndex, comp_id, ng.cc.grey);
|
||||
dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey);
|
||||
|
||||
// There may be redundant regions that we can remove
|
||||
if (cc.grey.performGraphSimplification) {
|
||||
@@ -231,12 +233,12 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
|
||||
|
||||
// Start Of Match handling.
|
||||
if (som) {
|
||||
if (addComponentSom(ng, g, w, som, comp_id)) {
|
||||
if (addComponentSom(ng, g, expr, som, comp_id)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
assert(allMatchStatesHaveReports(w));
|
||||
assert(allMatchStatesHaveReports(g));
|
||||
|
||||
if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) {
|
||||
return true;
|
||||
@@ -251,11 +253,11 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
|
||||
return true;
|
||||
}
|
||||
|
||||
if (doViolet(*ng.rose, g, w.prefilter, false, ng.rm, cc)) {
|
||||
if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (splitOffPuffs(*ng.rose, ng.rm, g, w.prefilter, cc)) {
|
||||
if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -268,7 +270,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
|
||||
return true;
|
||||
}
|
||||
|
||||
if (doViolet(*ng.rose, g, w.prefilter, true, ng.rm, cc)) {
|
||||
if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -283,7 +285,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
|
||||
|
||||
// Returns true if all components have been added.
|
||||
static
|
||||
bool processComponents(NG &ng, NGWrapper &w,
|
||||
bool processComponents(NG &ng, ExpressionInfo &expr,
|
||||
deque<unique_ptr<NGHolder>> &g_comp,
|
||||
const som_type som) {
|
||||
const u32 num_components = g_comp.size();
|
||||
@@ -293,7 +295,7 @@ bool processComponents(NG &ng, NGWrapper &w,
|
||||
if (!g_comp[i]) {
|
||||
continue;
|
||||
}
|
||||
if (addComponent(ng, *g_comp[i], w, som, i)) {
|
||||
if (addComponent(ng, *g_comp[i], expr, som, i)) {
|
||||
g_comp[i].reset();
|
||||
continue;
|
||||
}
|
||||
@@ -313,48 +315,48 @@ bool processComponents(NG &ng, NGWrapper &w,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool NG::addGraph(NGWrapper &w) {
|
||||
bool NG::addGraph(ExpressionInfo &expr, NGHolder &g) {
|
||||
// remove reports that aren't on vertices connected to accept.
|
||||
clearReports(w);
|
||||
clearReports(g);
|
||||
|
||||
som_type som = w.som;
|
||||
if (som && isVacuous(w)) {
|
||||
throw CompileError(w.expressionIndex, "Start of match is not "
|
||||
som_type som = expr.som;
|
||||
if (som && isVacuous(g)) {
|
||||
throw CompileError(expr.index, "Start of match is not "
|
||||
"currently supported for patterns which match an "
|
||||
"empty buffer.");
|
||||
}
|
||||
|
||||
dumpDotWrapper(w, "01_initial", cc.grey);
|
||||
assert(allMatchStatesHaveReports(w));
|
||||
dumpDotWrapper(g, expr, "01_initial", cc.grey);
|
||||
assert(allMatchStatesHaveReports(g));
|
||||
|
||||
/* ensure utf8 starts at cp boundary */
|
||||
ensureCodePointStart(rm, w);
|
||||
ensureCodePointStart(rm, g, expr);
|
||||
|
||||
if (can_never_match(w)) {
|
||||
throw CompileError(w.expressionIndex, "Pattern can never match.");
|
||||
if (can_never_match(g)) {
|
||||
throw CompileError(expr.index, "Pattern can never match.");
|
||||
}
|
||||
|
||||
// validate graph's suitability for fuzzing before resolving asserts
|
||||
validate_fuzzy_compile(w, w.edit_distance, w.utf8, cc.grey);
|
||||
validate_fuzzy_compile(g, expr.edit_distance, expr.utf8, cc.grey);
|
||||
|
||||
resolveAsserts(rm, w);
|
||||
dumpDotWrapper(w, "02_post_assert_resolve", cc.grey);
|
||||
assert(allMatchStatesHaveReports(w));
|
||||
resolveAsserts(rm, g, expr);
|
||||
dumpDotWrapper(g, expr, "02_post_assert_resolve", cc.grey);
|
||||
assert(allMatchStatesHaveReports(g));
|
||||
|
||||
make_fuzzy(w, w.edit_distance, cc.grey);
|
||||
dumpDotWrapper(w, "02a_post_fuzz", cc.grey);
|
||||
make_fuzzy(g, expr.edit_distance, cc.grey);
|
||||
dumpDotWrapper(g, expr, "02a_post_fuzz", cc.grey);
|
||||
|
||||
pruneUseless(w);
|
||||
pruneEmptyVertices(w);
|
||||
pruneUseless(g);
|
||||
pruneEmptyVertices(g);
|
||||
|
||||
if (can_never_match(w)) {
|
||||
throw CompileError(w.expressionIndex, "Pattern can never match.");
|
||||
if (can_never_match(g)) {
|
||||
throw CompileError(expr.index, "Pattern can never match.");
|
||||
}
|
||||
|
||||
optimiseVirtualStarts(w); /* good for som */
|
||||
optimiseVirtualStarts(g); /* good for som */
|
||||
|
||||
handleExtendedParams(rm, w, cc);
|
||||
if (w.min_length) {
|
||||
handleExtendedParams(rm, g, expr, cc);
|
||||
if (expr.min_length) {
|
||||
// We have a minimum length constraint, which we currently use SOM to
|
||||
// satisfy.
|
||||
som = SOM_LEFT;
|
||||
@@ -368,70 +370,70 @@ bool NG::addGraph(NGWrapper &w) {
|
||||
// first, we can perform graph work that can be done on an individual
|
||||
// expression basis.
|
||||
|
||||
if (w.utf8) {
|
||||
relaxForbiddenUtf8(w);
|
||||
if (expr.utf8) {
|
||||
relaxForbiddenUtf8(g, expr);
|
||||
}
|
||||
|
||||
if (w.highlander && !w.min_length && !w.min_offset) {
|
||||
if (expr.highlander && !expr.min_length && !expr.min_offset) {
|
||||
// In highlander mode: if we don't have constraints on our reports that
|
||||
// may prevent us accepting our first match (i.e. extended params) we
|
||||
// can prune the other out-edges of all vertices connected to accept.
|
||||
pruneHighlanderAccepts(w, rm);
|
||||
pruneHighlanderAccepts(g, rm);
|
||||
}
|
||||
|
||||
dumpDotWrapper(w, "02b_fairly_early", cc.grey);
|
||||
dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey);
|
||||
|
||||
// If we're a vacuous pattern, we can handle this early.
|
||||
if (splitOffVacuous(boundary, rm, w)) {
|
||||
if (splitOffVacuous(boundary, rm, g, expr)) {
|
||||
DEBUG_PRINTF("split off vacuous\n");
|
||||
}
|
||||
|
||||
// We might be done at this point: if we've run out of vertices, we can
|
||||
// stop processing.
|
||||
if (num_vertices(w) == N_SPECIALS) {
|
||||
if (num_vertices(g) == N_SPECIALS) {
|
||||
DEBUG_PRINTF("all vertices claimed by vacuous handling\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
// Now that vacuous edges have been removed, update the min width exclusive
|
||||
// of boundary reports.
|
||||
minWidth = min(minWidth, findMinWidth(w));
|
||||
minWidth = min(minWidth, findMinWidth(g));
|
||||
|
||||
// Add the pattern to the small write builder.
|
||||
smwr->add(w);
|
||||
smwr->add(g, expr);
|
||||
|
||||
if (!som) {
|
||||
removeSiblingsOfStartDotStar(w);
|
||||
removeSiblingsOfStartDotStar(g);
|
||||
}
|
||||
|
||||
dumpDotWrapper(w, "03_early", cc.grey);
|
||||
dumpDotWrapper(g, expr, "03_early", cc.grey);
|
||||
|
||||
// Perform a reduction pass to merge sibling character classes together.
|
||||
if (cc.grey.performGraphSimplification) {
|
||||
removeRedundancy(w, som);
|
||||
prunePathsRedundantWithSuccessorOfCyclics(w, som);
|
||||
removeRedundancy(g, som);
|
||||
prunePathsRedundantWithSuccessorOfCyclics(g, som);
|
||||
}
|
||||
|
||||
dumpDotWrapper(w, "04_reduced", cc.grey);
|
||||
dumpDotWrapper(g, expr, "04_reduced", cc.grey);
|
||||
|
||||
// If we've got some literals that span the graph from start to accept, we
|
||||
// can split them off into Rose from here.
|
||||
if (!som) {
|
||||
if (splitOffLiterals(*this, w)) {
|
||||
if (splitOffLiterals(*this, g)) {
|
||||
DEBUG_PRINTF("some vertices claimed by literals\n");
|
||||
}
|
||||
}
|
||||
|
||||
// We might be done at this point: if we've run out of vertices, we can
|
||||
// stop processing.
|
||||
if (num_vertices(w) == N_SPECIALS) {
|
||||
if (num_vertices(g) == N_SPECIALS) {
|
||||
DEBUG_PRINTF("all vertices claimed before calc components\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
// Split the graph into a set of connected components.
|
||||
|
||||
deque<unique_ptr<NGHolder>> g_comp = calcComponents(w);
|
||||
deque<unique_ptr<NGHolder>> g_comp = calcComponents(g);
|
||||
assert(!g_comp.empty());
|
||||
|
||||
if (!som) {
|
||||
@@ -443,14 +445,14 @@ bool NG::addGraph(NGWrapper &w) {
|
||||
recalcComponents(g_comp);
|
||||
}
|
||||
|
||||
if (processComponents(*this, w, g_comp, som)) {
|
||||
if (processComponents(*this, expr, g_comp, som)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// If we're in prefiltering mode, we can run the prefilter reductions and
|
||||
// have another shot at accepting the graph.
|
||||
|
||||
if (cc.grey.prefilterReductions && w.prefilter) {
|
||||
if (cc.grey.prefilterReductions && expr.prefilter) {
|
||||
for (u32 i = 0; i < g_comp.size(); i++) {
|
||||
if (!g_comp[i]) {
|
||||
continue;
|
||||
@@ -459,7 +461,7 @@ bool NG::addGraph(NGWrapper &w) {
|
||||
prefilterReductions(*g_comp[i], cc);
|
||||
}
|
||||
|
||||
if (processComponents(*this, w, g_comp, som)) {
|
||||
if (processComponents(*this, expr, g_comp, som)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -469,7 +471,7 @@ bool NG::addGraph(NGWrapper &w) {
|
||||
if (g_comp[i]) {
|
||||
DEBUG_PRINTF("could not compile component %u with %zu vertices\n",
|
||||
i, num_vertices(*g_comp[i]));
|
||||
throw CompileError(w.expressionIndex, "Pattern is too large.");
|
||||
throw CompileError(expr.index, "Pattern is too large.");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -478,60 +480,60 @@ bool NG::addGraph(NGWrapper &w) {
|
||||
}
|
||||
|
||||
/** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */
|
||||
bool NG::addHolder(NGHolder &w) {
|
||||
DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(w));
|
||||
assert(allMatchStatesHaveReports(w));
|
||||
assert(hasCorrectlyNumberedVertices(w));
|
||||
bool NG::addHolder(NGHolder &g) {
|
||||
DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g));
|
||||
assert(allMatchStatesHaveReports(g));
|
||||
assert(hasCorrectlyNumberedVertices(g));
|
||||
|
||||
/* We don't update the global minWidth here as we care about the min width
|
||||
* of the whole pattern - not just a prefix of it. */
|
||||
|
||||
bool prefilter = false;
|
||||
//dumpDotComp(comp, w, *this, 20, "prefix_init");
|
||||
//dumpDotComp(comp, g, *this, 20, "prefix_init");
|
||||
|
||||
som_type som = SOM_NONE; /* the prefixes created by the SOM code do not
|
||||
themselves track som */
|
||||
bool utf8 = false; // handling done earlier
|
||||
reduceGraph(w, som, utf8, cc);
|
||||
reduceGraph(g, som, utf8, cc);
|
||||
|
||||
// There may be redundant regions that we can remove
|
||||
if (cc.grey.performGraphSimplification) {
|
||||
removeRegionRedundancy(w, som);
|
||||
removeRegionRedundancy(g, som);
|
||||
}
|
||||
|
||||
// "Short Exhaustible Passthrough" patterns always become outfixes.
|
||||
if (isSEP(w, rm, cc.grey)) {
|
||||
if (isSEP(g, rm, cc.grey)) {
|
||||
DEBUG_PRINTF("graph is SEP\n");
|
||||
if (rose->addOutfix(w)) {
|
||||
if (rose->addOutfix(g)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (splitOffAnchoredAcyclic(*rose, w, cc)) {
|
||||
if (splitOffAnchoredAcyclic(*rose, g, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (handleSmallLiteralSets(*rose, w, cc)
|
||||
|| handleFixedWidth(*rose, w, cc.grey)) {
|
||||
if (handleSmallLiteralSets(*rose, g, cc)
|
||||
|| handleFixedWidth(*rose, g, cc.grey)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (handleDecoratedLiterals(*rose, w, cc)) {
|
||||
if (handleDecoratedLiterals(*rose, g, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (doViolet(*rose, w, prefilter, false, rm, cc)) {
|
||||
if (doViolet(*rose, g, prefilter, false, rm, cc)) {
|
||||
return true;
|
||||
}
|
||||
if (splitOffPuffs(*rose, rm, w, prefilter, cc)) {
|
||||
if (splitOffPuffs(*rose, rm, g, prefilter, cc)) {
|
||||
return true;
|
||||
}
|
||||
if (doViolet(*rose, w, prefilter, true, rm, cc)) {
|
||||
if (doViolet(*rose, g, prefilter, true, rm, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("trying for outfix\n");
|
||||
if (rose->addOutfix(w)) {
|
||||
if (rose->addOutfix(g)) {
|
||||
DEBUG_PRINTF("ok\n");
|
||||
return true;
|
||||
}
|
||||
@@ -586,26 +588,4 @@ bool NG::addLiteral(const ue2_literal &literal, u32 expr_index,
|
||||
return true;
|
||||
}
|
||||
|
||||
NGWrapper::NGWrapper(unsigned int ei, bool highlander_in, bool utf8_in,
|
||||
bool prefilter_in, som_type som_in, ReportID r,
|
||||
u64a min_offset_in, u64a max_offset_in, u64a min_length_in,
|
||||
u32 edit_distance_in)
|
||||
: expressionIndex(ei), reportId(r), highlander(highlander_in),
|
||||
utf8(utf8_in), prefilter(prefilter_in), som(som_in),
|
||||
min_offset(min_offset_in), max_offset(max_offset_in),
|
||||
min_length(min_length_in), edit_distance(edit_distance_in) {
|
||||
// All special nodes/edges are added in NGHolder's constructor.
|
||||
DEBUG_PRINTF("built %p: expr=%u report=%u%s%s%s%s "
|
||||
"min_offset=%llu max_offset=%llu min_length=%llu "
|
||||
"edit_distance=%u\n",
|
||||
this, expressionIndex, reportId,
|
||||
highlander ? " highlander" : "",
|
||||
utf8 ? " utf8" : "",
|
||||
prefilter ? " prefilter" : "",
|
||||
(som != SOM_NONE) ? " som" : "",
|
||||
min_offset, max_offset, min_length, edit_distance);
|
||||
}
|
||||
|
||||
NGWrapper::~NGWrapper() {}
|
||||
|
||||
} // namespace ue2
|
||||
|
@@ -27,7 +27,7 @@
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief NG, NGHolder, NGWrapper declarations.
|
||||
* \brief NG declaration.
|
||||
*/
|
||||
|
||||
#ifndef NG_H
|
||||
@@ -58,31 +58,7 @@ namespace ue2 {
|
||||
struct CompileContext;
|
||||
struct ue2_literal;
|
||||
|
||||
class NGWrapper : public NGHolder {
|
||||
public:
|
||||
NGWrapper(unsigned int expressionIndex, bool highlander, bool utf8,
|
||||
bool prefilter, const som_type som, ReportID rid, u64a min_offset,
|
||||
u64a max_offset, u64a min_length, u32 edit_distance);
|
||||
|
||||
~NGWrapper() override;
|
||||
|
||||
/** index of the expression represented by this graph, used
|
||||
* - down the track in error handling
|
||||
* - identifying parts of an expression in highlander mode
|
||||
*/
|
||||
const unsigned int expressionIndex;
|
||||
|
||||
const ReportID reportId; /**< user-visible report id */
|
||||
const bool highlander; /**< user-specified single match only */
|
||||
const bool utf8; /**< UTF-8 mode */
|
||||
const bool prefilter; /**< prefiltering mode */
|
||||
const som_type som; /**< SOM type requested */
|
||||
u64a min_offset; /**< extparam min_offset value */
|
||||
u64a max_offset; /**< extparam max_offset value */
|
||||
u64a min_length; /**< extparam min_length value */
|
||||
u32 edit_distance; /**< extparam edit_distance value */
|
||||
};
|
||||
|
||||
class ExpressionInfo;
|
||||
class RoseBuild;
|
||||
class SmallWriteBuild;
|
||||
|
||||
@@ -94,14 +70,14 @@ public:
|
||||
|
||||
/** \brief Consumes a pattern, returns false or throws a CompileError
|
||||
* exception if the graph cannot be consumed. */
|
||||
bool addGraph(NGWrapper &w);
|
||||
bool addGraph(ExpressionInfo &expr, NGHolder &h);
|
||||
|
||||
/** \brief Consumes a graph, cut-down version of addGraph for use by SOM
|
||||
* processing. */
|
||||
bool addHolder(NGHolder &h);
|
||||
|
||||
/** \brief Adds a literal to Rose, used by literal shortcut passes (instead of
|
||||
* using \ref addGraph) */
|
||||
/** \brief Adds a literal to Rose, used by literal shortcut passes (instead
|
||||
* of using \ref addGraph) */
|
||||
bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report,
|
||||
bool highlander, som_type som);
|
||||
|
||||
@@ -128,7 +104,8 @@ public:
|
||||
*
|
||||
* Shared with the small write compiler.
|
||||
*/
|
||||
void reduceGraph(NGHolder &g, som_type som, bool utf8, const CompileContext &cc);
|
||||
void reduceGraph(NGHolder &g, som_type som, bool utf8,
|
||||
const CompileContext &cc);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -47,6 +47,7 @@
|
||||
#include "ng_prune.h"
|
||||
#include "ng_redundancy.h"
|
||||
#include "ng_util.h"
|
||||
#include "compiler/compiler.h"
|
||||
#include "parser/position.h" // for POS flags
|
||||
#include "util/bitutils.h" // for findAndClearLSB_32
|
||||
#include "util/boundary_reports.h"
|
||||
@@ -184,43 +185,45 @@ void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts,
|
||||
}
|
||||
|
||||
static
|
||||
void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) {
|
||||
void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
|
||||
NFAVertex v, s32 adj) {
|
||||
// Don't try and set the report ID of a special vertex.
|
||||
assert(!is_special(v, g));
|
||||
|
||||
// If there's a report set already, we're replacing it.
|
||||
g[v].reports.clear();
|
||||
|
||||
Report ir = rm.getBasicInternalReport(g, adj);
|
||||
Report ir = rm.getBasicInternalReport(expr, adj);
|
||||
|
||||
g[v].reports.insert(rm.getInternalId(ir));
|
||||
DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
|
||||
}
|
||||
|
||||
static
|
||||
NFAVertex makeClone(ReportManager &rm, NGWrapper &g, NFAVertex v,
|
||||
const CharReach &cr_mask) {
|
||||
NFAVertex makeClone(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
|
||||
NFAVertex v, const CharReach &cr_mask) {
|
||||
NFAVertex clone = clone_vertex(g, v);
|
||||
g[clone].char_reach &= cr_mask;
|
||||
clone_out_edges(g, v, clone);
|
||||
clone_in_edges(g, v, clone);
|
||||
|
||||
if (v == g.startDs) {
|
||||
if (g.utf8) {
|
||||
if (expr.utf8) {
|
||||
g[clone].char_reach &= ~UTF_START_CR;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("marked as virt\n");
|
||||
g[clone].assert_flags = POS_FLAG_VIRTUAL_START;
|
||||
|
||||
setReportId(rm, g, clone, 0);
|
||||
setReportId(rm, g, expr, clone, 0);
|
||||
}
|
||||
|
||||
return clone;
|
||||
}
|
||||
|
||||
static
|
||||
void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
|
||||
void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
|
||||
NFAVertex v, bool ucp) {
|
||||
assert(v != g.start);
|
||||
assert(v != g.accept);
|
||||
assert(v != g.acceptEod);
|
||||
@@ -232,14 +235,14 @@ void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
|
||||
auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; };
|
||||
|
||||
// Split v into word/nonword vertices with only asserting out-edges.
|
||||
NFAVertex w_out = makeClone(rm, g, v, cr_word);
|
||||
NFAVertex nw_out = makeClone(rm, g, v, cr_nonword);
|
||||
NFAVertex w_out = makeClone(rm, g, expr, v, cr_word);
|
||||
NFAVertex nw_out = makeClone(rm, g, expr, v, cr_nonword);
|
||||
remove_out_edge_if(w_out, has_no_assert, g);
|
||||
remove_out_edge_if(nw_out, has_no_assert, g);
|
||||
|
||||
// Split v into word/nonword vertices with only asserting in-edges.
|
||||
NFAVertex w_in = makeClone(rm, g, v, cr_word);
|
||||
NFAVertex nw_in = makeClone(rm, g, v, cr_nonword);
|
||||
NFAVertex w_in = makeClone(rm, g, expr, v, cr_word);
|
||||
NFAVertex nw_in = makeClone(rm, g, expr, v, cr_nonword);
|
||||
remove_in_edge_if(w_in, has_no_assert, g);
|
||||
remove_in_edge_if(nw_in, has_no_assert, g);
|
||||
|
||||
@@ -250,7 +253,8 @@ void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
|
||||
}
|
||||
|
||||
static
|
||||
void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
|
||||
void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
|
||||
set<NFAEdge> *dead) {
|
||||
for (const auto &e : edges_range(g)) {
|
||||
u32 flags = g[e].assert_flags;
|
||||
if (!flags) {
|
||||
@@ -363,7 +367,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
|
||||
} else if (v_w) {
|
||||
/* need to add a word byte */
|
||||
NFAVertex vv = add_vertex(g);
|
||||
setReportId(rm, g, vv, -1);
|
||||
setReportId(rm, g, expr, vv, -1);
|
||||
g[vv].char_reach = CHARREACH_WORD;
|
||||
add_edge(vv, g.accept, g);
|
||||
g[e].assert_flags = 0;
|
||||
@@ -372,7 +376,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
|
||||
} else {
|
||||
/* need to add a non word byte or see eod */
|
||||
NFAVertex vv = add_vertex(g);
|
||||
setReportId(rm, g, vv, -1);
|
||||
setReportId(rm, g, expr, vv, -1);
|
||||
g[vv].char_reach = CHARREACH_NONWORD;
|
||||
add_edge(vv, g.accept, g);
|
||||
g[e].assert_flags = 0;
|
||||
@@ -416,7 +420,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
|
||||
} else if (v_w) {
|
||||
/* need to add a word byte */
|
||||
NFAVertex vv = add_vertex(g);
|
||||
setReportId(rm, g, vv, -1);
|
||||
setReportId(rm, g, expr, vv, -1);
|
||||
g[vv].char_reach = CHARREACH_WORD_UCP_PRE;
|
||||
add_edge(vv, g.accept, g);
|
||||
g[e].assert_flags = 0;
|
||||
@@ -425,7 +429,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
|
||||
} else {
|
||||
/* need to add a non word byte or see eod */
|
||||
NFAVertex vv = add_vertex(g);
|
||||
setReportId(rm, g, vv, -1);
|
||||
setReportId(rm, g, expr, vv, -1);
|
||||
g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE;
|
||||
add_edge(vv, g.accept, g);
|
||||
g[e].assert_flags = 0;
|
||||
@@ -450,7 +454,8 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
|
||||
}
|
||||
}
|
||||
|
||||
void resolveAsserts(ReportManager &rm, NGWrapper &g) {
|
||||
void resolveAsserts(ReportManager &rm, NGHolder &g,
|
||||
const ExpressionInfo &expr) {
|
||||
vector<NFAEdge> asserts = getAsserts(g);
|
||||
if (asserts.empty()) {
|
||||
return;
|
||||
@@ -460,20 +465,20 @@ void resolveAsserts(ReportManager &rm, NGWrapper &g) {
|
||||
map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */
|
||||
findSplitters(g, asserts, &to_split, &to_split_ucp);
|
||||
if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) {
|
||||
throw CompileError(g.expressionIndex, "Pattern is too large.");
|
||||
throw CompileError(expr.index, "Pattern is too large.");
|
||||
}
|
||||
|
||||
for (const auto &m : to_split) {
|
||||
assert(!contains(to_split_ucp, m.first));
|
||||
splitVertex(rm, g, m.second, false);
|
||||
splitVertex(rm, g, expr, m.second, false);
|
||||
}
|
||||
|
||||
for (const auto &m : to_split_ucp) {
|
||||
splitVertex(rm, g, m.second, true);
|
||||
splitVertex(rm, g, expr, m.second, true);
|
||||
}
|
||||
|
||||
set<NFAEdge> dead;
|
||||
resolveEdges(rm, g, &dead);
|
||||
resolveEdges(rm, g, expr, &dead);
|
||||
|
||||
remove_edges(dead, g);
|
||||
renumber_vertices(g);
|
||||
@@ -485,15 +490,16 @@ void resolveAsserts(ReportManager &rm, NGWrapper &g) {
|
||||
clearReports(g);
|
||||
}
|
||||
|
||||
void ensureCodePointStart(ReportManager &rm, NGWrapper &g) {
|
||||
void ensureCodePointStart(ReportManager &rm, NGHolder &g,
|
||||
const ExpressionInfo &expr) {
|
||||
/* In utf8 mode there is an implicit assertion that we start at codepoint
|
||||
* boundaries. Assert resolution handles the badness coming from asserts.
|
||||
* The only other source of trouble is startDs->accept connections.
|
||||
*/
|
||||
NFAEdge orig = edge(g.startDs, g.accept, g);
|
||||
if (g.utf8 && orig) {
|
||||
DEBUG_PRINTF("rectifying %u\n", g.reportId);
|
||||
Report ir = rm.getBasicInternalReport(g);
|
||||
if (expr.utf8 && orig) {
|
||||
DEBUG_PRINTF("rectifying %u\n", expr.report);
|
||||
Report ir = rm.getBasicInternalReport(expr);
|
||||
ReportID rep = rm.getInternalId(ir);
|
||||
|
||||
NFAVertex v_a = add_vertex(g);
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -36,12 +36,14 @@
|
||||
namespace ue2 {
|
||||
|
||||
struct BoundaryReports;
|
||||
class NGWrapper;
|
||||
class ExpressionInfo;
|
||||
class NGHolder;
|
||||
class ReportManager;
|
||||
|
||||
void resolveAsserts(ReportManager &rm, NGWrapper &g);
|
||||
void resolveAsserts(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr);
|
||||
|
||||
void ensureCodePointStart(ReportManager &rm, NGWrapper &g);
|
||||
void ensureCodePointStart(ReportManager &rm, NGHolder &g,
|
||||
const ExpressionInfo &expr);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
|
@@ -28,11 +28,13 @@
|
||||
|
||||
/** \file
|
||||
* \brief: NFA Graph Builder: used by Glushkov construction to construct an
|
||||
* NGWrapper from a parsed expression.
|
||||
* NGHolder from a parsed expression.
|
||||
*/
|
||||
|
||||
#include "ng_builder.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "ng.h"
|
||||
#include "ng_builder.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "compiler/compiler.h" // for ParsedExpression
|
||||
@@ -79,7 +81,7 @@ public:
|
||||
void cloneRegion(Position first, Position last,
|
||||
unsigned posOffset) override;
|
||||
|
||||
unique_ptr<NGWrapper> getGraph() override;
|
||||
BuiltExpression getGraph() override;
|
||||
|
||||
private:
|
||||
/** fetch a vertex given its Position ID. */
|
||||
@@ -94,8 +96,11 @@ private:
|
||||
/** \brief Greybox: used for resource limits. */
|
||||
const Grey &grey;
|
||||
|
||||
/** \brief Underlying NGWrapper graph. */
|
||||
unique_ptr<NGWrapper> graph;
|
||||
/** \brief Underlying graph. */
|
||||
unique_ptr<NGHolder> graph;
|
||||
|
||||
/** \brief Underlying expression info. */
|
||||
ExpressionInfo expr;
|
||||
|
||||
/** \brief mapping from position to vertex. Use \ref getVertex for access.
|
||||
* */
|
||||
@@ -108,13 +113,9 @@ private:
|
||||
} // namespace
|
||||
|
||||
NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in,
|
||||
const ParsedExpression &expr)
|
||||
: rm(rm_in), grey(grey_in),
|
||||
graph(ue2::make_unique<NGWrapper>(
|
||||
expr.index, expr.highlander, expr.utf8, expr.prefilter, expr.som,
|
||||
expr.id, expr.min_offset, expr.max_offset, expr.min_length,
|
||||
expr.edit_distance)),
|
||||
vertIdx(N_SPECIALS) {
|
||||
const ParsedExpression &parsed)
|
||||
: rm(rm_in), grey(grey_in), graph(ue2::make_unique<NGHolder>()),
|
||||
expr(parsed.expr), vertIdx(N_SPECIALS) {
|
||||
|
||||
// Reserve space for a reasonably-sized NFA
|
||||
id2vertex.reserve(64);
|
||||
@@ -151,7 +152,7 @@ void NFABuilderImpl::addVertex(Position pos) {
|
||||
(*graph)[v].index = pos;
|
||||
}
|
||||
|
||||
unique_ptr<NGWrapper> NFABuilderImpl::getGraph() {
|
||||
BuiltExpression NFABuilderImpl::getGraph() {
|
||||
DEBUG_PRINTF("built graph has %zu vertices and %zu edges\n",
|
||||
num_vertices(*graph), num_edges(*graph));
|
||||
|
||||
@@ -162,13 +163,13 @@ unique_ptr<NGWrapper> NFABuilderImpl::getGraph() {
|
||||
throw CompileError("Pattern too large.");
|
||||
}
|
||||
|
||||
return move(graph);
|
||||
return { expr, move(graph) };
|
||||
}
|
||||
|
||||
void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) {
|
||||
Report ir = rm.getBasicInternalReport(*graph, offsetAdjust);
|
||||
Report ir = rm.getBasicInternalReport(expr, offsetAdjust);
|
||||
DEBUG_PRINTF("setting report id on %u = (%u, %d, %u)\n",
|
||||
pos, graph->reportId, offsetAdjust, ir.ekey);
|
||||
pos, expr.report, offsetAdjust, ir.ekey);
|
||||
|
||||
NFAVertex v = getVertex(pos);
|
||||
auto &reports = (*graph)[v].reports;
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -28,7 +28,7 @@
|
||||
|
||||
/** \file
|
||||
* \brief: NFA Graph Builder: used by Glushkov construction to construct an
|
||||
* NGWrapper from a parsed expression.
|
||||
* NGHolder from a parsed expression.
|
||||
*/
|
||||
|
||||
#ifndef NG_BUILDER_H
|
||||
@@ -44,8 +44,8 @@
|
||||
namespace ue2 {
|
||||
|
||||
class CharReach;
|
||||
class NGWrapper;
|
||||
class ReportManager;
|
||||
struct BuiltExpression;
|
||||
struct CompileContext;
|
||||
|
||||
class ParsedExpression;
|
||||
@@ -83,10 +83,10 @@ public:
|
||||
unsigned posOffset) = 0;
|
||||
|
||||
/**
|
||||
* \brief Returns the built NGWrapper graph.
|
||||
* \brief Returns the built NGHolder graph and ExpressionInfo.
|
||||
* Note that this builder cannot be used after this call.
|
||||
*/
|
||||
virtual std::unique_ptr<NGWrapper> getGraph() = 0;
|
||||
virtual BuiltExpression getGraph() = 0;
|
||||
};
|
||||
|
||||
/** Construct a usable NFABuilder. */
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -35,24 +35,25 @@
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "ng_dump.h"
|
||||
#include "nfagraph/ng_dump.h"
|
||||
|
||||
#include "hwlm/hwlm_build.h"
|
||||
#include "ng.h"
|
||||
#include "ng_util.h"
|
||||
#include "parser/position.h"
|
||||
#include "hs_compile.h" /* for HS_MODE_* flags */
|
||||
#include "ue2common.h"
|
||||
#include "compiler/compiler.h"
|
||||
#include "hwlm/hwlm_build.h"
|
||||
#include "nfa/accel.h"
|
||||
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
|
||||
#include "smallwrite/smallwrite_dump.h"
|
||||
#include "nfagraph/ng.h"
|
||||
#include "nfagraph/ng_util.h"
|
||||
#include "parser/position.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "rose/rose_internal.h"
|
||||
#include "smallwrite/smallwrite_dump.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/report.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/ue2string.h"
|
||||
#include "hs_compile.h" /* for HS_MODE_* flags */
|
||||
|
||||
#include <cmath>
|
||||
#include <fstream>
|
||||
@@ -287,13 +288,13 @@ void dumpGraphImpl(const char *name, const GraphT &g,
|
||||
// manual instantiation of templated dumpGraph above.
|
||||
template void dumpGraphImpl(const char *, const NGHolder &);
|
||||
|
||||
void dumpDotWrapperImpl(const NGWrapper &nw, const char *name,
|
||||
const Grey &grey) {
|
||||
void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr,
|
||||
const char *name, const Grey &grey) {
|
||||
if (grey.dumpFlags & Grey::DUMP_INT_GRAPH) {
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "Expr_" << nw.expressionIndex << "_" << name << ".dot";
|
||||
ss << grey.dumpPath << "Expr_" << expr.index << "_" << name << ".dot";
|
||||
DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str());
|
||||
dumpGraphImpl(ss.str().c_str(), nw);
|
||||
dumpGraphImpl(ss.str().c_str(), g);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -48,7 +48,7 @@ namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
class NG;
|
||||
class NGWrapper;
|
||||
class ExpressionInfo;
|
||||
class ReportManager;
|
||||
|
||||
// Implementations for stubs below -- all have the suffix "Impl".
|
||||
@@ -61,7 +61,8 @@ void dumpGraphImpl(const char *name, const GraphT &g);
|
||||
template <typename GraphT>
|
||||
void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm);
|
||||
|
||||
void dumpDotWrapperImpl(const NGWrapper &w, const char *name, const Grey &grey);
|
||||
void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr,
|
||||
const char *name, const Grey &grey);
|
||||
|
||||
void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, u32 comp,
|
||||
const Grey &grey);
|
||||
@@ -88,10 +89,10 @@ static inline void dumpGraph(UNUSED const char *name, UNUSED const GraphT &g) {
|
||||
// Stubs which call through to dump code if compiled in.
|
||||
|
||||
UNUSED static inline
|
||||
void dumpDotWrapper(UNUSED const NGWrapper &w, UNUSED const char *name,
|
||||
UNUSED const Grey &grey) {
|
||||
void dumpDotWrapper(UNUSED const NGHolder &g, UNUSED const ExpressionInfo &expr,
|
||||
UNUSED const char *name, UNUSED const Grey &grey) {
|
||||
#ifdef DUMP_SUPPORT
|
||||
dumpDotWrapperImpl(w, name, grey);
|
||||
dumpDotWrapperImpl(g, expr, name, grey);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@@ -27,8 +27,8 @@
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Code for discovering properties of an NGWrapper used by
|
||||
* hs_expression_info.
|
||||
* \brief Code for discovering properties of an NFA graph used by
|
||||
* hs_expression_info().
|
||||
*/
|
||||
#include "ng_expr_info.h"
|
||||
|
||||
@@ -58,42 +58,42 @@ namespace ue2 {
|
||||
|
||||
/* get rid of leading \b and multiline ^ vertices */
|
||||
static
|
||||
void removeLeadingVirtualVerticesFromRoot(NGWrapper &w, NFAVertex root) {
|
||||
void removeLeadingVirtualVerticesFromRoot(NGHolder &g, NFAVertex root) {
|
||||
vector<NFAVertex> victims;
|
||||
|
||||
for (auto v : adjacent_vertices_range(root, w)) {
|
||||
if (w[v].assert_flags & POS_FLAG_VIRTUAL_START) {
|
||||
for (auto v : adjacent_vertices_range(root, g)) {
|
||||
if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
|
||||
DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n");
|
||||
victims.push_back(v);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto u : victims) {
|
||||
for (auto v : adjacent_vertices_range(u, w)) {
|
||||
add_edge_if_not_present(root, v, w);
|
||||
for (auto v : adjacent_vertices_range(u, g)) {
|
||||
add_edge_if_not_present(root, v, g);
|
||||
}
|
||||
}
|
||||
|
||||
remove_vertices(victims, w);
|
||||
remove_vertices(victims, g);
|
||||
}
|
||||
|
||||
static
|
||||
void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v,
|
||||
void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v,
|
||||
const vector<DepthMinMax> &depths, DepthMinMax &info) {
|
||||
if (is_any_accept(v, w)) {
|
||||
if (is_any_accept(v, g)) {
|
||||
return;
|
||||
}
|
||||
if (is_any_start(v, w)) {
|
||||
if (is_any_start(v, g)) {
|
||||
info.min = 0;
|
||||
info.max = max(info.max, depth(0));
|
||||
return;
|
||||
}
|
||||
|
||||
u32 idx = w[v].index;
|
||||
u32 idx = g[v].index;
|
||||
assert(idx < depths.size());
|
||||
const DepthMinMax &d = depths.at(idx);
|
||||
|
||||
for (ReportID report_id : w[v].reports) {
|
||||
for (ReportID report_id : g[v].reports) {
|
||||
const Report &report = rm.getReport(report_id);
|
||||
assert(report.type == EXTERNAL_CALLBACK);
|
||||
|
||||
@@ -118,7 +118,7 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v,
|
||||
rd.max = min(rd.max, max_offset);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("vertex %zu report %u: %s\n", w[v].index, report_id,
|
||||
DEBUG_PRINTF("vertex %zu report %u: %s\n", g[v].index, report_id,
|
||||
rd.str().c_str());
|
||||
|
||||
info = unionDepthMinMax(info, rd);
|
||||
@@ -126,8 +126,8 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v,
|
||||
}
|
||||
|
||||
static
|
||||
bool hasOffsetAdjust(const ReportManager &rm, const NGWrapper &w) {
|
||||
for (const auto &report_id : all_reports(w)) {
|
||||
bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g) {
|
||||
for (const auto &report_id : all_reports(g)) {
|
||||
if (rm.getReport(report_id).offsetAdjust) {
|
||||
return true;
|
||||
}
|
||||
@@ -135,28 +135,29 @@ bool hasOffsetAdjust(const ReportManager &rm, const NGWrapper &w) {
|
||||
return false;
|
||||
}
|
||||
|
||||
void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info) {
|
||||
void fillExpressionInfo(ReportManager &rm, NGHolder &g,
|
||||
const ExpressionInfo &expr, hs_expr_info *info) {
|
||||
assert(info);
|
||||
|
||||
/* ensure utf8 starts at cp boundary */
|
||||
ensureCodePointStart(rm, w);
|
||||
resolveAsserts(rm, w);
|
||||
optimiseVirtualStarts(w);
|
||||
ensureCodePointStart(rm, g, expr);
|
||||
resolveAsserts(rm, g, expr);
|
||||
optimiseVirtualStarts(g);
|
||||
|
||||
removeLeadingVirtualVerticesFromRoot(w, w.start);
|
||||
removeLeadingVirtualVerticesFromRoot(w, w.startDs);
|
||||
removeLeadingVirtualVerticesFromRoot(g, g.start);
|
||||
removeLeadingVirtualVerticesFromRoot(g, g.startDs);
|
||||
|
||||
vector<DepthMinMax> depths;
|
||||
calcDepthsFrom(w, w.start, depths);
|
||||
calcDepthsFrom(g, g.start, depths);
|
||||
|
||||
DepthMinMax d;
|
||||
|
||||
for (auto u : inv_adjacent_vertices_range(w.accept, w)) {
|
||||
checkVertex(rm, w, u, depths, d);
|
||||
for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
|
||||
checkVertex(rm, g, u, depths, d);
|
||||
}
|
||||
|
||||
for (auto u : inv_adjacent_vertices_range(w.acceptEod, w)) {
|
||||
checkVertex(rm, w, u, depths, d);
|
||||
for (auto u : inv_adjacent_vertices_range(g.acceptEod, g)) {
|
||||
checkVertex(rm, g, u, depths, d);
|
||||
}
|
||||
|
||||
if (d.max.is_finite()) {
|
||||
@@ -170,9 +171,9 @@ void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info) {
|
||||
info->min_width = UINT_MAX;
|
||||
}
|
||||
|
||||
info->unordered_matches = hasOffsetAdjust(rm, w);
|
||||
info->matches_at_eod = can_match_at_eod(w);
|
||||
info->matches_only_at_eod = can_only_match_at_eod(w);
|
||||
info->unordered_matches = hasOffsetAdjust(rm, g);
|
||||
info->matches_at_eod = can_match_at_eod(g);
|
||||
info->matches_only_at_eod = can_only_match_at_eod(g);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -27,7 +27,7 @@
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Code for discovering properties of an NGWrapper used by
|
||||
* \brief Code for discovering properties of an expression used by
|
||||
* hs_expression_info.
|
||||
*/
|
||||
|
||||
@@ -36,14 +36,14 @@
|
||||
|
||||
struct hs_expr_info;
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGWrapper;
|
||||
class ExpressionInfo;
|
||||
class NGHolder;
|
||||
class ReportManager;
|
||||
|
||||
void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info);
|
||||
void fillExpressionInfo(ReportManager &rm, NGHolder &g,
|
||||
const ExpressionInfo &expr, hs_expr_info *info);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -38,16 +38,19 @@
|
||||
* match given these constraints, or transform the graph in order to make a
|
||||
* constraint implicit.
|
||||
*/
|
||||
|
||||
#include "ng_extparam.h"
|
||||
|
||||
#include "ng.h"
|
||||
#include "ng_depth.h"
|
||||
#include "ng_dump.h"
|
||||
#include "ng_extparam.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_reports.h"
|
||||
#include "ng_som_util.h"
|
||||
#include "ng_width.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "compiler/compiler.h"
|
||||
#include "parser/position.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/compile_error.h"
|
||||
@@ -129,7 +132,8 @@ DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) {
|
||||
|
||||
/** \brief Replace the graph's reports with new reports that specify bounds. */
|
||||
static
|
||||
void updateReportBounds(ReportManager &rm, NGWrapper &g, NFAVertex accept,
|
||||
void updateReportBounds(ReportManager &rm, NGHolder &g,
|
||||
const ExpressionInfo &expr, NFAVertex accept,
|
||||
set<NFAVertex> &done) {
|
||||
for (auto v : inv_adjacent_vertices_range(accept, g)) {
|
||||
// Don't operate on g.accept itself.
|
||||
@@ -153,16 +157,16 @@ void updateReportBounds(ReportManager &rm, NGWrapper &g, NFAVertex accept,
|
||||
|
||||
// Note that we need to cope with offset adjustment here.
|
||||
|
||||
ir.minOffset = g.min_offset - ir.offsetAdjust;
|
||||
if (g.max_offset == MAX_OFFSET) {
|
||||
ir.minOffset = expr.min_offset - ir.offsetAdjust;
|
||||
if (expr.max_offset == MAX_OFFSET) {
|
||||
ir.maxOffset = MAX_OFFSET;
|
||||
} else {
|
||||
ir.maxOffset = g.max_offset - ir.offsetAdjust;
|
||||
ir.maxOffset = expr.max_offset - ir.offsetAdjust;
|
||||
}
|
||||
assert(ir.maxOffset >= ir.minOffset);
|
||||
|
||||
ir.minLength = g.min_length;
|
||||
if (g.min_length && !g.som) {
|
||||
ir.minLength = expr.min_length;
|
||||
if (expr.min_length && !expr.som) {
|
||||
ir.quashSom = true;
|
||||
}
|
||||
|
||||
@@ -196,22 +200,23 @@ bool hasVirtualStarts(const NGHolder &g) {
|
||||
* anchored and unanchored paths, but it's too tricky for the moment.
|
||||
*/
|
||||
static
|
||||
bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth,
|
||||
bool anchorPatternWithBoundedRepeat(NGHolder &g, const ExpressionInfo &expr,
|
||||
const depth &minWidth,
|
||||
const depth &maxWidth) {
|
||||
assert(!g.som);
|
||||
assert(g.max_offset != MAX_OFFSET);
|
||||
assert(!expr.som);
|
||||
assert(expr.max_offset != MAX_OFFSET);
|
||||
assert(minWidth <= maxWidth);
|
||||
assert(maxWidth.is_reachable());
|
||||
|
||||
DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n",
|
||||
minWidth.str().c_str(), maxWidth.str().c_str(), g.min_offset,
|
||||
g.max_offset);
|
||||
minWidth.str().c_str(), maxWidth.str().c_str(),
|
||||
expr.min_offset, expr.max_offset);
|
||||
|
||||
if (g.max_offset > MAX_MAXOFFSET_TO_ANCHOR) {
|
||||
if (expr.max_offset > MAX_MAXOFFSET_TO_ANCHOR) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (g.max_offset < minWidth) {
|
||||
if (expr.max_offset < minWidth) {
|
||||
assert(0);
|
||||
return false;
|
||||
}
|
||||
@@ -232,10 +237,10 @@ bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth,
|
||||
u32 min_bound, max_bound;
|
||||
if (maxWidth.is_infinite()) {
|
||||
min_bound = 0;
|
||||
max_bound = g.max_offset - minWidth;
|
||||
max_bound = expr.max_offset - minWidth;
|
||||
} else {
|
||||
min_bound = g.min_offset > maxWidth ? g.min_offset - maxWidth : 0;
|
||||
max_bound = g.max_offset - minWidth;
|
||||
min_bound = expr.min_offset > maxWidth ? expr.min_offset - maxWidth : 0;
|
||||
max_bound = expr.max_offset - minWidth;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound);
|
||||
@@ -315,7 +320,7 @@ NFAVertex findSingleCyclic(const NGHolder &g) {
|
||||
}
|
||||
|
||||
static
|
||||
bool hasOffsetAdjust(const ReportManager &rm, NGWrapper &g,
|
||||
bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g,
|
||||
int *adjust) {
|
||||
const auto &reports = all_reports(g);
|
||||
if (reports.empty()) {
|
||||
@@ -342,10 +347,11 @@ bool hasOffsetAdjust(const ReportManager &rm, NGWrapper &g,
|
||||
* /foo.*bar/{min_length=100} --> /foo.{94,}bar/
|
||||
*/
|
||||
static
|
||||
bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) {
|
||||
assert(g.min_length);
|
||||
bool transformMinLengthToRepeat(const ReportManager &rm, NGHolder &g,
|
||||
ExpressionInfo &expr) {
|
||||
assert(expr.min_length);
|
||||
|
||||
if (g.min_length > MAX_MINLENGTH_TO_CONVERT) {
|
||||
if (expr.min_length > MAX_MINLENGTH_TO_CONVERT) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -437,10 +443,10 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) {
|
||||
DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width,
|
||||
g[cyclic].index);
|
||||
|
||||
if (width >= g.min_length) {
|
||||
if (width >= expr.min_length) {
|
||||
DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n",
|
||||
g.min_length, width);
|
||||
g.min_length = 0;
|
||||
expr.min_length, width);
|
||||
expr.min_length = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -468,7 +474,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) {
|
||||
|
||||
const CharReach &cr = g[cyclic].char_reach;
|
||||
|
||||
for (u32 i = 0; i < g.min_length - width - 1; ++i) {
|
||||
for (u32 i = 0; i < expr.min_length - width - 1; ++i) {
|
||||
v = add_vertex(g);
|
||||
g[v].char_reach = cr;
|
||||
|
||||
@@ -487,19 +493,19 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) {
|
||||
renumber_edges(g);
|
||||
clearReports(g);
|
||||
|
||||
g.min_length = 0;
|
||||
expr.min_length = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool hasExtParams(const NGWrapper &g) {
|
||||
if (g.min_length != 0) {
|
||||
bool hasExtParams(const ExpressionInfo &expr) {
|
||||
if (expr.min_length != 0) {
|
||||
return true;
|
||||
}
|
||||
if (g.min_offset != 0) {
|
||||
if (expr.min_offset != 0) {
|
||||
return true;
|
||||
}
|
||||
if (g.max_offset != MAX_OFFSET) {
|
||||
if (expr.max_offset != MAX_OFFSET) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@@ -535,7 +541,7 @@ const depth& minDistToAccept(const NFAVertexBidiDepth &d) {
|
||||
}
|
||||
|
||||
static
|
||||
bool isEdgePrunable(const NGWrapper &g,
|
||||
bool isEdgePrunable(const NGHolder &g, const ExpressionInfo &expr,
|
||||
const vector<NFAVertexBidiDepth> &depths,
|
||||
const NFAEdge &e) {
|
||||
const NFAVertex u = source(e, g);
|
||||
@@ -564,29 +570,29 @@ bool isEdgePrunable(const NGWrapper &g,
|
||||
const NFAVertexBidiDepth &du = depths.at(u_idx);
|
||||
const NFAVertexBidiDepth &dv = depths.at(v_idx);
|
||||
|
||||
if (g.min_offset) {
|
||||
if (expr.min_offset) {
|
||||
depth max_offset = maxDistFromStart(du) + maxDistToAccept(dv);
|
||||
if (max_offset.is_finite() && max_offset < g.min_offset) {
|
||||
if (max_offset.is_finite() && max_offset < expr.min_offset) {
|
||||
DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (g.max_offset != MAX_OFFSET) {
|
||||
if (expr.max_offset != MAX_OFFSET) {
|
||||
depth min_offset = minDistFromStart(du) + minDistToAccept(dv);
|
||||
assert(min_offset.is_finite());
|
||||
|
||||
if (min_offset > g.max_offset) {
|
||||
if (min_offset > expr.max_offset) {
|
||||
DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (g.min_length && is_any_accept(v, g)) {
|
||||
if (expr.min_length && is_any_accept(v, g)) {
|
||||
// Simple take on min_length. If we're an edge to accept and our max
|
||||
// dist from start is too small, we can be pruned.
|
||||
const depth &width = du.fromStart.max;
|
||||
if (width.is_finite() && width < g.min_length) {
|
||||
if (width.is_finite() && width < expr.min_length) {
|
||||
DEBUG_PRINTF("max width %s from start too small for min_length\n",
|
||||
width.str().c_str());
|
||||
return true;
|
||||
@@ -597,14 +603,14 @@ bool isEdgePrunable(const NGWrapper &g,
|
||||
}
|
||||
|
||||
static
|
||||
void pruneExtUnreachable(NGWrapper &g) {
|
||||
void pruneExtUnreachable(NGHolder &g, const ExpressionInfo &expr) {
|
||||
vector<NFAVertexBidiDepth> depths;
|
||||
calcDepths(g, depths);
|
||||
|
||||
vector<NFAEdge> dead;
|
||||
|
||||
for (const auto &e : edges_range(g)) {
|
||||
if (isEdgePrunable(g, depths, e)) {
|
||||
if (isEdgePrunable(g, expr, depths, e)) {
|
||||
DEBUG_PRINTF("pruning\n");
|
||||
dead.push_back(e);
|
||||
}
|
||||
@@ -621,8 +627,8 @@ void pruneExtUnreachable(NGWrapper &g) {
|
||||
/** Remove vacuous edges in graphs where the min_offset or min_length
|
||||
* constraints dictate that they can never produce a match. */
|
||||
static
|
||||
void pruneVacuousEdges(NGWrapper &g) {
|
||||
if (!g.min_length && !g.min_offset) {
|
||||
void pruneVacuousEdges(NGHolder &g, const ExpressionInfo &expr) {
|
||||
if (!expr.min_length && !expr.min_offset) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -634,14 +640,14 @@ void pruneVacuousEdges(NGWrapper &g) {
|
||||
|
||||
// Special case: Crudely remove vacuous edges from start in graphs with a
|
||||
// min_offset.
|
||||
if (g.min_offset && u == g.start && is_any_accept(v, g)) {
|
||||
if (expr.min_offset && u == g.start && is_any_accept(v, g)) {
|
||||
DEBUG_PRINTF("vacuous edge in graph with min_offset!\n");
|
||||
dead.push_back(e);
|
||||
continue;
|
||||
}
|
||||
|
||||
// If a min_length is set, vacuous edges can be removed.
|
||||
if (g.min_length && is_any_start(u, g) && is_any_accept(v, g)) {
|
||||
if (expr.min_length && is_any_start(u, g) && is_any_accept(v, g)) {
|
||||
DEBUG_PRINTF("vacuous edge in graph with min_length!\n");
|
||||
dead.push_back(e);
|
||||
continue;
|
||||
@@ -657,7 +663,8 @@ void pruneVacuousEdges(NGWrapper &g) {
|
||||
}
|
||||
|
||||
static
|
||||
void pruneUnmatchable(NGWrapper &g, const vector<DepthMinMax> &depths,
|
||||
void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr,
|
||||
const vector<DepthMinMax> &depths,
|
||||
const ReportManager &rm, NFAVertex accept) {
|
||||
vector<NFAEdge> dead;
|
||||
|
||||
@@ -676,16 +683,16 @@ void pruneUnmatchable(NGWrapper &g, const vector<DepthMinMax> &depths,
|
||||
d.min += adj.first;
|
||||
d.max += adj.second;
|
||||
|
||||
if (d.max.is_finite() && d.max < g.min_length) {
|
||||
if (d.max.is_finite() && d.max < expr.min_length) {
|
||||
DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n",
|
||||
d.max.str().c_str(), g.min_length);
|
||||
d.max.str().c_str(), expr.min_length);
|
||||
dead.push_back(e);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (g.max_offset != MAX_OFFSET && d.min > g.max_offset) {
|
||||
if (expr.max_offset != MAX_OFFSET && d.min > expr.max_offset) {
|
||||
DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n",
|
||||
d.min.str().c_str(), g.max_offset);
|
||||
d.min.str().c_str(), expr.max_offset);
|
||||
dead.push_back(e);
|
||||
continue;
|
||||
}
|
||||
@@ -697,15 +704,16 @@ void pruneUnmatchable(NGWrapper &g, const vector<DepthMinMax> &depths,
|
||||
/** Remove edges to accepts that can never produce a match long enough to
|
||||
* satisfy our min_length and max_offset constraints. */
|
||||
static
|
||||
void pruneUnmatchable(NGWrapper &g, const ReportManager &rm) {
|
||||
if (!g.min_length) {
|
||||
void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr,
|
||||
const ReportManager &rm) {
|
||||
if (!expr.min_length) {
|
||||
return;
|
||||
}
|
||||
|
||||
vector<DepthMinMax> depths = getDistancesFromSOM(g);
|
||||
|
||||
pruneUnmatchable(g, depths, rm, g.accept);
|
||||
pruneUnmatchable(g, depths, rm, g.acceptEod);
|
||||
pruneUnmatchable(g, expr, depths, rm, g.accept);
|
||||
pruneUnmatchable(g, expr, depths, rm, g.acceptEod);
|
||||
|
||||
pruneUseless(g);
|
||||
}
|
||||
@@ -732,9 +740,9 @@ bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) {
|
||||
return false;
|
||||
}
|
||||
|
||||
void handleExtendedParams(ReportManager &rm, NGWrapper &g,
|
||||
void handleExtendedParams(ReportManager &rm, NGHolder &g, ExpressionInfo &expr,
|
||||
UNUSED const CompileContext &cc) {
|
||||
if (!hasExtParams(g)) {
|
||||
if (!hasExtParams(expr)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -751,50 +759,50 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g,
|
||||
DepthMinMax match_depths = findMatchLengths(rm, g);
|
||||
DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str());
|
||||
|
||||
if (is_anchored && maxWidth.is_finite() && g.min_offset > maxWidth) {
|
||||
if (is_anchored && maxWidth.is_finite() && expr.min_offset > maxWidth) {
|
||||
ostringstream oss;
|
||||
oss << "Expression is anchored and cannot satisfy min_offset="
|
||||
<< g.min_offset << " as it can only produce matches of length "
|
||||
<< expr.min_offset << " as it can only produce matches of length "
|
||||
<< maxWidth << " bytes at most.";
|
||||
throw CompileError(g.expressionIndex, oss.str());
|
||||
throw CompileError(expr.index, oss.str());
|
||||
}
|
||||
|
||||
if (minWidth > g.max_offset) {
|
||||
if (minWidth > expr.max_offset) {
|
||||
ostringstream oss;
|
||||
oss << "Expression has max_offset=" << g.max_offset << " but requires "
|
||||
<< minWidth << " bytes to match.";
|
||||
throw CompileError(g.expressionIndex, oss.str());
|
||||
oss << "Expression has max_offset=" << expr.max_offset
|
||||
<< " but requires " << minWidth << " bytes to match.";
|
||||
throw CompileError(expr.index, oss.str());
|
||||
}
|
||||
|
||||
if (maxWidth.is_finite() && match_depths.max < g.min_length) {
|
||||
if (maxWidth.is_finite() && match_depths.max < expr.min_length) {
|
||||
ostringstream oss;
|
||||
oss << "Expression has min_length=" << g.min_length << " but can "
|
||||
oss << "Expression has min_length=" << expr.min_length << " but can "
|
||||
"only produce matches of length " << match_depths.max <<
|
||||
" bytes at most.";
|
||||
throw CompileError(g.expressionIndex, oss.str());
|
||||
throw CompileError(expr.index, oss.str());
|
||||
}
|
||||
|
||||
if (g.min_length && g.min_length <= match_depths.min) {
|
||||
if (expr.min_length && expr.min_length <= match_depths.min) {
|
||||
DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n",
|
||||
g.min_length);
|
||||
g.min_length = 0;
|
||||
expr.min_length);
|
||||
expr.min_length = 0;
|
||||
}
|
||||
|
||||
if (!hasExtParams(g)) {
|
||||
if (!hasExtParams(expr)) {
|
||||
return;
|
||||
}
|
||||
|
||||
pruneVacuousEdges(g);
|
||||
pruneUnmatchable(g, rm);
|
||||
pruneVacuousEdges(g, expr);
|
||||
pruneUnmatchable(g, expr, rm);
|
||||
|
||||
if (!has_offset_adj) {
|
||||
pruneExtUnreachable(g);
|
||||
pruneExtUnreachable(g, expr);
|
||||
}
|
||||
|
||||
// We may have removed all the edges to accept, in which case this
|
||||
// expression cannot match.
|
||||
if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) {
|
||||
throw CompileError(g.expressionIndex, "Extended parameter "
|
||||
throw CompileError(expr.index, "Extended parameter "
|
||||
"constraints can not be satisfied for any match from "
|
||||
"this expression.");
|
||||
}
|
||||
@@ -812,27 +820,28 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g,
|
||||
|
||||
// If the pattern is completely anchored and has a min_length set, this can
|
||||
// be converted to a min_offset.
|
||||
if (g.min_length && (g.min_offset <= g.min_length) && is_anchored) {
|
||||
DEBUG_PRINTF("converting min_length to min_offset=%llu for "
|
||||
"anchored case\n", g.min_length);
|
||||
g.min_offset = g.min_length;
|
||||
g.min_length = 0;
|
||||
if (expr.min_length && (expr.min_offset <= expr.min_length) &&
|
||||
is_anchored) {
|
||||
DEBUG_PRINTF("converting min_length to min_offset=%llu for "
|
||||
"anchored case\n", expr.min_length);
|
||||
expr.min_offset = expr.min_length;
|
||||
expr.min_length = 0;
|
||||
}
|
||||
|
||||
if (g.min_offset && g.min_offset <= minWidth && !has_offset_adj) {
|
||||
if (expr.min_offset && expr.min_offset <= minWidth && !has_offset_adj) {
|
||||
DEBUG_PRINTF("min_offset=%llu constraint is unnecessary\n",
|
||||
g.min_offset);
|
||||
g.min_offset = 0;
|
||||
expr.min_offset);
|
||||
expr.min_offset = 0;
|
||||
}
|
||||
|
||||
if (!hasExtParams(g)) {
|
||||
if (!hasExtParams(expr)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// If the pattern has a min_length and is of "ratchet" form with one
|
||||
// unbounded repeat, that repeat can become a bounded repeat.
|
||||
// e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/
|
||||
if (g.min_length && transformMinLengthToRepeat(rm, g)) {
|
||||
if (expr.min_length && transformMinLengthToRepeat(rm, g, expr)) {
|
||||
DEBUG_PRINTF("converted min_length to bounded repeat\n");
|
||||
// recalc
|
||||
minWidth = findMinWidth(g);
|
||||
@@ -846,28 +855,28 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g,
|
||||
// Note that it is possible to handle graphs that have a combination of
|
||||
// anchored and unanchored paths, but it's too tricky for the moment.
|
||||
|
||||
if (g.max_offset != MAX_OFFSET && !g.som && !g.min_length &&
|
||||
!has_offset_adj && isUnanchored(g)) {
|
||||
if (anchorPatternWithBoundedRepeat(g, minWidth, maxWidth)) {
|
||||
if (expr.max_offset != MAX_OFFSET && !expr.som && !expr.min_length &&
|
||||
!has_offset_adj && isUnanchored(g)) {
|
||||
if (anchorPatternWithBoundedRepeat(g, expr, minWidth, maxWidth)) {
|
||||
DEBUG_PRINTF("minWidth=%s, maxWidth=%s\n", minWidth.str().c_str(),
|
||||
maxWidth.str().c_str());
|
||||
if (minWidth == maxWidth) {
|
||||
// For a fixed width pattern, we can retire the offsets as they
|
||||
// are implicit in the graph now.
|
||||
g.min_offset = 0;
|
||||
g.max_offset = MAX_OFFSET;
|
||||
expr.min_offset = 0;
|
||||
expr.max_offset = MAX_OFFSET;
|
||||
}
|
||||
}
|
||||
}
|
||||
//dumpGraph("final.dot", g);
|
||||
|
||||
if (!hasExtParams(g)) {
|
||||
if (!hasExtParams(expr)) {
|
||||
return;
|
||||
}
|
||||
|
||||
set<NFAVertex> done;
|
||||
updateReportBounds(rm, g, g.accept, done);
|
||||
updateReportBounds(rm, g, g.acceptEod, done);
|
||||
updateReportBounds(rm, g, expr, g.accept, done);
|
||||
updateReportBounds(rm, g, expr, g.acceptEod, done);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -37,10 +37,11 @@
|
||||
namespace ue2 {
|
||||
|
||||
struct CompileContext;
|
||||
class NGWrapper;
|
||||
class ExpressionInfo;
|
||||
class NGHolder;
|
||||
class ReportManager;
|
||||
|
||||
void handleExtendedParams(ReportManager &rm, NGWrapper &g,
|
||||
void handleExtendedParams(ReportManager &rm, NGHolder &g, ExpressionInfo &expr,
|
||||
const CompileContext &cc);
|
||||
|
||||
} // namespace ue2
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -30,12 +30,15 @@
|
||||
* \brief Literal Component Splitting. Identifies literals that span the
|
||||
* graph and moves them into Rose.
|
||||
*/
|
||||
|
||||
#include "ng_literal_component.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "ng.h"
|
||||
#include "ng_literal_component.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "compiler/compiler.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph.h"
|
||||
@@ -47,8 +50,8 @@ using namespace std;
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
bool isLiteralChar(const NGWrapper &g, NFAVertex v,
|
||||
bool &nocase, bool &casefixed) {
|
||||
bool isLiteralChar(const NGHolder &g, NFAVertex v, bool &nocase,
|
||||
bool &casefixed) {
|
||||
const CharReach &cr = g[v].char_reach;
|
||||
const size_t num = cr.count();
|
||||
if (num > 2) {
|
||||
@@ -93,7 +96,7 @@ void addToString(string &s, const NGHolder &g, NFAVertex v) {
|
||||
}
|
||||
|
||||
static
|
||||
bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored,
|
||||
bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored,
|
||||
set<NFAVertex> &dead) {
|
||||
DEBUG_PRINTF("examine vertex %zu\n", g[v].index);
|
||||
bool nocase = false, casefixed = false;
|
||||
@@ -185,7 +188,7 @@ bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored,
|
||||
}
|
||||
|
||||
/** \brief Split off literals. True if any changes were made to the graph. */
|
||||
bool splitOffLiterals(NG &ng, NGWrapper &g) {
|
||||
bool splitOffLiterals(NG &ng, NGHolder &g) {
|
||||
if (!ng.cc.grey.allowLiteral) {
|
||||
return false;
|
||||
}
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -37,10 +37,10 @@
|
||||
namespace ue2 {
|
||||
|
||||
class NG;
|
||||
class NGWrapper;
|
||||
class NGHolder;
|
||||
|
||||
/** \brief Split off literals. True if any changes were made to the graph. */
|
||||
bool splitOffLiterals(NG &ng, NGWrapper &graph);
|
||||
bool splitOffLiterals(NG &ng, NGHolder &g);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
|
@@ -29,6 +29,9 @@
|
||||
/** \file
|
||||
* \brief SOM ("Start of Match") analysis.
|
||||
*/
|
||||
|
||||
#include "ng_som.h"
|
||||
|
||||
#include "ng.h"
|
||||
#include "ng_dump.h"
|
||||
#include "ng_equivalence.h"
|
||||
@@ -40,7 +43,6 @@
|
||||
#include "ng_redundancy.h"
|
||||
#include "ng_region.h"
|
||||
#include "ng_reports.h"
|
||||
#include "ng_som.h"
|
||||
#include "ng_som_add_redundancy.h"
|
||||
#include "ng_som_util.h"
|
||||
#include "ng_split.h"
|
||||
@@ -49,6 +51,7 @@
|
||||
#include "ng_width.h"
|
||||
#include "grey.h"
|
||||
#include "ue2common.h"
|
||||
#include "compiler/compiler.h"
|
||||
#include "nfa/goughcompile.h"
|
||||
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
|
||||
#include "parser/position.h"
|
||||
@@ -1584,8 +1587,9 @@ void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p,
|
||||
* implement the full pattern.
|
||||
*/
|
||||
static
|
||||
void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g,
|
||||
vector<som_plan> &plan, const u32 first_som_slot) {
|
||||
void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id,
|
||||
NGHolder &g, vector<som_plan> &plan,
|
||||
const u32 first_som_slot) {
|
||||
ReportManager &rm = ng.rm;
|
||||
SomSlotManager &ssm = ng.ssm;
|
||||
|
||||
@@ -1598,14 +1602,14 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g,
|
||||
|
||||
// Root plan, which already has a SOM slot assigned (first_som_slot).
|
||||
dumpSomPlan(g, plan.front(), 0);
|
||||
dumpSomSubComponent(*plan.front().prefix, "04_som", w.expressionIndex,
|
||||
comp_id, 0, ng.cc.grey);
|
||||
dumpSomSubComponent(*plan.front().prefix, "04_som", expr.index, comp_id, 0,
|
||||
ng.cc.grey);
|
||||
assert(plan.front().prefix);
|
||||
if (plan.front().escapes.any() && !plan.front().is_reset) {
|
||||
/* setup escaper for first som location */
|
||||
if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes,
|
||||
first_som_slot)) {
|
||||
throw CompileError(w.expressionIndex, "Pattern is too large.");
|
||||
throw CompileError(expr.index, "Pattern is too large.");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1617,7 +1621,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g,
|
||||
for (++it; it != plan.end(); ++it) {
|
||||
const u32 plan_num = it - plan.begin();
|
||||
dumpSomPlan(g, *it, plan_num);
|
||||
dumpSomSubComponent(*it->prefix, "04_som", w.expressionIndex, comp_id,
|
||||
dumpSomSubComponent(*it->prefix, "04_som", expr.index, comp_id,
|
||||
plan_num, ng.cc.grey);
|
||||
|
||||
assert(it->parent < plan_num);
|
||||
@@ -1628,7 +1632,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g,
|
||||
|
||||
assert(!it->no_implement);
|
||||
if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) {
|
||||
throw CompileError(w.expressionIndex, "Pattern is too large.");
|
||||
throw CompileError(expr.index, "Pattern is too large.");
|
||||
}
|
||||
updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in);
|
||||
updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out);
|
||||
@@ -1639,7 +1643,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g,
|
||||
renumber_vertices(*plan.front().prefix);
|
||||
assert(plan.front().prefix->kind == NFA_OUTFIX);
|
||||
if (!ng.addHolder(*plan.front().prefix)) {
|
||||
throw CompileError(w.expressionIndex, "Pattern is too large.");
|
||||
throw CompileError(expr.index, "Pattern is too large.");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1852,7 +1856,7 @@ bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) {
|
||||
}
|
||||
|
||||
static
|
||||
u32 doSomRevNfaPrefix(NG &ng, const NGWrapper &w, NGHolder &g,
|
||||
u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g,
|
||||
const CompileContext &cc) {
|
||||
depth maxWidth = findMaxWidth(g);
|
||||
|
||||
@@ -1861,7 +1865,7 @@ u32 doSomRevNfaPrefix(NG &ng, const NGWrapper &w, NGHolder &g,
|
||||
|
||||
auto nfa = makeBareSomRevNfa(g, cc);
|
||||
if (!nfa) {
|
||||
throw CompileError(w.expressionIndex, "Pattern is too large.");
|
||||
throw CompileError(expr.index, "Pattern is too large.");
|
||||
}
|
||||
|
||||
if (ng.cc.streaming) {
|
||||
@@ -2055,8 +2059,8 @@ void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr<NGHolder> &prefix,
|
||||
}
|
||||
|
||||
static
|
||||
sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
|
||||
som_type som,
|
||||
sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
|
||||
u32 comp_id, som_type som,
|
||||
const ue2::unordered_map<NFAVertex, u32> &regions,
|
||||
const map<u32, region_info> &info,
|
||||
map<u32, region_info>::const_iterator lower_bound) {
|
||||
@@ -2077,7 +2081,7 @@ sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
|
||||
// This is an optimisation: if we can't build a Haig from a portion of
|
||||
// the graph, then we won't be able to manage it as an outfix either
|
||||
// when we fall back.
|
||||
throw CompileError(w.expressionIndex, "Pattern is too large.");
|
||||
throw CompileError(expr.index, "Pattern is too large.");
|
||||
}
|
||||
|
||||
while (1) {
|
||||
@@ -2152,7 +2156,7 @@ sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
|
||||
goto next_try;
|
||||
}
|
||||
|
||||
implementSomPlan(ng, w, comp_id, g, plan, som_loc);
|
||||
implementSomPlan(ng, expr, comp_id, g, plan, som_loc);
|
||||
|
||||
Report ir = makeCallback(0U, 0);
|
||||
assert(!plan.empty());
|
||||
@@ -2877,7 +2881,7 @@ unique_ptr<NGHolder> makePrefixForChain(NGHolder &g,
|
||||
return prefix;
|
||||
}
|
||||
|
||||
sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
|
||||
sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id,
|
||||
som_type som) {
|
||||
assert(som);
|
||||
DEBUG_PRINTF("som hello\n");
|
||||
@@ -3001,7 +3005,7 @@ sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
|
||||
/* create prefix to set the som_loc */
|
||||
updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET);
|
||||
if (prefix_by_rev) {
|
||||
u32 rev_comp_id = doSomRevNfaPrefix(ng, w, *prefix, cc);
|
||||
u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc);
|
||||
updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
|
||||
}
|
||||
renumber_vertices(*prefix);
|
||||
@@ -3084,18 +3088,18 @@ sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
|
||||
updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET);
|
||||
}
|
||||
if (prefix_by_rev && !plan.front().no_implement) {
|
||||
u32 rev_comp_id = doSomRevNfaPrefix(ng, w, *prefix, cc);
|
||||
u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc);
|
||||
updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
|
||||
}
|
||||
|
||||
implementSomPlan(ng, w, comp_id, g, plan, som_loc);
|
||||
implementSomPlan(ng, expr, comp_id, g, plan, som_loc);
|
||||
|
||||
DEBUG_PRINTF("success\n");
|
||||
return SOMBE_HANDLED_INTERNAL;
|
||||
}
|
||||
|
||||
sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
|
||||
som_type som) {
|
||||
sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr,
|
||||
u32 comp_id, som_type som) {
|
||||
assert(som);
|
||||
|
||||
DEBUG_PRINTF("som+haig hello\n");
|
||||
@@ -3132,7 +3136,7 @@ sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
|
||||
buildRegionMapping(g, regions, info, true);
|
||||
|
||||
sombe_rv rv =
|
||||
doHaigLitSom(ng, g, w, comp_id, som, regions, info, info.begin());
|
||||
doHaigLitSom(ng, g, expr, comp_id, som, regions, info, info.begin());
|
||||
if (rv == SOMBE_FAIL) {
|
||||
clear_graph(g);
|
||||
cloneHolder(g, g_pristine);
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -34,12 +34,14 @@
|
||||
#define NG_SOM_H
|
||||
|
||||
#include "som/som.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class ExpressionInfo;
|
||||
class NG;
|
||||
class NGHolder;
|
||||
class NGWrapper;
|
||||
class ReportManager;
|
||||
struct Grey;
|
||||
|
||||
enum sombe_rv {
|
||||
@@ -63,14 +65,14 @@ enum sombe_rv {
|
||||
* May throw a "Pattern too large" exception if prefixes of the
|
||||
* pattern are too large to compile.
|
||||
*/
|
||||
sombe_rv doSom(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id,
|
||||
sombe_rv doSom(NG &ng, NGHolder &h, const ExpressionInfo &expr, u32 comp_id,
|
||||
som_type som);
|
||||
|
||||
/** Returns SOMBE_FAIL (and the original graph) if SOM cannot be established.
|
||||
* May also throw pattern too large if prefixes of the pattern are too large to
|
||||
* compile. */
|
||||
sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id,
|
||||
som_type som);
|
||||
sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const ExpressionInfo &expr,
|
||||
u32 comp_id, som_type som);
|
||||
|
||||
void makeReportsSomPass(ReportManager &rm, NGHolder &g);
|
||||
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -34,6 +34,7 @@
|
||||
#include "ng.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_util.h"
|
||||
#include "compiler/compiler.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/unicode_def.h"
|
||||
|
||||
@@ -45,14 +46,14 @@ using namespace std;
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
void allowIllegal(NGWrapper &w, NFAVertex v, u8 pred_char) {
|
||||
if (in_degree(v, w) != 1) {
|
||||
void allowIllegal(NGHolder &g, NFAVertex v, u8 pred_char) {
|
||||
if (in_degree(v, g) != 1) {
|
||||
DEBUG_PRINTF("unexpected pred\n");
|
||||
assert(0); /* should be true due to the early stage of this analysis */
|
||||
return;
|
||||
}
|
||||
|
||||
CharReach &cr = w[v].char_reach;
|
||||
CharReach &cr = g[v].char_reach;
|
||||
if (pred_char == 0xe0) {
|
||||
assert(cr.isSubsetOf(CharReach(0xa0, 0xbf)));
|
||||
if (cr == CharReach(0xa0, 0xbf)) {
|
||||
@@ -79,8 +80,8 @@ void allowIllegal(NGWrapper &w, NFAVertex v, u8 pred_char) {
|
||||
* above \\x{10ffff} or they represent overlong encodings. As we require valid
|
||||
* UTF-8 input, we have no defined behaviour in these cases, as a result we can
|
||||
* accept them if it simplifies the graph. */
|
||||
void relaxForbiddenUtf8(NGWrapper &w) {
|
||||
if (!w.utf8) {
|
||||
void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr) {
|
||||
if (!expr.utf8) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -88,12 +89,12 @@ void relaxForbiddenUtf8(NGWrapper &w) {
|
||||
const CharReach f0(0xf0);
|
||||
const CharReach f4(0xf4);
|
||||
|
||||
for (auto v : vertices_range(w)) {
|
||||
const CharReach &cr = w[v].char_reach;
|
||||
for (auto v : vertices_range(g)) {
|
||||
const CharReach &cr = g[v].char_reach;
|
||||
if (cr == e0 || cr == f0 || cr == f4) {
|
||||
u8 pred_char = cr.find_first();
|
||||
for (auto t : adjacent_vertices_range(v, w)) {
|
||||
allowIllegal(w, t, pred_char);
|
||||
for (auto t : adjacent_vertices_range(v, g)) {
|
||||
allowIllegal(g, t, pred_char);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -35,7 +35,7 @@
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGWrapper;
|
||||
class ExpressionInfo;
|
||||
class NGHolder;
|
||||
|
||||
/** \brief Relax forbidden UTF-8 sequences.
|
||||
@@ -44,7 +44,7 @@ class NGHolder;
|
||||
* above \\x{10ffff} or they represent overlong encodings. As we require valid
|
||||
* UTF-8 input, we have no defined behaviour in these cases, as a result we can
|
||||
* accept them if it simplifies the graph. */
|
||||
void relaxForbiddenUtf8(NGWrapper &w);
|
||||
void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr);
|
||||
|
||||
/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex
|
||||
* where possible, based on the assumption that we will always be matching
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -34,29 +34,31 @@
|
||||
#include "grey.h"
|
||||
#include "ng.h"
|
||||
#include "ng_util.h"
|
||||
#include "compiler/compiler.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
ReportID getInternalId(ReportManager &rm, const NGWrapper &graph) {
|
||||
Report ir = rm.getBasicInternalReport(graph);
|
||||
ReportID getInternalId(ReportManager &rm, const ExpressionInfo &expr) {
|
||||
Report ir = rm.getBasicInternalReport(expr);
|
||||
|
||||
// Apply any extended params.
|
||||
if (graph.min_offset || graph.max_offset != MAX_OFFSET) {
|
||||
ir.minOffset = graph.min_offset;
|
||||
ir.maxOffset = graph.max_offset;
|
||||
if (expr.min_offset || expr.max_offset != MAX_OFFSET) {
|
||||
ir.minOffset = expr.min_offset;
|
||||
ir.maxOffset = expr.max_offset;
|
||||
}
|
||||
|
||||
assert(!graph.min_length); // should be handled elsewhere.
|
||||
assert(!expr.min_length); // should be handled elsewhere.
|
||||
|
||||
return rm.getInternalId(ir);
|
||||
}
|
||||
|
||||
static
|
||||
void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGWrapper &g) {
|
||||
const ReportID r = getInternalId(rm, g);
|
||||
void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGHolder &g,
|
||||
const ExpressionInfo &expr) {
|
||||
const ReportID r = getInternalId(rm, expr);
|
||||
|
||||
boundary.report_at_0_eod.insert(r);
|
||||
boundary.report_at_0.insert(r);
|
||||
@@ -81,8 +83,8 @@ void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGWrapper &g) {
|
||||
|
||||
static
|
||||
void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
|
||||
NGWrapper &g) {
|
||||
boundary.report_at_0.insert(getInternalId(rm, g));
|
||||
NGHolder &g, const ExpressionInfo &expr) {
|
||||
boundary.report_at_0.insert(getInternalId(rm, expr));
|
||||
remove_edge(g.start, g.accept, g);
|
||||
remove_edge(g.start, g.acceptEod, g);
|
||||
g[g.start].reports.clear();
|
||||
@@ -90,8 +92,8 @@ void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
|
||||
|
||||
static
|
||||
void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
|
||||
NGWrapper &g) {
|
||||
boundary.report_at_eod.insert(getInternalId(rm, g));
|
||||
NGHolder &g, const ExpressionInfo &expr) {
|
||||
boundary.report_at_eod.insert(getInternalId(rm, expr));
|
||||
remove_edge(g.startDs, g.acceptEod, g);
|
||||
remove_edge(g.start, g.acceptEod, g);
|
||||
g[g.start].reports.clear();
|
||||
@@ -100,18 +102,18 @@ void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
|
||||
|
||||
static
|
||||
void makeNothingAcceptor(BoundaryReports &boundary, ReportManager &rm,
|
||||
NGWrapper &g) {
|
||||
boundary.report_at_0_eod.insert(getInternalId(rm, g));
|
||||
NGHolder &g, const ExpressionInfo &expr) {
|
||||
boundary.report_at_0_eod.insert(getInternalId(rm, expr));
|
||||
remove_edge(g.start, g.acceptEod, g);
|
||||
g[g.start].reports.clear();
|
||||
}
|
||||
|
||||
bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
|
||||
NGWrapper &g) {
|
||||
NGHolder &g, const ExpressionInfo &expr) {
|
||||
if (edge(g.startDs, g.accept, g).second) {
|
||||
// e.g. '.*'; match "between" every byte
|
||||
DEBUG_PRINTF("graph is firehose\n");
|
||||
makeFirehose(boundary, rm, g);
|
||||
makeFirehose(boundary, rm, g, expr);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -119,19 +121,19 @@ bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
|
||||
|
||||
if (edge(g.start, g.accept, g).second) {
|
||||
DEBUG_PRINTF("creating anchored acceptor\n");
|
||||
makeAnchoredAcceptor(boundary, rm, g);
|
||||
makeAnchoredAcceptor(boundary, rm, g, expr);
|
||||
work_done = true;
|
||||
}
|
||||
|
||||
if (edge(g.startDs, g.acceptEod, g).second) {
|
||||
DEBUG_PRINTF("creating end-anchored acceptor\n");
|
||||
makeEndAnchoredAcceptor(boundary, rm, g);
|
||||
makeEndAnchoredAcceptor(boundary, rm, g, expr);
|
||||
work_done = true;
|
||||
}
|
||||
|
||||
if (edge(g.start, g.acceptEod, g).second) {
|
||||
DEBUG_PRINTF("creating nothing acceptor\n");
|
||||
makeNothingAcceptor(boundary, rm, g);
|
||||
makeNothingAcceptor(boundary, rm, g, expr);
|
||||
work_done = true;
|
||||
}
|
||||
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -36,12 +36,13 @@
|
||||
namespace ue2 {
|
||||
|
||||
struct BoundaryReports;
|
||||
class NGWrapper;
|
||||
class ExpressionInfo;
|
||||
class NGHolder;
|
||||
class ReportManager;
|
||||
|
||||
// Returns true if a "vacuous" reporter was created.
|
||||
bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
|
||||
NGWrapper &graph);
|
||||
NGHolder &g, const ExpressionInfo &expr);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
|
Reference in New Issue
Block a user