ng: split NGWrapper into NGHolder, ExpressionInfo

We now use NGHolder for all graph information, while other expression
properties (report, flag information, etc.) go in the new class
ExpressionInfo.
This commit is contained in:
Justin Viiret 2017-03-16 18:18:34 +11:00 committed by Matthew Barr
parent fadfab6d8c
commit 5dfae12a62
41 changed files with 726 additions and 612 deletions

View File

@ -674,6 +674,7 @@ SET (hs_SRCS
src/compiler/compiler.h
src/compiler/error.cpp
src/compiler/error.h
src/compiler/expression_info.h
src/fdr/engine_description.cpp
src/fdr/engine_description.h
src/fdr/fdr_compile.cpp

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -42,6 +42,8 @@
* word-to-word and word-to-nonword) are dropped.
*/
#include "asserts.h"
#include "compiler/compiler.h"
#include "nfagraph/ng.h"
#include "nfagraph/ng_prune.h"
#include "nfagraph/ng_redundancy.h"
@ -115,8 +117,8 @@ u32 conjunct(u32 flags1, u32 flags2) {
typedef map<pair<NFAVertex, NFAVertex>, NFAEdge> edge_cache_t;
static
void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
u32 &assert_edge_count) {
void replaceAssertVertex(NGHolder &g, NFAVertex t, const ExpressionInfo &expr,
edge_cache_t &edge_cache, u32 &assert_edge_count) {
DEBUG_PRINTF("replacing assert vertex %zu\n", g[t].index);
const u32 flags = g[t].assert_flags;
@ -178,8 +180,7 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
edge_cache.emplace(cache_key, e);
g[e].assert_flags = flags;
if (++assert_edge_count > MAX_ASSERT_EDGES) {
throw CompileError(g.expressionIndex,
"Pattern is too large.");
throw CompileError(expr.index, "Pattern is too large.");
}
} else {
NFAEdge e = ecit->second;
@ -200,21 +201,23 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
}
static
void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) {
void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
NFAVertex v, s32 adj) {
// Don't try and set the report ID of a special vertex.
assert(!is_special(v, g));
// There should be no reports set already.
assert(g[v].reports.empty());
Report r = rm.getBasicInternalReport(g, adj);
Report r = rm.getBasicInternalReport(expr, adj);
g[v].reports.insert(rm.getInternalId(r));
DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
}
static
void checkForMultilineStart(ReportManager &rm, NGWrapper &g) {
void checkForMultilineStart(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
vector<NFAEdge> dead;
for (auto v : adjacent_vertices_range(g.start, g)) {
if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) {
@ -238,7 +241,7 @@ void checkForMultilineStart(ReportManager &rm, NGWrapper &g) {
for (const auto &e : dead) {
NFAVertex dummy = add_vertex(g);
g[dummy].char_reach.setall();
setReportId(rm, g, dummy, -1);
setReportId(rm, g, expr, dummy, -1);
add_edge(source(e, g), dummy, g[e], g);
add_edge(dummy, g.accept, g);
}
@ -263,7 +266,8 @@ bool hasAssertVertices(const NGHolder &g) {
* Remove the horrors that are the temporary assert vertices which arise from
* our construction method. Allows the rest of our code base to live in
* blissful ignorance of their existence. */
void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
void removeAssertVertices(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
size_t num = 0;
DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g));
@ -285,12 +289,12 @@ void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
for (auto v : vertices_range(g)) {
if (g[v].assert_flags & WORDBOUNDARY_FLAGS) {
replaceAssertVertex(g, v, edge_cache, assert_edge_count);
replaceAssertVertex(g, v, expr, edge_cache, assert_edge_count);
num++;
}
}
checkForMultilineStart(rm, g);
checkForMultilineStart(rm, g, expr);
if (num) {
DEBUG_PRINTF("resolved %zu assert vertices\n", num);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -35,8 +35,9 @@
namespace ue2 {
class ExpressionInfo;
class ReportManager;
class NGWrapper;
class NGHolder;
/** \brief Convert temporary assert vertices (from construction method) to
* edge-based flags.
@ -44,7 +45,8 @@ class NGWrapper;
* Remove the horrors that are the temporary assert vertices which arise from
* our construction method. Allows the rest of our code base to live in
* blissful ignorance of their existence. */
void removeAssertVertices(ReportManager &rm, NGWrapper &g);
void removeAssertVertices(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr);
} // namespace ue2

View File

@ -73,7 +73,6 @@ using namespace std;
namespace ue2 {
static
void validateExt(const hs_expr_ext &ext) {
static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET |
@ -100,26 +99,18 @@ void validateExt(const hs_expr_ext &ext) {
}
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
unsigned flags, ReportID actionId,
unsigned flags, ReportID report,
const hs_expr_ext *ext)
: utf8(false),
allow_vacuous(flags & HS_FLAG_ALLOWEMPTY),
highlander(flags & HS_FLAG_SINGLEMATCH),
prefilter(flags & HS_FLAG_PREFILTER),
som(SOM_NONE),
index(index_in),
id(actionId),
min_offset(0),
max_offset(MAX_OFFSET),
min_length(0),
edit_distance(0) {
: expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH,
false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET,
0, 0) {
ParseMode mode(flags);
component = parse(expression, mode);
utf8 = mode.utf8; /* utf8 may be set by parse() */
expr.utf8 = mode.utf8; /* utf8 may be set by parse() */
if (utf8 && !isValidUtf8(expression)) {
if (expr.utf8 && !isValidUtf8(expression)) {
throw ParseError("Expression is not valid UTF-8.");
}
@ -147,7 +138,7 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
// Set SOM type.
if (flags & HS_FLAG_SOM_LEFTMOST) {
som = SOM_LEFT;
expr.som = SOM_LEFT;
}
// Set extended parameters, if we have them.
@ -156,29 +147,29 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
validateExt(*ext);
if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) {
min_offset = ext->min_offset;
expr.min_offset = ext->min_offset;
}
if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) {
max_offset = ext->max_offset;
expr.max_offset = ext->max_offset;
}
if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) {
min_length = ext->min_length;
expr.min_length = ext->min_length;
}
if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) {
edit_distance = ext->edit_distance;
expr.edit_distance = ext->edit_distance;
}
}
// These are validated in validateExt, so an error will already have been
// thrown if these conditions don't hold.
assert(max_offset >= min_offset);
assert(max_offset >= min_length);
assert(expr.max_offset >= expr.min_offset);
assert(expr.max_offset >= expr.min_length);
// Since prefiltering and SOM aren't supported together, we must squash any
// min_length constraint as well.
if (flags & HS_FLAG_PREFILTER && min_length) {
if (flags & HS_FLAG_PREFILTER && expr.min_length) {
DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n");
min_length = 0;
expr.min_length = 0;
}
}
@ -187,25 +178,25 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
* \brief Dumps the parse tree to screen in debug mode and to disk in dump
* mode.
*/
void dumpExpression(UNUSED const ParsedExpression &expr,
void dumpExpression(UNUSED const ParsedExpression &pe,
UNUSED const char *stage, UNUSED const Grey &grey) {
#if defined(DEBUG)
DEBUG_PRINTF("===== Rule ID: %u (internalID: %u) =====\n", expr.id,
expr.index);
DEBUG_PRINTF("===== Rule ID: %u (expression index: %u) =====\n",
pe.expr.report, pe.expr.index);
ostringstream debug_tree;
dumpTree(debug_tree, expr.component.get());
dumpTree(debug_tree, pe.component.get());
printf("%s\n", debug_tree.str().c_str());
#endif // DEBUG
#if defined(DUMP_SUPPORT)
if (grey.dumpFlags & Grey::DUMP_PARSE) {
stringstream ss;
ss << grey.dumpPath << "Expr_" << expr.index << "_componenttree_"
ss << grey.dumpPath << "Expr_" << pe.expr.index << "_componenttree_"
<< stage << ".txt";
ofstream out(ss.str().c_str());
out << "Component Tree for " << expr.id << endl;
dumpTree(out, expr.component.get());
if (expr.utf8) {
out << "Component Tree for " << pe.expr.report << endl;
dumpTree(out, pe.component.get());
if (pe.expr.utf8) {
out << "UTF8 mode" << endl;
}
}
@ -215,13 +206,13 @@ void dumpExpression(UNUSED const ParsedExpression &expr,
/** \brief Run Component tree optimisations on \a pe. */
static
void optimise(ParsedExpression &expr) {
if (expr.min_length || expr.som) {
void optimise(ParsedExpression &pe) {
if (pe.expr.min_length || pe.expr.som) {
return;
}
DEBUG_PRINTF("optimising\n");
expr.component->optimise(true /* root is connected to sds */);
pe.component->optimise(true /* root is connected to sds */);
}
void addExpression(NG &ng, unsigned index, const char *expression,
@ -238,34 +229,34 @@ void addExpression(NG &ng, unsigned index, const char *expression,
// Do per-expression processing: errors here will result in an exception
// being thrown up to our caller
ParsedExpression expr(index, expression, flags, id, ext);
dumpExpression(expr, "orig", cc.grey);
ParsedExpression pe(index, expression, flags, id, ext);
dumpExpression(pe, "orig", cc.grey);
// Apply prefiltering transformations if desired.
if (expr.prefilter) {
prefilterTree(expr.component, ParseMode(flags));
dumpExpression(expr, "prefiltered", cc.grey);
if (pe.expr.prefilter) {
prefilterTree(pe.component, ParseMode(flags));
dumpExpression(pe, "prefiltered", cc.grey);
}
// Expressions containing zero-width assertions and other extended pcre
// types aren't supported yet. This call will throw a ParseError exception
// if the component tree contains such a construct.
checkUnsupported(*expr.component);
checkUnsupported(*pe.component);
expr.component->checkEmbeddedStartAnchor(true);
expr.component->checkEmbeddedEndAnchor(true);
pe.component->checkEmbeddedStartAnchor(true);
pe.component->checkEmbeddedEndAnchor(true);
if (cc.grey.optimiseComponentTree) {
optimise(expr);
dumpExpression(expr, "opt", cc.grey);
optimise(pe);
dumpExpression(pe, "opt", cc.grey);
}
DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n",
expr.component.get(), expr.index, expr.id);
pe.component.get(), pe.expr.index, pe.expr.report);
// You can only use the SOM flags if you've also specified an SOM
// precision mode.
if (expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) {
if (pe.expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) {
throw CompileError("To use a SOM expression flag in streaming mode, "
"an SOM precision mode (e.g. "
"HS_MODE_SOM_HORIZON_LARGE) must be specified.");
@ -273,26 +264,26 @@ void addExpression(NG &ng, unsigned index, const char *expression,
// If this expression is a literal, we can feed it directly to Rose rather
// than building the NFA graph.
if (shortcutLiteral(ng, expr)) {
if (shortcutLiteral(ng, pe)) {
DEBUG_PRINTF("took literal short cut\n");
return;
}
unique_ptr<NGWrapper> g = buildWrapper(ng.rm, cc, expr);
if (!g) {
auto built_expr = buildGraph(ng.rm, cc, pe);
if (!built_expr.g) {
DEBUG_PRINTF("NFA build failed on ID %u, but no exception was "
"thrown.\n", expr.id);
"thrown.\n", pe.expr.report);
throw CompileError("Internal error.");
}
if (!expr.allow_vacuous && matches_everywhere(*g)) {
auto &g = *built_expr.g;
if (!pe.expr.allow_vacuous && matches_everywhere(g)) {
throw CompileError("Pattern matches empty buffer; use "
"HS_FLAG_ALLOWEMPTY to enable support.");
}
if (!ng.addGraph(*g)) {
DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", expr.id);
if (!ng.addGraph(built_expr.expr, g)) {
DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", pe.expr.report);
throw CompileError("Error compiling expression.");
}
}
@ -453,41 +444,42 @@ bool isSupported(const Component &c) {
}
#endif
unique_ptr<NGWrapper> buildWrapper(ReportManager &rm, const CompileContext &cc,
const ParsedExpression &expr) {
assert(isSupported(*expr.component));
BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc,
const ParsedExpression &pe) {
assert(isSupported(*pe.component));
const unique_ptr<NFABuilder> builder = makeNFABuilder(rm, cc, expr);
const auto builder = makeNFABuilder(rm, cc, pe);
assert(builder);
// Set up START and ACCEPT states; retrieve the special states
const auto bs = makeGlushkovBuildState(*builder, expr.prefilter);
const auto bs = makeGlushkovBuildState(*builder, pe.expr.prefilter);
// Map position IDs to characters/components
expr.component->notePositions(*bs);
pe.component->notePositions(*bs);
// Wire the start dotstar state to the firsts
connectInitialStates(*bs, expr);
connectInitialStates(*bs, pe);
DEBUG_PRINTF("wire up body of expr\n");
// Build the rest of the FOLLOW set
vector<PositionInfo> initials = {builder->getStartDotStar(),
builder->getStart()};
expr.component->buildFollowSet(*bs, initials);
pe.component->buildFollowSet(*bs, initials);
// Wire the lasts to the accept state
connectFinalStates(*bs, expr);
connectFinalStates(*bs, pe);
// Create our edges
bs->buildEdges();
auto g = builder->getGraph();
assert(g);
BuiltExpression built_expr = builder->getGraph();
assert(built_expr.g);
dumpDotWrapper(*g, "00_before_asserts", cc.grey);
removeAssertVertices(rm, *g);
dumpDotWrapper(*built_expr.g, built_expr.expr, "00_before_asserts",
cc.grey);
removeAssertVertices(rm, *built_expr.g, built_expr.expr);
return g;
return built_expr;
}
} // namespace ue2

View File

@ -35,8 +35,8 @@
#include "ue2common.h"
#include "database.h"
#include "compiler/expression_info.h"
#include "parser/Component.h"
#include "som/som.h"
#include <memory>
#include <boost/core/noncopyable.hpp>
@ -50,35 +50,32 @@ struct CompileContext;
struct Grey;
struct target_t;
class NG;
class NGHolder;
class ReportManager;
class NGWrapper;
/** Class gathering together the pieces of a parsed expression.
* Note: Owns the provided component.
*/
/** \brief Class gathering together the pieces of a parsed expression. */
class ParsedExpression : boost::noncopyable {
public:
ParsedExpression(unsigned index, const char *expression, unsigned flags,
ReportID actionId, const hs_expr_ext *ext = nullptr);
ReportID report, const hs_expr_ext *ext = nullptr);
bool utf8; //!< UTF-8 mode flag specified
/** \brief Expression information (from flags, extparam etc) */
ExpressionInfo expr;
/** \brief root node of parsed component tree. */
std::unique_ptr<ue2::Component> component;
/** \brief Root node of parsed component tree. */
std::unique_ptr<Component> component;
};
const bool allow_vacuous; //!< HS_FLAG_ALLOWEMPTY specified
const bool highlander; //!< HS_FLAG_SINGLEMATCH specified
const bool prefilter; //!< HS_FLAG_PREFILTER specified
som_type som; //!< chosen SOM mode, or SOM_NONE
/**
* \brief Class gathering together the pieces of an expression that has been
* built into an NFA graph.
*/
struct BuiltExpression {
/** \brief Expression information (from flags, extparam etc) */
ExpressionInfo expr;
/** \brief index in expressions array passed to \ref hs_compile_multi */
const unsigned index;
const ReportID id; //!< user-specified pattern ID
u64a min_offset; //!< 0 if not used
u64a max_offset; //!< MAX_OFFSET if not used
u64a min_length; //!< 0 if not used
u32 edit_distance; //!< 0 if not used
/** \brief Built Glushkov NFA graph. */
std::unique_ptr<NGHolder> g;
};
/**
@ -95,12 +92,12 @@ public:
* @param ext
* Struct containing extra parameters for this expression, or NULL if
* none.
* @param actionId
* @param report
* The identifier to associate with the expression; returned by engine on
* match.
*/
void addExpression(NG &ng, unsigned index, const char *expression,
unsigned flags, const hs_expr_ext *ext, ReportID actionId);
unsigned flags, const hs_expr_ext *ext, ReportID report);
/**
* Build a Hyperscan database out of the expressions we've been given. A
@ -128,9 +125,8 @@ struct hs_database *build(NG &ng, unsigned int *length);
* @return
* A BuiltExpression; its graph pointer (\c g) is nullptr on error.
*/
std::unique_ptr<NGWrapper> buildWrapper(ReportManager &rm,
const CompileContext &cc,
const ParsedExpression &expr);
BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc,
const ParsedExpression &expr);
/**
* Build a platform_t out of a target_t.

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \file
* \brief ExpressionInfo class for storing the properties of an expression.
*/
#ifndef COMPILER_EXPRESSION_INFO_H
#define COMPILER_EXPRESSION_INFO_H
#include "ue2common.h"
#include "som/som.h"
namespace ue2 {
/** \brief Properties of an expression. */
/**
 * \brief Properties of a single expression that are kept outside the graph.
 *
 * All members are public; the constructor simply stores each argument in the
 * member of the same name.
 */
class ExpressionInfo {
public:
    /** \brief Build the record with every property supplied explicitly. */
    ExpressionInfo(unsigned int index_in, bool allow_vacuous_in,
                   bool highlander_in, bool utf8_in, bool prefilter_in,
                   som_type som_in, ReportID report_in, u64a min_offset_in,
                   u64a max_offset_in, u64a min_length_in,
                   u32 edit_distance_in)
        : index{index_in},
          report{report_in},
          allow_vacuous{allow_vacuous_in},
          highlander{highlander_in},
          utf8{utf8_in},
          prefilter{prefilter_in},
          som{som_in},
          min_offset{min_offset_in},
          max_offset{max_offset_in},
          min_length{min_length_in},
          edit_distance{edit_distance_in} {}

    /**
     * \brief Index of this expression.
     *
     * Used:
     * - later on, when reporting errors;
     * - to identify the parts of an expression in highlander mode.
     */
    unsigned int index;

    /** \brief User-supplied report ID. */
    ReportID report;

    /** \brief Set if a vacuous pattern is permitted. (HS_FLAG_ALLOWEMPTY) */
    bool allow_vacuous;

    /** \brief Set for a "highlander" (single match) pattern.
     * (HS_FLAG_SINGLEMATCH) */
    bool highlander;

    /** \brief Set for a UTF-8 pattern. (HS_FLAG_UTF8) */
    bool utf8;

    /** \brief Set for a prefiltering pattern. (HS_FLAG_PREFILTER) */
    bool prefilter;

    /** \brief Requested start-of-match type; SOM_NONE if none. */
    som_type som;

    /** \brief Extended parameter: minimum match offset. 0 if not used. */
    u64a min_offset;

    /**
     * \brief Extended parameter: maximum match offset.
     * MAX_OFFSET if not used.
     */
    u64a max_offset;

    /** \brief Extended parameter: minimum match length. 0 if not used. */
    u64a min_length;

    /**
     * \brief Extended parameter: edit distance for approximate matching.
     * 0 if not used.
     */
    u32 edit_distance;
};
}
#endif // COMPILER_EXPRESSION_INFO_H

View File

@ -369,11 +369,13 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
assert(pe.component);
// Apply prefiltering transformations if desired.
if (pe.prefilter) {
if (pe.expr.prefilter) {
prefilterTree(pe.component, ParseMode(flags));
}
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, pe);
auto built_expr = buildGraph(rm, cc, pe);
unique_ptr<NGHolder> &g = built_expr.g;
ExpressionInfo &expr = built_expr.expr;
if (!g) {
DEBUG_PRINTF("NFA build failed, but no exception was thrown.\n");
@ -381,13 +383,13 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
}
// validate graph's suitability for fuzzing
validate_fuzzy_compile(*g, g->edit_distance, g->utf8, cc.grey);
validate_fuzzy_compile(*g, expr.edit_distance, expr.utf8, cc.grey);
// fuzz graph - this must happen before any transformations are made
make_fuzzy(*g, g->edit_distance, cc.grey);
make_fuzzy(*g, expr.edit_distance, cc.grey);
handleExtendedParams(rm, *g, cc);
fillExpressionInfo(rm, *g, &local_info);
handleExtendedParams(rm, *g, expr, cc);
fillExpressionInfo(rm, *g, expr, &local_info);
}
catch (const CompileError &e) {
// Compiler error occurred

View File

@ -27,10 +27,11 @@
*/
/** \file
* \brief NG, NGHolder, NGWrapper and graph handling.
* \brief NG and graph handling.
*/
#include "grey.h"
#include "ng.h"
#include "grey.h"
#include "ng_anchored_acyclic.h"
#include "ng_anchored_dots.h"
#include "ng_asserts.h"
@ -62,6 +63,7 @@
#include "ng_util.h"
#include "ng_width.h"
#include "ue2common.h"
#include "compiler/compiler.h"
#include "nfa/goughcompile.h"
#include "rose/rose_build.h"
#include "smallwrite/smallwrite_build.h"
@ -100,16 +102,16 @@ NG::~NG() {
* \throw CompileError if SOM cannot be supported for the component.
*/
static
bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
const som_type som, const u32 comp_id) {
DEBUG_PRINTF("doing som\n");
dumpComponent(g, "03_presom", w.expressionIndex, comp_id, ng.cc.grey);
dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey);
assert(hasCorrectlyNumberedVertices(g));
assert(allMatchStatesHaveReports(w));
assert(allMatchStatesHaveReports(g));
// First, we try the "SOM chain" support in ng_som.cpp.
sombe_rv rv = doSom(ng, g, w, comp_id, som);
sombe_rv rv = doSom(ng, g, expr, comp_id, som);
if (rv == SOMBE_HANDLED_INTERNAL) {
return false;
} else if (rv == SOMBE_HANDLED_ALL) {
@ -118,7 +120,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
assert(rv == SOMBE_FAIL);
/* Next, Sombe style approaches */
rv = doSomWithHaig(ng, g, w, comp_id, som);
rv = doSomWithHaig(ng, g, expr, comp_id, som);
if (rv == SOMBE_HANDLED_INTERNAL) {
return false;
} else if (rv == SOMBE_HANDLED_ALL) {
@ -132,7 +134,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
vector<vector<CharReach> > triggers; /* empty for outfix */
assert(g.kind == NFA_OUTFIX);
dumpComponent(g, "haig", w.expressionIndex, comp_id, ng.cc.grey);
dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey);
makeReportsSomPass(ng.rm, g);
auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers,
ng.cc.grey);
@ -145,7 +147,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
/* Our various strategies for supporting SOM for this pattern have failed.
* Provide a generic pattern not supported/too large return value as it is
* unclear what the meaning of a specific SOM error would be */
throw CompileError(w.expressionIndex, "Pattern is too large.");
throw CompileError(expr.index, "Pattern is too large.");
assert(0); // unreachable
return false;
@ -200,21 +202,21 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8,
}
static
bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
const u32 comp_id) {
bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
const som_type som, const u32 comp_id) {
const CompileContext &cc = ng.cc;
assert(hasCorrectlyNumberedVertices(g));
DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n",
w.expressionIndex, comp_id, num_vertices(g), num_edges(g));
expr.index, comp_id, num_vertices(g), num_edges(g));
dumpComponent(g, "01_begin", w.expressionIndex, comp_id, ng.cc.grey);
dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey);
assert(allMatchStatesHaveReports(w));
assert(allMatchStatesHaveReports(g));
reduceGraph(g, som, w.utf8, cc);
reduceGraph(g, som, expr.utf8, cc);
dumpComponent(g, "02_reduced", w.expressionIndex, comp_id, ng.cc.grey);
dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey);
// There may be redundant regions that we can remove
if (cc.grey.performGraphSimplification) {
@ -231,12 +233,12 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
// Start Of Match handling.
if (som) {
if (addComponentSom(ng, g, w, som, comp_id)) {
if (addComponentSom(ng, g, expr, som, comp_id)) {
return true;
}
}
assert(allMatchStatesHaveReports(w));
assert(allMatchStatesHaveReports(g));
if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) {
return true;
@ -251,11 +253,11 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
return true;
}
if (doViolet(*ng.rose, g, w.prefilter, false, ng.rm, cc)) {
if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) {
return true;
}
if (splitOffPuffs(*ng.rose, ng.rm, g, w.prefilter, cc)) {
if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) {
return true;
}
@ -268,7 +270,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
return true;
}
if (doViolet(*ng.rose, g, w.prefilter, true, ng.rm, cc)) {
if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) {
return true;
}
@ -283,7 +285,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
// Returns true if all components have been added.
static
bool processComponents(NG &ng, NGWrapper &w,
bool processComponents(NG &ng, ExpressionInfo &expr,
deque<unique_ptr<NGHolder>> &g_comp,
const som_type som) {
const u32 num_components = g_comp.size();
@ -293,7 +295,7 @@ bool processComponents(NG &ng, NGWrapper &w,
if (!g_comp[i]) {
continue;
}
if (addComponent(ng, *g_comp[i], w, som, i)) {
if (addComponent(ng, *g_comp[i], expr, som, i)) {
g_comp[i].reset();
continue;
}
@ -313,48 +315,48 @@ bool processComponents(NG &ng, NGWrapper &w,
return false;
}
bool NG::addGraph(NGWrapper &w) {
bool NG::addGraph(ExpressionInfo &expr, NGHolder &g) {
// remove reports that aren't on vertices connected to accept.
clearReports(w);
clearReports(g);
som_type som = w.som;
if (som && isVacuous(w)) {
throw CompileError(w.expressionIndex, "Start of match is not "
som_type som = expr.som;
if (som && isVacuous(g)) {
throw CompileError(expr.index, "Start of match is not "
"currently supported for patterns which match an "
"empty buffer.");
}
dumpDotWrapper(w, "01_initial", cc.grey);
assert(allMatchStatesHaveReports(w));
dumpDotWrapper(g, expr, "01_initial", cc.grey);
assert(allMatchStatesHaveReports(g));
/* ensure utf8 starts at cp boundary */
ensureCodePointStart(rm, w);
ensureCodePointStart(rm, g, expr);
if (can_never_match(w)) {
throw CompileError(w.expressionIndex, "Pattern can never match.");
if (can_never_match(g)) {
throw CompileError(expr.index, "Pattern can never match.");
}
// validate graph's suitability for fuzzing before resolving asserts
validate_fuzzy_compile(w, w.edit_distance, w.utf8, cc.grey);
validate_fuzzy_compile(g, expr.edit_distance, expr.utf8, cc.grey);
resolveAsserts(rm, w);
dumpDotWrapper(w, "02_post_assert_resolve", cc.grey);
assert(allMatchStatesHaveReports(w));
resolveAsserts(rm, g, expr);
dumpDotWrapper(g, expr, "02_post_assert_resolve", cc.grey);
assert(allMatchStatesHaveReports(g));
make_fuzzy(w, w.edit_distance, cc.grey);
dumpDotWrapper(w, "02a_post_fuzz", cc.grey);
make_fuzzy(g, expr.edit_distance, cc.grey);
dumpDotWrapper(g, expr, "02a_post_fuzz", cc.grey);
pruneUseless(w);
pruneEmptyVertices(w);
pruneUseless(g);
pruneEmptyVertices(g);
if (can_never_match(w)) {
throw CompileError(w.expressionIndex, "Pattern can never match.");
if (can_never_match(g)) {
throw CompileError(expr.index, "Pattern can never match.");
}
optimiseVirtualStarts(w); /* good for som */
optimiseVirtualStarts(g); /* good for som */
handleExtendedParams(rm, w, cc);
if (w.min_length) {
handleExtendedParams(rm, g, expr, cc);
if (expr.min_length) {
// We have a minimum length constraint, which we currently use SOM to
// satisfy.
som = SOM_LEFT;
@ -368,70 +370,70 @@ bool NG::addGraph(NGWrapper &w) {
// first, we can perform graph work that can be done on an individual
// expression basis.
if (w.utf8) {
relaxForbiddenUtf8(w);
if (expr.utf8) {
relaxForbiddenUtf8(g, expr);
}
if (w.highlander && !w.min_length && !w.min_offset) {
if (expr.highlander && !expr.min_length && !expr.min_offset) {
// In highlander mode: if we don't have constraints on our reports that
// may prevent us accepting our first match (i.e. extended params) we
// can prune the other out-edges of all vertices connected to accept.
pruneHighlanderAccepts(w, rm);
pruneHighlanderAccepts(g, rm);
}
dumpDotWrapper(w, "02b_fairly_early", cc.grey);
dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey);
// If we're a vacuous pattern, we can handle this early.
if (splitOffVacuous(boundary, rm, w)) {
if (splitOffVacuous(boundary, rm, g, expr)) {
DEBUG_PRINTF("split off vacuous\n");
}
// We might be done at this point: if we've run out of vertices, we can
// stop processing.
if (num_vertices(w) == N_SPECIALS) {
if (num_vertices(g) == N_SPECIALS) {
DEBUG_PRINTF("all vertices claimed by vacuous handling\n");
return true;
}
// Now that vacuous edges have been removed, update the min width exclusive
// of boundary reports.
minWidth = min(minWidth, findMinWidth(w));
minWidth = min(minWidth, findMinWidth(g));
// Add the pattern to the small write builder.
smwr->add(w);
smwr->add(g, expr);
if (!som) {
removeSiblingsOfStartDotStar(w);
removeSiblingsOfStartDotStar(g);
}
dumpDotWrapper(w, "03_early", cc.grey);
dumpDotWrapper(g, expr, "03_early", cc.grey);
// Perform a reduction pass to merge sibling character classes together.
if (cc.grey.performGraphSimplification) {
removeRedundancy(w, som);
prunePathsRedundantWithSuccessorOfCyclics(w, som);
removeRedundancy(g, som);
prunePathsRedundantWithSuccessorOfCyclics(g, som);
}
dumpDotWrapper(w, "04_reduced", cc.grey);
dumpDotWrapper(g, expr, "04_reduced", cc.grey);
// If we've got some literals that span the graph from start to accept, we
// can split them off into Rose from here.
if (!som) {
if (splitOffLiterals(*this, w)) {
if (splitOffLiterals(*this, g)) {
DEBUG_PRINTF("some vertices claimed by literals\n");
}
}
// We might be done at this point: if we've run out of vertices, we can
// stop processing.
if (num_vertices(w) == N_SPECIALS) {
if (num_vertices(g) == N_SPECIALS) {
DEBUG_PRINTF("all vertices claimed before calc components\n");
return true;
}
// Split the graph into a set of connected components.
deque<unique_ptr<NGHolder>> g_comp = calcComponents(w);
deque<unique_ptr<NGHolder>> g_comp = calcComponents(g);
assert(!g_comp.empty());
if (!som) {
@ -443,14 +445,14 @@ bool NG::addGraph(NGWrapper &w) {
recalcComponents(g_comp);
}
if (processComponents(*this, w, g_comp, som)) {
if (processComponents(*this, expr, g_comp, som)) {
return true;
}
// If we're in prefiltering mode, we can run the prefilter reductions and
// have another shot at accepting the graph.
if (cc.grey.prefilterReductions && w.prefilter) {
if (cc.grey.prefilterReductions && expr.prefilter) {
for (u32 i = 0; i < g_comp.size(); i++) {
if (!g_comp[i]) {
continue;
@ -459,7 +461,7 @@ bool NG::addGraph(NGWrapper &w) {
prefilterReductions(*g_comp[i], cc);
}
if (processComponents(*this, w, g_comp, som)) {
if (processComponents(*this, expr, g_comp, som)) {
return true;
}
}
@ -469,7 +471,7 @@ bool NG::addGraph(NGWrapper &w) {
if (g_comp[i]) {
DEBUG_PRINTF("could not compile component %u with %zu vertices\n",
i, num_vertices(*g_comp[i]));
throw CompileError(w.expressionIndex, "Pattern is too large.");
throw CompileError(expr.index, "Pattern is too large.");
}
}
@ -478,60 +480,60 @@ bool NG::addGraph(NGWrapper &w) {
}
/** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */
bool NG::addHolder(NGHolder &w) {
DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(w));
assert(allMatchStatesHaveReports(w));
assert(hasCorrectlyNumberedVertices(w));
bool NG::addHolder(NGHolder &g) {
DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g));
assert(allMatchStatesHaveReports(g));
assert(hasCorrectlyNumberedVertices(g));
/* We don't update the global minWidth here as we care about the min width
* of the whole pattern - not just a prefix of it. */
bool prefilter = false;
//dumpDotComp(comp, w, *this, 20, "prefix_init");
//dumpDotComp(comp, g, *this, 20, "prefix_init");
som_type som = SOM_NONE; /* the prefixes created by the SOM code do not
themselves track som */
bool utf8 = false; // handling done earlier
reduceGraph(w, som, utf8, cc);
reduceGraph(g, som, utf8, cc);
// There may be redundant regions that we can remove
if (cc.grey.performGraphSimplification) {
removeRegionRedundancy(w, som);
removeRegionRedundancy(g, som);
}
// "Short Exhaustible Passthrough" patterns always become outfixes.
if (isSEP(w, rm, cc.grey)) {
if (isSEP(g, rm, cc.grey)) {
DEBUG_PRINTF("graph is SEP\n");
if (rose->addOutfix(w)) {
if (rose->addOutfix(g)) {
return true;
}
}
if (splitOffAnchoredAcyclic(*rose, w, cc)) {
if (splitOffAnchoredAcyclic(*rose, g, cc)) {
return true;
}
if (handleSmallLiteralSets(*rose, w, cc)
|| handleFixedWidth(*rose, w, cc.grey)) {
if (handleSmallLiteralSets(*rose, g, cc)
|| handleFixedWidth(*rose, g, cc.grey)) {
return true;
}
if (handleDecoratedLiterals(*rose, w, cc)) {
if (handleDecoratedLiterals(*rose, g, cc)) {
return true;
}
if (doViolet(*rose, w, prefilter, false, rm, cc)) {
if (doViolet(*rose, g, prefilter, false, rm, cc)) {
return true;
}
if (splitOffPuffs(*rose, rm, w, prefilter, cc)) {
if (splitOffPuffs(*rose, rm, g, prefilter, cc)) {
return true;
}
if (doViolet(*rose, w, prefilter, true, rm, cc)) {
if (doViolet(*rose, g, prefilter, true, rm, cc)) {
return true;
}
DEBUG_PRINTF("trying for outfix\n");
if (rose->addOutfix(w)) {
if (rose->addOutfix(g)) {
DEBUG_PRINTF("ok\n");
return true;
}
@ -586,26 +588,4 @@ bool NG::addLiteral(const ue2_literal &literal, u32 expr_index,
return true;
}
NGWrapper::NGWrapper(unsigned int ei, bool highlander_in, bool utf8_in,
bool prefilter_in, som_type som_in, ReportID r,
u64a min_offset_in, u64a max_offset_in, u64a min_length_in,
u32 edit_distance_in)
: expressionIndex(ei), reportId(r), highlander(highlander_in),
utf8(utf8_in), prefilter(prefilter_in), som(som_in),
min_offset(min_offset_in), max_offset(max_offset_in),
min_length(min_length_in), edit_distance(edit_distance_in) {
// All special nodes/edges are added in NGHolder's constructor.
DEBUG_PRINTF("built %p: expr=%u report=%u%s%s%s%s "
"min_offset=%llu max_offset=%llu min_length=%llu "
"edit_distance=%u\n",
this, expressionIndex, reportId,
highlander ? " highlander" : "",
utf8 ? " utf8" : "",
prefilter ? " prefilter" : "",
(som != SOM_NONE) ? " som" : "",
min_offset, max_offset, min_length, edit_distance);
}
NGWrapper::~NGWrapper() {}
} // namespace ue2

View File

@ -27,7 +27,7 @@
*/
/** \file
* \brief NG, NGHolder, NGWrapper declarations.
* \brief NG declaration.
*/
#ifndef NG_H
@ -58,31 +58,7 @@ namespace ue2 {
struct CompileContext;
struct ue2_literal;
class NGWrapper : public NGHolder {
public:
NGWrapper(unsigned int expressionIndex, bool highlander, bool utf8,
bool prefilter, const som_type som, ReportID rid, u64a min_offset,
u64a max_offset, u64a min_length, u32 edit_distance);
~NGWrapper() override;
/** index of the expression represented by this graph, used
* - down the track in error handling
* - identifying parts of an expression in highlander mode
*/
const unsigned int expressionIndex;
const ReportID reportId; /**< user-visible report id */
const bool highlander; /**< user-specified single match only */
const bool utf8; /**< UTF-8 mode */
const bool prefilter; /**< prefiltering mode */
const som_type som; /**< SOM type requested */
u64a min_offset; /**< extparam min_offset value */
u64a max_offset; /**< extparam max_offset value */
u64a min_length; /**< extparam min_length value */
u32 edit_distance; /**< extparam edit_distance value */
};
class ExpressionInfo;
class RoseBuild;
class SmallWriteBuild;
@ -94,14 +70,14 @@ public:
/** \brief Consumes a pattern, returns false or throws a CompileError
* exception if the graph cannot be consumed. */
bool addGraph(NGWrapper &w);
bool addGraph(ExpressionInfo &expr, NGHolder &h);
/** \brief Consumes a graph, cut-down version of addGraph for use by SOM
* processing. */
bool addHolder(NGHolder &h);
/** \brief Adds a literal to Rose, used by literal shortcut passes (instead of
* using \ref addGraph) */
/** \brief Adds a literal to Rose, used by literal shortcut passes (instead
* of using \ref addGraph) */
bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report,
bool highlander, som_type som);
@ -128,7 +104,8 @@ public:
*
* Shared with the small write compiler.
*/
void reduceGraph(NGHolder &g, som_type som, bool utf8, const CompileContext &cc);
void reduceGraph(NGHolder &g, som_type som, bool utf8,
const CompileContext &cc);
} // namespace ue2

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -47,6 +47,7 @@
#include "ng_prune.h"
#include "ng_redundancy.h"
#include "ng_util.h"
#include "compiler/compiler.h"
#include "parser/position.h" // for POS flags
#include "util/bitutils.h" // for findAndClearLSB_32
#include "util/boundary_reports.h"
@ -184,43 +185,45 @@ void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts,
}
static
void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) {
void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
NFAVertex v, s32 adj) {
// Don't try and set the report ID of a special vertex.
assert(!is_special(v, g));
// If there's a report set already, we're replacing it.
g[v].reports.clear();
Report ir = rm.getBasicInternalReport(g, adj);
Report ir = rm.getBasicInternalReport(expr, adj);
g[v].reports.insert(rm.getInternalId(ir));
DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
}
static
NFAVertex makeClone(ReportManager &rm, NGWrapper &g, NFAVertex v,
const CharReach &cr_mask) {
NFAVertex makeClone(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
NFAVertex v, const CharReach &cr_mask) {
NFAVertex clone = clone_vertex(g, v);
g[clone].char_reach &= cr_mask;
clone_out_edges(g, v, clone);
clone_in_edges(g, v, clone);
if (v == g.startDs) {
if (g.utf8) {
if (expr.utf8) {
g[clone].char_reach &= ~UTF_START_CR;
}
DEBUG_PRINTF("marked as virt\n");
g[clone].assert_flags = POS_FLAG_VIRTUAL_START;
setReportId(rm, g, clone, 0);
setReportId(rm, g, expr, clone, 0);
}
return clone;
}
static
void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
NFAVertex v, bool ucp) {
assert(v != g.start);
assert(v != g.accept);
assert(v != g.acceptEod);
@ -232,14 +235,14 @@ void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; };
// Split v into word/nonword vertices with only asserting out-edges.
NFAVertex w_out = makeClone(rm, g, v, cr_word);
NFAVertex nw_out = makeClone(rm, g, v, cr_nonword);
NFAVertex w_out = makeClone(rm, g, expr, v, cr_word);
NFAVertex nw_out = makeClone(rm, g, expr, v, cr_nonword);
remove_out_edge_if(w_out, has_no_assert, g);
remove_out_edge_if(nw_out, has_no_assert, g);
// Split v into word/nonword vertices with only asserting in-edges.
NFAVertex w_in = makeClone(rm, g, v, cr_word);
NFAVertex nw_in = makeClone(rm, g, v, cr_nonword);
NFAVertex w_in = makeClone(rm, g, expr, v, cr_word);
NFAVertex nw_in = makeClone(rm, g, expr, v, cr_nonword);
remove_in_edge_if(w_in, has_no_assert, g);
remove_in_edge_if(nw_in, has_no_assert, g);
@ -250,7 +253,8 @@ void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
}
static
void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
set<NFAEdge> *dead) {
for (const auto &e : edges_range(g)) {
u32 flags = g[e].assert_flags;
if (!flags) {
@ -363,7 +367,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
} else if (v_w) {
/* need to add a word byte */
NFAVertex vv = add_vertex(g);
setReportId(rm, g, vv, -1);
setReportId(rm, g, expr, vv, -1);
g[vv].char_reach = CHARREACH_WORD;
add_edge(vv, g.accept, g);
g[e].assert_flags = 0;
@ -372,7 +376,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
} else {
/* need to add a non word byte or see eod */
NFAVertex vv = add_vertex(g);
setReportId(rm, g, vv, -1);
setReportId(rm, g, expr, vv, -1);
g[vv].char_reach = CHARREACH_NONWORD;
add_edge(vv, g.accept, g);
g[e].assert_flags = 0;
@ -416,7 +420,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
} else if (v_w) {
/* need to add a word byte */
NFAVertex vv = add_vertex(g);
setReportId(rm, g, vv, -1);
setReportId(rm, g, expr, vv, -1);
g[vv].char_reach = CHARREACH_WORD_UCP_PRE;
add_edge(vv, g.accept, g);
g[e].assert_flags = 0;
@ -425,7 +429,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
} else {
/* need to add a non word byte or see eod */
NFAVertex vv = add_vertex(g);
setReportId(rm, g, vv, -1);
setReportId(rm, g, expr, vv, -1);
g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE;
add_edge(vv, g.accept, g);
g[e].assert_flags = 0;
@ -450,7 +454,8 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
}
}
void resolveAsserts(ReportManager &rm, NGWrapper &g) {
void resolveAsserts(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
vector<NFAEdge> asserts = getAsserts(g);
if (asserts.empty()) {
return;
@ -460,20 +465,20 @@ void resolveAsserts(ReportManager &rm, NGWrapper &g) {
map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */
findSplitters(g, asserts, &to_split, &to_split_ucp);
if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) {
throw CompileError(g.expressionIndex, "Pattern is too large.");
throw CompileError(expr.index, "Pattern is too large.");
}
for (const auto &m : to_split) {
assert(!contains(to_split_ucp, m.first));
splitVertex(rm, g, m.second, false);
splitVertex(rm, g, expr, m.second, false);
}
for (const auto &m : to_split_ucp) {
splitVertex(rm, g, m.second, true);
splitVertex(rm, g, expr, m.second, true);
}
set<NFAEdge> dead;
resolveEdges(rm, g, &dead);
resolveEdges(rm, g, expr, &dead);
remove_edges(dead, g);
renumber_vertices(g);
@ -485,15 +490,16 @@ void resolveAsserts(ReportManager &rm, NGWrapper &g) {
clearReports(g);
}
void ensureCodePointStart(ReportManager &rm, NGWrapper &g) {
void ensureCodePointStart(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
/* In utf8 mode there is an implicit assertion that we start at codepoint
* boundaries. Assert resolution handles the badness coming from asserts.
* The only other source of trouble is startDs->accept connections.
*/
NFAEdge orig = edge(g.startDs, g.accept, g);
if (g.utf8 && orig) {
DEBUG_PRINTF("rectifying %u\n", g.reportId);
Report ir = rm.getBasicInternalReport(g);
if (expr.utf8 && orig) {
DEBUG_PRINTF("rectifying %u\n", expr.report);
Report ir = rm.getBasicInternalReport(expr);
ReportID rep = rm.getInternalId(ir);
NFAVertex v_a = add_vertex(g);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -36,12 +36,14 @@
namespace ue2 {
struct BoundaryReports;
class NGWrapper;
class ExpressionInfo;
class NGHolder;
class ReportManager;
void resolveAsserts(ReportManager &rm, NGWrapper &g);
void resolveAsserts(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr);
void ensureCodePointStart(ReportManager &rm, NGWrapper &g);
void ensureCodePointStart(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr);
} // namespace ue2

View File

@ -28,11 +28,13 @@
/** \file
* \brief: NFA Graph Builder: used by Glushkov construction to construct an
* NGWrapper from a parsed expression.
* NGHolder from a parsed expression.
*/
#include "ng_builder.h"
#include "grey.h"
#include "ng.h"
#include "ng_builder.h"
#include "ng_util.h"
#include "ue2common.h"
#include "compiler/compiler.h" // for ParsedExpression
@ -79,7 +81,7 @@ public:
void cloneRegion(Position first, Position last,
unsigned posOffset) override;
unique_ptr<NGWrapper> getGraph() override;
BuiltExpression getGraph() override;
private:
/** fetch a vertex given its Position ID. */
@ -94,8 +96,11 @@ private:
/** \brief Greybox: used for resource limits. */
const Grey &grey;
/** \brief Underlying NGWrapper graph. */
unique_ptr<NGWrapper> graph;
/** \brief Underlying graph. */
unique_ptr<NGHolder> graph;
/** \brief Underlying expression info. */
ExpressionInfo expr;
/** \brief mapping from position to vertex. Use \ref getVertex for access.
* */
@ -108,13 +113,9 @@ private:
} // namespace
NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in,
const ParsedExpression &expr)
: rm(rm_in), grey(grey_in),
graph(ue2::make_unique<NGWrapper>(
expr.index, expr.highlander, expr.utf8, expr.prefilter, expr.som,
expr.id, expr.min_offset, expr.max_offset, expr.min_length,
expr.edit_distance)),
vertIdx(N_SPECIALS) {
const ParsedExpression &parsed)
: rm(rm_in), grey(grey_in), graph(ue2::make_unique<NGHolder>()),
expr(parsed.expr), vertIdx(N_SPECIALS) {
// Reserve space for a reasonably-sized NFA
id2vertex.reserve(64);
@ -151,7 +152,7 @@ void NFABuilderImpl::addVertex(Position pos) {
(*graph)[v].index = pos;
}
unique_ptr<NGWrapper> NFABuilderImpl::getGraph() {
BuiltExpression NFABuilderImpl::getGraph() {
DEBUG_PRINTF("built graph has %zu vertices and %zu edges\n",
num_vertices(*graph), num_edges(*graph));
@ -162,13 +163,13 @@ unique_ptr<NGWrapper> NFABuilderImpl::getGraph() {
throw CompileError("Pattern too large.");
}
return move(graph);
return { expr, move(graph) };
}
void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) {
Report ir = rm.getBasicInternalReport(*graph, offsetAdjust);
Report ir = rm.getBasicInternalReport(expr, offsetAdjust);
DEBUG_PRINTF("setting report id on %u = (%u, %d, %u)\n",
pos, graph->reportId, offsetAdjust, ir.ekey);
pos, expr.report, offsetAdjust, ir.ekey);
NFAVertex v = getVertex(pos);
auto &reports = (*graph)[v].reports;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -28,7 +28,7 @@
/** \file
* \brief: NFA Graph Builder: used by Glushkov construction to construct an
* NGWrapper from a parsed expression.
* NGHolder from a parsed expression.
*/
#ifndef NG_BUILDER_H
@ -44,8 +44,8 @@
namespace ue2 {
class CharReach;
class NGWrapper;
class ReportManager;
struct BuiltExpression;
struct CompileContext;
class ParsedExpression;
@ -83,10 +83,10 @@ public:
unsigned posOffset) = 0;
/**
* \brief Returns the built NGWrapper graph.
* \brief Returns the built NGHolder graph and ExpressionInfo.
* Note that this builder cannot be used after this call.
*/
virtual std::unique_ptr<NGWrapper> getGraph() = 0;
virtual BuiltExpression getGraph() = 0;
};
/** Construct a usable NFABuilder. */

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -35,24 +35,25 @@
#include "config.h"
#include "ng_dump.h"
#include "nfagraph/ng_dump.h"
#include "hwlm/hwlm_build.h"
#include "ng.h"
#include "ng_util.h"
#include "parser/position.h"
#include "hs_compile.h" /* for HS_MODE_* flags */
#include "ue2common.h"
#include "compiler/compiler.h"
#include "hwlm/hwlm_build.h"
#include "nfa/accel.h"
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
#include "smallwrite/smallwrite_dump.h"
#include "nfagraph/ng.h"
#include "nfagraph/ng_util.h"
#include "parser/position.h"
#include "rose/rose_build.h"
#include "rose/rose_internal.h"
#include "smallwrite/smallwrite_dump.h"
#include "util/bitutils.h"
#include "util/dump_charclass.h"
#include "util/report.h"
#include "util/report_manager.h"
#include "util/ue2string.h"
#include "hs_compile.h" /* for HS_MODE_* flags */
#include <cmath>
#include <fstream>
@ -287,13 +288,13 @@ void dumpGraphImpl(const char *name, const GraphT &g,
// manual instantiation of templated dumpGraph above.
template void dumpGraphImpl(const char *, const NGHolder &);
void dumpDotWrapperImpl(const NGWrapper &nw, const char *name,
const Grey &grey) {
void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr,
const char *name, const Grey &grey) {
if (grey.dumpFlags & Grey::DUMP_INT_GRAPH) {
stringstream ss;
ss << grey.dumpPath << "Expr_" << nw.expressionIndex << "_" << name << ".dot";
ss << grey.dumpPath << "Expr_" << expr.index << "_" << name << ".dot";
DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str());
dumpGraphImpl(ss.str().c_str(), nw);
dumpGraphImpl(ss.str().c_str(), g);
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -48,7 +48,7 @@ namespace ue2 {
class NGHolder;
class NG;
class NGWrapper;
class ExpressionInfo;
class ReportManager;
// Implementations for stubs below -- all have the suffix "Impl".
@ -61,7 +61,8 @@ void dumpGraphImpl(const char *name, const GraphT &g);
template <typename GraphT>
void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm);
void dumpDotWrapperImpl(const NGWrapper &w, const char *name, const Grey &grey);
void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr,
const char *name, const Grey &grey);
void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, u32 comp,
const Grey &grey);
@ -88,10 +89,10 @@ static inline void dumpGraph(UNUSED const char *name, UNUSED const GraphT &g) {
// Stubs which call through to dump code if compiled in.
UNUSED static inline
void dumpDotWrapper(UNUSED const NGWrapper &w, UNUSED const char *name,
UNUSED const Grey &grey) {
void dumpDotWrapper(UNUSED const NGHolder &g, UNUSED const ExpressionInfo &expr,
UNUSED const char *name, UNUSED const Grey &grey) {
#ifdef DUMP_SUPPORT
dumpDotWrapperImpl(w, name, grey);
dumpDotWrapperImpl(g, expr, name, grey);
#endif
}

View File

@ -27,8 +27,8 @@
*/
/** \file
* \brief Code for discovering properties of an NGWrapper used by
* hs_expression_info.
* \brief Code for discovering properties of an NFA graph used by
* hs_expression_info().
*/
#include "ng_expr_info.h"
@ -58,42 +58,42 @@ namespace ue2 {
/* get rid of leading \b and multiline ^ vertices */
static
void removeLeadingVirtualVerticesFromRoot(NGWrapper &w, NFAVertex root) {
void removeLeadingVirtualVerticesFromRoot(NGHolder &g, NFAVertex root) {
vector<NFAVertex> victims;
for (auto v : adjacent_vertices_range(root, w)) {
if (w[v].assert_flags & POS_FLAG_VIRTUAL_START) {
for (auto v : adjacent_vertices_range(root, g)) {
if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n");
victims.push_back(v);
}
}
for (auto u : victims) {
for (auto v : adjacent_vertices_range(u, w)) {
add_edge_if_not_present(root, v, w);
for (auto v : adjacent_vertices_range(u, g)) {
add_edge_if_not_present(root, v, g);
}
}
remove_vertices(victims, w);
remove_vertices(victims, g);
}
static
void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v,
void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v,
const vector<DepthMinMax> &depths, DepthMinMax &info) {
if (is_any_accept(v, w)) {
if (is_any_accept(v, g)) {
return;
}
if (is_any_start(v, w)) {
if (is_any_start(v, g)) {
info.min = 0;
info.max = max(info.max, depth(0));
return;
}
u32 idx = w[v].index;
u32 idx = g[v].index;
assert(idx < depths.size());
const DepthMinMax &d = depths.at(idx);
for (ReportID report_id : w[v].reports) {
for (ReportID report_id : g[v].reports) {
const Report &report = rm.getReport(report_id);
assert(report.type == EXTERNAL_CALLBACK);
@ -118,7 +118,7 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v,
rd.max = min(rd.max, max_offset);
}
DEBUG_PRINTF("vertex %zu report %u: %s\n", w[v].index, report_id,
DEBUG_PRINTF("vertex %zu report %u: %s\n", g[v].index, report_id,
rd.str().c_str());
info = unionDepthMinMax(info, rd);
@ -126,8 +126,8 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v,
}
static
bool hasOffsetAdjust(const ReportManager &rm, const NGWrapper &w) {
for (const auto &report_id : all_reports(w)) {
bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g) {
for (const auto &report_id : all_reports(g)) {
if (rm.getReport(report_id).offsetAdjust) {
return true;
}
@ -135,28 +135,29 @@ bool hasOffsetAdjust(const ReportManager &rm, const NGWrapper &w) {
return false;
}
void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info) {
void fillExpressionInfo(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr, hs_expr_info *info) {
assert(info);
/* ensure utf8 starts at cp boundary */
ensureCodePointStart(rm, w);
resolveAsserts(rm, w);
optimiseVirtualStarts(w);
ensureCodePointStart(rm, g, expr);
resolveAsserts(rm, g, expr);
optimiseVirtualStarts(g);
removeLeadingVirtualVerticesFromRoot(w, w.start);
removeLeadingVirtualVerticesFromRoot(w, w.startDs);
removeLeadingVirtualVerticesFromRoot(g, g.start);
removeLeadingVirtualVerticesFromRoot(g, g.startDs);
vector<DepthMinMax> depths;
calcDepthsFrom(w, w.start, depths);
calcDepthsFrom(g, g.start, depths);
DepthMinMax d;
for (auto u : inv_adjacent_vertices_range(w.accept, w)) {
checkVertex(rm, w, u, depths, d);
for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
checkVertex(rm, g, u, depths, d);
}
for (auto u : inv_adjacent_vertices_range(w.acceptEod, w)) {
checkVertex(rm, w, u, depths, d);
for (auto u : inv_adjacent_vertices_range(g.acceptEod, g)) {
checkVertex(rm, g, u, depths, d);
}
if (d.max.is_finite()) {
@ -170,9 +171,9 @@ void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info) {
info->min_width = UINT_MAX;
}
info->unordered_matches = hasOffsetAdjust(rm, w);
info->matches_at_eod = can_match_at_eod(w);
info->matches_only_at_eod = can_only_match_at_eod(w);
info->unordered_matches = hasOffsetAdjust(rm, g);
info->matches_at_eod = can_match_at_eod(g);
info->matches_only_at_eod = can_only_match_at_eod(g);
}
} // namespace ue2

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -27,7 +27,7 @@
*/
/** \file
* \brief Code for discovering properties of an NGWrapper used by
* \brief Code for discovering properties of an expression used by
* hs_expression_info.
*/
@ -36,14 +36,14 @@
struct hs_expr_info;
#include "ue2common.h"
namespace ue2 {
class NGWrapper;
class ExpressionInfo;
class NGHolder;
class ReportManager;
void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info);
void fillExpressionInfo(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr, hs_expr_info *info);
} // namespace ue2

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -38,16 +38,19 @@
* match given these constraints, or transform the graph in order to make a
* constraint implicit.
*/
#include "ng_extparam.h"
#include "ng.h"
#include "ng_depth.h"
#include "ng_dump.h"
#include "ng_extparam.h"
#include "ng_prune.h"
#include "ng_reports.h"
#include "ng_som_util.h"
#include "ng_width.h"
#include "ng_util.h"
#include "ue2common.h"
#include "compiler/compiler.h"
#include "parser/position.h"
#include "util/compile_context.h"
#include "util/compile_error.h"
@ -129,7 +132,8 @@ DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) {
/** \brief Replace the graph's reports with new reports that specify bounds. */
static
void updateReportBounds(ReportManager &rm, NGWrapper &g, NFAVertex accept,
void updateReportBounds(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr, NFAVertex accept,
set<NFAVertex> &done) {
for (auto v : inv_adjacent_vertices_range(accept, g)) {
// Don't operate on g.accept itself.
@ -153,16 +157,16 @@ void updateReportBounds(ReportManager &rm, NGWrapper &g, NFAVertex accept,
// Note that we need to cope with offset adjustment here.
ir.minOffset = g.min_offset - ir.offsetAdjust;
if (g.max_offset == MAX_OFFSET) {
ir.minOffset = expr.min_offset - ir.offsetAdjust;
if (expr.max_offset == MAX_OFFSET) {
ir.maxOffset = MAX_OFFSET;
} else {
ir.maxOffset = g.max_offset - ir.offsetAdjust;
ir.maxOffset = expr.max_offset - ir.offsetAdjust;
}
assert(ir.maxOffset >= ir.minOffset);
ir.minLength = g.min_length;
if (g.min_length && !g.som) {
ir.minLength = expr.min_length;
if (expr.min_length && !expr.som) {
ir.quashSom = true;
}
@ -196,22 +200,23 @@ bool hasVirtualStarts(const NGHolder &g) {
* anchored and unanchored paths, but it's too tricky for the moment.
*/
static
bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth,
bool anchorPatternWithBoundedRepeat(NGHolder &g, const ExpressionInfo &expr,
const depth &minWidth,
const depth &maxWidth) {
assert(!g.som);
assert(g.max_offset != MAX_OFFSET);
assert(!expr.som);
assert(expr.max_offset != MAX_OFFSET);
assert(minWidth <= maxWidth);
assert(maxWidth.is_reachable());
DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n",
minWidth.str().c_str(), maxWidth.str().c_str(), g.min_offset,
g.max_offset);
minWidth.str().c_str(), maxWidth.str().c_str(),
expr.min_offset, expr.max_offset);
if (g.max_offset > MAX_MAXOFFSET_TO_ANCHOR) {
if (expr.max_offset > MAX_MAXOFFSET_TO_ANCHOR) {
return false;
}
if (g.max_offset < minWidth) {
if (expr.max_offset < minWidth) {
assert(0);
return false;
}
@ -232,10 +237,10 @@ bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth,
u32 min_bound, max_bound;
if (maxWidth.is_infinite()) {
min_bound = 0;
max_bound = g.max_offset - minWidth;
max_bound = expr.max_offset - minWidth;
} else {
min_bound = g.min_offset > maxWidth ? g.min_offset - maxWidth : 0;
max_bound = g.max_offset - minWidth;
min_bound = expr.min_offset > maxWidth ? expr.min_offset - maxWidth : 0;
max_bound = expr.max_offset - minWidth;
}
DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound);
@ -315,7 +320,7 @@ NFAVertex findSingleCyclic(const NGHolder &g) {
}
static
bool hasOffsetAdjust(const ReportManager &rm, NGWrapper &g,
bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g,
int *adjust) {
const auto &reports = all_reports(g);
if (reports.empty()) {
@ -342,10 +347,11 @@ bool hasOffsetAdjust(const ReportManager &rm, NGWrapper &g,
* /foo.*bar/{min_length=100} --> /foo.{94,}bar/
*/
static
bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) {
assert(g.min_length);
bool transformMinLengthToRepeat(const ReportManager &rm, NGHolder &g,
ExpressionInfo &expr) {
assert(expr.min_length);
if (g.min_length > MAX_MINLENGTH_TO_CONVERT) {
if (expr.min_length > MAX_MINLENGTH_TO_CONVERT) {
return false;
}
@ -437,10 +443,10 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) {
DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width,
g[cyclic].index);
if (width >= g.min_length) {
if (width >= expr.min_length) {
DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n",
g.min_length, width);
g.min_length = 0;
expr.min_length, width);
expr.min_length = 0;
return true;
}
@ -468,7 +474,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) {
const CharReach &cr = g[cyclic].char_reach;
for (u32 i = 0; i < g.min_length - width - 1; ++i) {
for (u32 i = 0; i < expr.min_length - width - 1; ++i) {
v = add_vertex(g);
g[v].char_reach = cr;
@ -487,19 +493,19 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) {
renumber_edges(g);
clearReports(g);
g.min_length = 0;
expr.min_length = 0;
return true;
}
static
bool hasExtParams(const NGWrapper &g) {
if (g.min_length != 0) {
bool hasExtParams(const ExpressionInfo &expr) {
if (expr.min_length != 0) {
return true;
}
if (g.min_offset != 0) {
if (expr.min_offset != 0) {
return true;
}
if (g.max_offset != MAX_OFFSET) {
if (expr.max_offset != MAX_OFFSET) {
return true;
}
return false;
@ -535,7 +541,7 @@ const depth& minDistToAccept(const NFAVertexBidiDepth &d) {
}
static
bool isEdgePrunable(const NGWrapper &g,
bool isEdgePrunable(const NGHolder &g, const ExpressionInfo &expr,
const vector<NFAVertexBidiDepth> &depths,
const NFAEdge &e) {
const NFAVertex u = source(e, g);
@ -564,29 +570,29 @@ bool isEdgePrunable(const NGWrapper &g,
const NFAVertexBidiDepth &du = depths.at(u_idx);
const NFAVertexBidiDepth &dv = depths.at(v_idx);
if (g.min_offset) {
if (expr.min_offset) {
depth max_offset = maxDistFromStart(du) + maxDistToAccept(dv);
if (max_offset.is_finite() && max_offset < g.min_offset) {
if (max_offset.is_finite() && max_offset < expr.min_offset) {
DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str());
return true;
}
}
if (g.max_offset != MAX_OFFSET) {
if (expr.max_offset != MAX_OFFSET) {
depth min_offset = minDistFromStart(du) + minDistToAccept(dv);
assert(min_offset.is_finite());
if (min_offset > g.max_offset) {
if (min_offset > expr.max_offset) {
DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str());
return true;
}
}
if (g.min_length && is_any_accept(v, g)) {
if (expr.min_length && is_any_accept(v, g)) {
// Simple take on min_length. If we're an edge to accept and our max
// dist from start is too small, we can be pruned.
const depth &width = du.fromStart.max;
if (width.is_finite() && width < g.min_length) {
if (width.is_finite() && width < expr.min_length) {
DEBUG_PRINTF("max width %s from start too small for min_length\n",
width.str().c_str());
return true;
@ -597,14 +603,14 @@ bool isEdgePrunable(const NGWrapper &g,
}
static
void pruneExtUnreachable(NGWrapper &g) {
void pruneExtUnreachable(NGHolder &g, const ExpressionInfo &expr) {
vector<NFAVertexBidiDepth> depths;
calcDepths(g, depths);
vector<NFAEdge> dead;
for (const auto &e : edges_range(g)) {
if (isEdgePrunable(g, depths, e)) {
if (isEdgePrunable(g, expr, depths, e)) {
DEBUG_PRINTF("pruning\n");
dead.push_back(e);
}
@ -621,8 +627,8 @@ void pruneExtUnreachable(NGWrapper &g) {
/** Remove vacuous edges in graphs where the min_offset or min_length
* constraints dictate that they can never produce a match. */
static
void pruneVacuousEdges(NGWrapper &g) {
if (!g.min_length && !g.min_offset) {
void pruneVacuousEdges(NGHolder &g, const ExpressionInfo &expr) {
if (!expr.min_length && !expr.min_offset) {
return;
}
@ -634,14 +640,14 @@ void pruneVacuousEdges(NGWrapper &g) {
// Special case: Crudely remove vacuous edges from start in graphs with a
// min_offset.
if (g.min_offset && u == g.start && is_any_accept(v, g)) {
if (expr.min_offset && u == g.start && is_any_accept(v, g)) {
DEBUG_PRINTF("vacuous edge in graph with min_offset!\n");
dead.push_back(e);
continue;
}
// If a min_length is set, vacuous edges can be removed.
if (g.min_length && is_any_start(u, g) && is_any_accept(v, g)) {
if (expr.min_length && is_any_start(u, g) && is_any_accept(v, g)) {
DEBUG_PRINTF("vacuous edge in graph with min_length!\n");
dead.push_back(e);
continue;
@ -657,7 +663,8 @@ void pruneVacuousEdges(NGWrapper &g) {
}
static
void pruneUnmatchable(NGWrapper &g, const vector<DepthMinMax> &depths,
void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr,
const vector<DepthMinMax> &depths,
const ReportManager &rm, NFAVertex accept) {
vector<NFAEdge> dead;
@ -676,16 +683,16 @@ void pruneUnmatchable(NGWrapper &g, const vector<DepthMinMax> &depths,
d.min += adj.first;
d.max += adj.second;
if (d.max.is_finite() && d.max < g.min_length) {
if (d.max.is_finite() && d.max < expr.min_length) {
DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n",
d.max.str().c_str(), g.min_length);
d.max.str().c_str(), expr.min_length);
dead.push_back(e);
continue;
}
if (g.max_offset != MAX_OFFSET && d.min > g.max_offset) {
if (expr.max_offset != MAX_OFFSET && d.min > expr.max_offset) {
DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n",
d.min.str().c_str(), g.max_offset);
d.min.str().c_str(), expr.max_offset);
dead.push_back(e);
continue;
}
@ -697,15 +704,16 @@ void pruneUnmatchable(NGWrapper &g, const vector<DepthMinMax> &depths,
/** Remove edges to accepts that can never produce a match long enough to
* satisfy our min_length and max_offset constraints. */
static
void pruneUnmatchable(NGWrapper &g, const ReportManager &rm) {
if (!g.min_length) {
void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr,
const ReportManager &rm) {
if (!expr.min_length) {
return;
}
vector<DepthMinMax> depths = getDistancesFromSOM(g);
pruneUnmatchable(g, depths, rm, g.accept);
pruneUnmatchable(g, depths, rm, g.acceptEod);
pruneUnmatchable(g, expr, depths, rm, g.accept);
pruneUnmatchable(g, expr, depths, rm, g.acceptEod);
pruneUseless(g);
}
@ -732,9 +740,9 @@ bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) {
return false;
}
void handleExtendedParams(ReportManager &rm, NGWrapper &g,
void handleExtendedParams(ReportManager &rm, NGHolder &g, ExpressionInfo &expr,
UNUSED const CompileContext &cc) {
if (!hasExtParams(g)) {
if (!hasExtParams(expr)) {
return;
}
@ -751,50 +759,50 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g,
DepthMinMax match_depths = findMatchLengths(rm, g);
DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str());
if (is_anchored && maxWidth.is_finite() && g.min_offset > maxWidth) {
if (is_anchored && maxWidth.is_finite() && expr.min_offset > maxWidth) {
ostringstream oss;
oss << "Expression is anchored and cannot satisfy min_offset="
<< g.min_offset << " as it can only produce matches of length "
<< expr.min_offset << " as it can only produce matches of length "
<< maxWidth << " bytes at most.";
throw CompileError(g.expressionIndex, oss.str());
throw CompileError(expr.index, oss.str());
}
if (minWidth > g.max_offset) {
if (minWidth > expr.max_offset) {
ostringstream oss;
oss << "Expression has max_offset=" << g.max_offset << " but requires "
<< minWidth << " bytes to match.";
throw CompileError(g.expressionIndex, oss.str());
oss << "Expression has max_offset=" << expr.max_offset
<< " but requires " << minWidth << " bytes to match.";
throw CompileError(expr.index, oss.str());
}
if (maxWidth.is_finite() && match_depths.max < g.min_length) {
if (maxWidth.is_finite() && match_depths.max < expr.min_length) {
ostringstream oss;
oss << "Expression has min_length=" << g.min_length << " but can "
oss << "Expression has min_length=" << expr.min_length << " but can "
"only produce matches of length " << match_depths.max <<
" bytes at most.";
throw CompileError(g.expressionIndex, oss.str());
throw CompileError(expr.index, oss.str());
}
if (g.min_length && g.min_length <= match_depths.min) {
if (expr.min_length && expr.min_length <= match_depths.min) {
DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n",
g.min_length);
g.min_length = 0;
expr.min_length);
expr.min_length = 0;
}
if (!hasExtParams(g)) {
if (!hasExtParams(expr)) {
return;
}
pruneVacuousEdges(g);
pruneUnmatchable(g, rm);
pruneVacuousEdges(g, expr);
pruneUnmatchable(g, expr, rm);
if (!has_offset_adj) {
pruneExtUnreachable(g);
pruneExtUnreachable(g, expr);
}
// We may have removed all the edges to accept, in which case this
// expression cannot match.
if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) {
throw CompileError(g.expressionIndex, "Extended parameter "
throw CompileError(expr.index, "Extended parameter "
"constraints can not be satisfied for any match from "
"this expression.");
}
@ -812,27 +820,28 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g,
// If the pattern is completely anchored and has a min_length set, this can
// be converted to a min_offset.
if (g.min_length && (g.min_offset <= g.min_length) && is_anchored) {
DEBUG_PRINTF("converting min_length to min_offset=%llu for "
"anchored case\n", g.min_length);
g.min_offset = g.min_length;
g.min_length = 0;
if (expr.min_length && (expr.min_offset <= expr.min_length) &&
is_anchored) {
DEBUG_PRINTF("converting min_length to min_offset=%llu for "
"anchored case\n", expr.min_length);
expr.min_offset = expr.min_length;
expr.min_length = 0;
}
if (g.min_offset && g.min_offset <= minWidth && !has_offset_adj) {
if (expr.min_offset && expr.min_offset <= minWidth && !has_offset_adj) {
DEBUG_PRINTF("min_offset=%llu constraint is unnecessary\n",
g.min_offset);
g.min_offset = 0;
expr.min_offset);
expr.min_offset = 0;
}
if (!hasExtParams(g)) {
if (!hasExtParams(expr)) {
return;
}
// If the pattern has a min_length and is of "ratchet" form with one
// unbounded repeat, that repeat can become a bounded repeat.
// e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/
if (g.min_length && transformMinLengthToRepeat(rm, g)) {
if (expr.min_length && transformMinLengthToRepeat(rm, g, expr)) {
DEBUG_PRINTF("converted min_length to bounded repeat\n");
// recalc
minWidth = findMinWidth(g);
@ -846,28 +855,28 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g,
// Note that it is possible to handle graphs that have a combination of
// anchored and unanchored paths, but it's too tricky for the moment.
if (g.max_offset != MAX_OFFSET && !g.som && !g.min_length &&
!has_offset_adj && isUnanchored(g)) {
if (anchorPatternWithBoundedRepeat(g, minWidth, maxWidth)) {
if (expr.max_offset != MAX_OFFSET && !expr.som && !expr.min_length &&
!has_offset_adj && isUnanchored(g)) {
if (anchorPatternWithBoundedRepeat(g, expr, minWidth, maxWidth)) {
DEBUG_PRINTF("minWidth=%s, maxWidth=%s\n", minWidth.str().c_str(),
maxWidth.str().c_str());
if (minWidth == maxWidth) {
// For a fixed width pattern, we can retire the offsets as they
// are implicit in the graph now.
g.min_offset = 0;
g.max_offset = MAX_OFFSET;
expr.min_offset = 0;
expr.max_offset = MAX_OFFSET;
}
}
}
//dumpGraph("final.dot", g);
if (!hasExtParams(g)) {
if (!hasExtParams(expr)) {
return;
}
set<NFAVertex> done;
updateReportBounds(rm, g, g.accept, done);
updateReportBounds(rm, g, g.acceptEod, done);
updateReportBounds(rm, g, expr, g.accept, done);
updateReportBounds(rm, g, expr, g.acceptEod, done);
}
} // namespace ue2

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -37,10 +37,11 @@
namespace ue2 {
struct CompileContext;
class NGWrapper;
class ExpressionInfo;
class NGHolder;
class ReportManager;
void handleExtendedParams(ReportManager &rm, NGWrapper &g,
void handleExtendedParams(ReportManager &rm, NGHolder &g, ExpressionInfo &expr,
const CompileContext &cc);
} // namespace ue2

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -30,12 +30,15 @@
* \brief Literal Component Splitting. Identifies literals that span the
* graph and moves them into Rose.
*/
#include "ng_literal_component.h"
#include "grey.h"
#include "ng.h"
#include "ng_literal_component.h"
#include "ng_prune.h"
#include "ng_util.h"
#include "ue2common.h"
#include "compiler/compiler.h"
#include "rose/rose_build.h"
#include "util/container.h"
#include "util/graph.h"
@ -47,8 +50,8 @@ using namespace std;
namespace ue2 {
static
bool isLiteralChar(const NGWrapper &g, NFAVertex v,
bool &nocase, bool &casefixed) {
bool isLiteralChar(const NGHolder &g, NFAVertex v, bool &nocase,
bool &casefixed) {
const CharReach &cr = g[v].char_reach;
const size_t num = cr.count();
if (num > 2) {
@ -93,7 +96,7 @@ void addToString(string &s, const NGHolder &g, NFAVertex v) {
}
static
bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored,
bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored,
set<NFAVertex> &dead) {
DEBUG_PRINTF("examine vertex %zu\n", g[v].index);
bool nocase = false, casefixed = false;
@ -185,7 +188,7 @@ bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored,
}
/** \brief Split off literals. True if any changes were made to the graph. */
bool splitOffLiterals(NG &ng, NGWrapper &g) {
bool splitOffLiterals(NG &ng, NGHolder &g) {
if (!ng.cc.grey.allowLiteral) {
return false;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -37,10 +37,10 @@
namespace ue2 {
class NG;
class NGWrapper;
class NGHolder;
/** \brief Split off literals. True if any changes were made to the graph. */
bool splitOffLiterals(NG &ng, NGWrapper &graph);
bool splitOffLiterals(NG &ng, NGHolder &g);
} // namespace ue2

View File

@ -29,6 +29,9 @@
/** \file
* \brief SOM ("Start of Match") analysis.
*/
#include "ng_som.h"
#include "ng.h"
#include "ng_dump.h"
#include "ng_equivalence.h"
@ -40,7 +43,6 @@
#include "ng_redundancy.h"
#include "ng_region.h"
#include "ng_reports.h"
#include "ng_som.h"
#include "ng_som_add_redundancy.h"
#include "ng_som_util.h"
#include "ng_split.h"
@ -49,6 +51,7 @@
#include "ng_width.h"
#include "grey.h"
#include "ue2common.h"
#include "compiler/compiler.h"
#include "nfa/goughcompile.h"
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
#include "parser/position.h"
@ -1584,8 +1587,9 @@ void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p,
* implement the full pattern.
*/
static
void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g,
vector<som_plan> &plan, const u32 first_som_slot) {
void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id,
NGHolder &g, vector<som_plan> &plan,
const u32 first_som_slot) {
ReportManager &rm = ng.rm;
SomSlotManager &ssm = ng.ssm;
@ -1598,14 +1602,14 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g,
// Root plan, which already has a SOM slot assigned (first_som_slot).
dumpSomPlan(g, plan.front(), 0);
dumpSomSubComponent(*plan.front().prefix, "04_som", w.expressionIndex,
comp_id, 0, ng.cc.grey);
dumpSomSubComponent(*plan.front().prefix, "04_som", expr.index, comp_id, 0,
ng.cc.grey);
assert(plan.front().prefix);
if (plan.front().escapes.any() && !plan.front().is_reset) {
/* setup escaper for first som location */
if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes,
first_som_slot)) {
throw CompileError(w.expressionIndex, "Pattern is too large.");
throw CompileError(expr.index, "Pattern is too large.");
}
}
@ -1617,7 +1621,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g,
for (++it; it != plan.end(); ++it) {
const u32 plan_num = it - plan.begin();
dumpSomPlan(g, *it, plan_num);
dumpSomSubComponent(*it->prefix, "04_som", w.expressionIndex, comp_id,
dumpSomSubComponent(*it->prefix, "04_som", expr.index, comp_id,
plan_num, ng.cc.grey);
assert(it->parent < plan_num);
@ -1628,7 +1632,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g,
assert(!it->no_implement);
if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) {
throw CompileError(w.expressionIndex, "Pattern is too large.");
throw CompileError(expr.index, "Pattern is too large.");
}
updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in);
updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out);
@ -1639,7 +1643,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g,
renumber_vertices(*plan.front().prefix);
assert(plan.front().prefix->kind == NFA_OUTFIX);
if (!ng.addHolder(*plan.front().prefix)) {
throw CompileError(w.expressionIndex, "Pattern is too large.");
throw CompileError(expr.index, "Pattern is too large.");
}
}
}
@ -1852,7 +1856,7 @@ bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) {
}
static
u32 doSomRevNfaPrefix(NG &ng, const NGWrapper &w, NGHolder &g,
u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g,
const CompileContext &cc) {
depth maxWidth = findMaxWidth(g);
@ -1861,7 +1865,7 @@ u32 doSomRevNfaPrefix(NG &ng, const NGWrapper &w, NGHolder &g,
auto nfa = makeBareSomRevNfa(g, cc);
if (!nfa) {
throw CompileError(w.expressionIndex, "Pattern is too large.");
throw CompileError(expr.index, "Pattern is too large.");
}
if (ng.cc.streaming) {
@ -2055,8 +2059,8 @@ void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr<NGHolder> &prefix,
}
static
sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
som_type som,
sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
u32 comp_id, som_type som,
const ue2::unordered_map<NFAVertex, u32> &regions,
const map<u32, region_info> &info,
map<u32, region_info>::const_iterator lower_bound) {
@ -2077,7 +2081,7 @@ sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
// This is an optimisation: if we can't build a Haig from a portion of
// the graph, then we won't be able to manage it as an outfix either
// when we fall back.
throw CompileError(w.expressionIndex, "Pattern is too large.");
throw CompileError(expr.index, "Pattern is too large.");
}
while (1) {
@ -2152,7 +2156,7 @@ sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
goto next_try;
}
implementSomPlan(ng, w, comp_id, g, plan, som_loc);
implementSomPlan(ng, expr, comp_id, g, plan, som_loc);
Report ir = makeCallback(0U, 0);
assert(!plan.empty());
@ -2877,7 +2881,7 @@ unique_ptr<NGHolder> makePrefixForChain(NGHolder &g,
return prefix;
}
sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id,
som_type som) {
assert(som);
DEBUG_PRINTF("som hello\n");
@ -3001,7 +3005,7 @@ sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
/* create prefix to set the som_loc */
updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET);
if (prefix_by_rev) {
u32 rev_comp_id = doSomRevNfaPrefix(ng, w, *prefix, cc);
u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc);
updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
}
renumber_vertices(*prefix);
@ -3084,18 +3088,18 @@ sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET);
}
if (prefix_by_rev && !plan.front().no_implement) {
u32 rev_comp_id = doSomRevNfaPrefix(ng, w, *prefix, cc);
u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc);
updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
}
implementSomPlan(ng, w, comp_id, g, plan, som_loc);
implementSomPlan(ng, expr, comp_id, g, plan, som_loc);
DEBUG_PRINTF("success\n");
return SOMBE_HANDLED_INTERNAL;
}
sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
som_type som) {
sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr,
u32 comp_id, som_type som) {
assert(som);
DEBUG_PRINTF("som+haig hello\n");
@ -3132,7 +3136,7 @@ sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
buildRegionMapping(g, regions, info, true);
sombe_rv rv =
doHaigLitSom(ng, g, w, comp_id, som, regions, info, info.begin());
doHaigLitSom(ng, g, expr, comp_id, som, regions, info, info.begin());
if (rv == SOMBE_FAIL) {
clear_graph(g);
cloneHolder(g, g_pristine);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -34,12 +34,14 @@
#define NG_SOM_H
#include "som/som.h"
#include "ue2common.h"
namespace ue2 {
class ExpressionInfo;
class NG;
class NGHolder;
class NGWrapper;
class ReportManager;
struct Grey;
enum sombe_rv {
@ -63,14 +65,14 @@ enum sombe_rv {
* May throw a "Pattern too large" exception if prefixes of the
* pattern are too large to compile.
*/
sombe_rv doSom(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id,
sombe_rv doSom(NG &ng, NGHolder &h, const ExpressionInfo &expr, u32 comp_id,
som_type som);
/** Returns SOMBE_FAIL (and the original graph) if SOM cannot be established.
* May also throw a "Pattern too large" exception if prefixes of the pattern are
* too large to compile. */
sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id,
som_type som);
sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const ExpressionInfo &expr,
u32 comp_id, som_type som);
void makeReportsSomPass(ReportManager &rm, NGHolder &g);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -34,6 +34,7 @@
#include "ng.h"
#include "ng_prune.h"
#include "ng_util.h"
#include "compiler/compiler.h"
#include "util/graph_range.h"
#include "util/unicode_def.h"
@ -45,14 +46,14 @@ using namespace std;
namespace ue2 {
static
void allowIllegal(NGWrapper &w, NFAVertex v, u8 pred_char) {
if (in_degree(v, w) != 1) {
void allowIllegal(NGHolder &g, NFAVertex v, u8 pred_char) {
if (in_degree(v, g) != 1) {
DEBUG_PRINTF("unexpected pred\n");
assert(0); /* should be true due to the early stage of this analysis */
return;
}
CharReach &cr = w[v].char_reach;
CharReach &cr = g[v].char_reach;
if (pred_char == 0xe0) {
assert(cr.isSubsetOf(CharReach(0xa0, 0xbf)));
if (cr == CharReach(0xa0, 0xbf)) {
@ -79,8 +80,8 @@ void allowIllegal(NGWrapper &w, NFAVertex v, u8 pred_char) {
* above \\x{10ffff} or they represent overlong encodings. As we require valid
* UTF-8 input, we have no defined behaviour in these cases; as a result, we
* can accept them if it simplifies the graph. */
void relaxForbiddenUtf8(NGWrapper &w) {
if (!w.utf8) {
void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr) {
if (!expr.utf8) {
return;
}
@ -88,12 +89,12 @@ void relaxForbiddenUtf8(NGWrapper &w) {
const CharReach f0(0xf0);
const CharReach f4(0xf4);
for (auto v : vertices_range(w)) {
const CharReach &cr = w[v].char_reach;
for (auto v : vertices_range(g)) {
const CharReach &cr = g[v].char_reach;
if (cr == e0 || cr == f0 || cr == f4) {
u8 pred_char = cr.find_first();
for (auto t : adjacent_vertices_range(v, w)) {
allowIllegal(w, t, pred_char);
for (auto t : adjacent_vertices_range(v, g)) {
allowIllegal(g, t, pred_char);
}
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -35,7 +35,7 @@
namespace ue2 {
class NGWrapper;
class ExpressionInfo;
class NGHolder;
/** \brief Relax forbidden UTF-8 sequences.
@ -44,7 +44,7 @@ class NGHolder;
* above \\x{10ffff} or they represent overlong encodings. As we require valid
* UTF-8 input, we have no defined behaviour in these cases; as a result, we
* can accept them if it simplifies the graph. */
void relaxForbiddenUtf8(NGWrapper &w);
void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr);
/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex
* where possible, based on the assumption that we will always be matching

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -34,29 +34,31 @@
#include "grey.h"
#include "ng.h"
#include "ng_util.h"
#include "compiler/compiler.h"
using namespace std;
namespace ue2 {
static
ReportID getInternalId(ReportManager &rm, const NGWrapper &graph) {
Report ir = rm.getBasicInternalReport(graph);
ReportID getInternalId(ReportManager &rm, const ExpressionInfo &expr) {
Report ir = rm.getBasicInternalReport(expr);
// Apply any extended params.
if (graph.min_offset || graph.max_offset != MAX_OFFSET) {
ir.minOffset = graph.min_offset;
ir.maxOffset = graph.max_offset;
if (expr.min_offset || expr.max_offset != MAX_OFFSET) {
ir.minOffset = expr.min_offset;
ir.maxOffset = expr.max_offset;
}
assert(!graph.min_length); // should be handled elsewhere.
assert(!expr.min_length); // should be handled elsewhere.
return rm.getInternalId(ir);
}
static
void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGWrapper &g) {
const ReportID r = getInternalId(rm, g);
void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
const ReportID r = getInternalId(rm, expr);
boundary.report_at_0_eod.insert(r);
boundary.report_at_0.insert(r);
@ -81,8 +83,8 @@ void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGWrapper &g) {
static
void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
NGWrapper &g) {
boundary.report_at_0.insert(getInternalId(rm, g));
NGHolder &g, const ExpressionInfo &expr) {
boundary.report_at_0.insert(getInternalId(rm, expr));
remove_edge(g.start, g.accept, g);
remove_edge(g.start, g.acceptEod, g);
g[g.start].reports.clear();
@ -90,8 +92,8 @@ void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
static
void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
NGWrapper &g) {
boundary.report_at_eod.insert(getInternalId(rm, g));
NGHolder &g, const ExpressionInfo &expr) {
boundary.report_at_eod.insert(getInternalId(rm, expr));
remove_edge(g.startDs, g.acceptEod, g);
remove_edge(g.start, g.acceptEod, g);
g[g.start].reports.clear();
@ -100,18 +102,18 @@ void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
static
void makeNothingAcceptor(BoundaryReports &boundary, ReportManager &rm,
NGWrapper &g) {
boundary.report_at_0_eod.insert(getInternalId(rm, g));
NGHolder &g, const ExpressionInfo &expr) {
boundary.report_at_0_eod.insert(getInternalId(rm, expr));
remove_edge(g.start, g.acceptEod, g);
g[g.start].reports.clear();
}
bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
NGWrapper &g) {
NGHolder &g, const ExpressionInfo &expr) {
if (edge(g.startDs, g.accept, g).second) {
// e.g. '.*'; match "between" every byte
DEBUG_PRINTF("graph is firehose\n");
makeFirehose(boundary, rm, g);
makeFirehose(boundary, rm, g, expr);
return true;
}
@ -119,19 +121,19 @@ bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
if (edge(g.start, g.accept, g).second) {
DEBUG_PRINTF("creating anchored acceptor\n");
makeAnchoredAcceptor(boundary, rm, g);
makeAnchoredAcceptor(boundary, rm, g, expr);
work_done = true;
}
if (edge(g.startDs, g.acceptEod, g).second) {
DEBUG_PRINTF("creating end-anchored acceptor\n");
makeEndAnchoredAcceptor(boundary, rm, g);
makeEndAnchoredAcceptor(boundary, rm, g, expr);
work_done = true;
}
if (edge(g.start, g.acceptEod, g).second) {
DEBUG_PRINTF("creating nothing acceptor\n");
makeNothingAcceptor(boundary, rm, g);
makeNothingAcceptor(boundary, rm, g, expr);
work_done = true;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -36,12 +36,13 @@
namespace ue2 {
struct BoundaryReports;
class NGWrapper;
class ExpressionInfo;
class NGHolder;
class ReportManager;
// Returns true if a "vacuous" reporter was created.
bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
NGWrapper &graph);
NGHolder &g, const ExpressionInfo &expr);
} // namespace ue2

View File

@ -159,13 +159,15 @@ public:
ConstructLiteralVisitor::~ConstructLiteralVisitor() {}
/** \brief True if the literal expression \a expr could be added to Rose. */
bool shortcutLiteral(NG &ng, const ParsedExpression &expr) {
assert(expr.component);
bool shortcutLiteral(NG &ng, const ParsedExpression &pe) {
assert(pe.component);
if (!ng.cc.grey.allowLiteral) {
return false;
}
const auto &expr = pe.expr;
// XXX: don't shortcut literals with extended params (yet)
if (expr.min_offset || expr.max_offset != MAX_OFFSET || expr.min_length ||
expr.edit_distance) {
@ -175,8 +177,8 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &expr) {
ConstructLiteralVisitor vis;
try {
assert(expr.component);
expr.component->accept(vis);
assert(pe.component);
pe.component->accept(vis);
assert(vis.repeat_stack.empty());
} catch (const ConstructLiteralVisitor::NotLiteral&) {
DEBUG_PRINTF("not a literal\n");
@ -196,7 +198,8 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &expr) {
}
DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str());
return ng.addLiteral(lit, expr.index, expr.id, expr.highlander, expr.som);
return ng.addLiteral(lit, expr.index, expr.report, expr.highlander,
expr.som);
}
} // namespace ue2

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -30,6 +30,7 @@
#include "grey.h"
#include "ue2common.h"
#include "compiler/compiler.h"
#include "nfa/dfa_min.h"
#include "nfa/mcclellancompile.h"
#include "nfa/mcclellancompile_util.h"
@ -74,7 +75,7 @@ public:
// Construct a runtime implementation.
aligned_unique_ptr<SmallWriteEngine> build(u32 roseQuality) override;
void add(const NGWrapper &w) override;
void add(const NGHolder &g, const ExpressionInfo &expr) override;
void add(const ue2_literal &literal, ReportID r) override;
set<ReportID> all_reports() const override;
@ -171,26 +172,26 @@ bool pruneOverlong(NGHolder &g, const depth &max_depth,
return modified;
}
void SmallWriteBuildImpl::add(const NGWrapper &w) {
void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) {
// If the graph is poisoned (i.e. we can't build a SmallWrite version),
// we don't even try.
if (poisoned) {
return;
}
if (w.som || w.min_length || isVacuous(w)) { /* cannot support in smwr */
poisoned = true;
if (expr.som || expr.min_length || isVacuous(g)) {
poisoned = true; /* cannot support in smwr */
return;
}
DEBUG_PRINTF("w=%p\n", &w);
DEBUG_PRINTF("g=%p\n", &g);
// make a copy of the graph so that we can modify it for our purposes
unique_ptr<NGHolder> h = cloneHolder(w);
unique_ptr<NGHolder> h = cloneHolder(g);
pruneOverlong(*h, depth(cc.grey.smallWriteLargestBuffer), rm);
reduceGraph(*h, SOM_NONE, w.utf8, cc);
reduceGraph(*h, SOM_NONE, expr.utf8, cc);
if (can_never_match(*h)) {
DEBUG_PRINTF("graph can never match in small block\n");

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -48,8 +48,9 @@ namespace ue2 {
struct CompileContext;
struct ue2_literal;
class NGWrapper;
class ReportManager;
class ExpressionInfo;
class NGHolder;
class ReportManager;
// Abstract interface intended for callers from elsewhere in the tree, real
// underlying implementation is SmallWriteBuildImpl in smwr_build_impl.h.
@ -61,16 +62,16 @@ public:
// Construct a runtime implementation.
virtual ue2::aligned_unique_ptr<SmallWriteEngine> build(u32 roseQuality) = 0;
virtual void add(const NGWrapper &w) = 0;
virtual void add(const NGHolder &g, const ExpressionInfo &expr) = 0;
virtual void add(const ue2_literal &literal, ReportID r) = 0;
virtual std::set<ReportID> all_reports() const = 0;
};
// Construct a usable SmallWrite builder.
std::unique_ptr<SmallWriteBuild> makeSmallWriteBuilder(size_t num_patterns,
const ReportManager &rm,
const CompileContext &cc);
std::unique_ptr<SmallWriteBuild>
makeSmallWriteBuilder(size_t num_patterns, const ReportManager &rm,
const CompileContext &cc);
size_t smwrSize(const SmallWriteEngine *t);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -29,9 +29,12 @@
/** \file
* \brief ReportManager: tracks Report structures, exhaustion and dedupe keys.
*/
#include "grey.h"
#include "report_manager.h"
#include "grey.h"
#include "ue2common.h"
#include "compiler/compiler.h"
#include "nfagraph/ng.h"
#include "rose/rose_build.h"
#include "util/compile_error.h"
@ -201,20 +204,21 @@ void ReportManager::registerExtReport(ReportID id,
}
}
Report ReportManager::getBasicInternalReport(const NGWrapper &g, s32 adj) {
Report ReportManager::getBasicInternalReport(const ExpressionInfo &expr,
s32 adj) {
/* validate that we are not violating highlander constraints, this will
* throw a CompileError if so. */
registerExtReport(g.reportId,
external_report_info(g.highlander, g.expressionIndex));
registerExtReport(expr.report,
external_report_info(expr.highlander, expr.index));
/* create the internal report */
u32 ekey = INVALID_EKEY;
if (g.highlander) {
if (expr.highlander) {
/* all patterns with the same report id share an ekey */
ekey = getExhaustibleKey(g.reportId);
ekey = getExhaustibleKey(expr.report);
}
return makeECallback(g.reportId, adj, ekey);
return makeECallback(expr.report, adj, ekey);
}
void ReportManager::setProgramOffset(ReportID id, u32 programOffset) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -47,7 +47,7 @@ namespace ue2 {
struct Grey;
class RoseBuild;
class NGWrapper;
class ExpressionInfo;
struct external_report_info {
external_report_info(bool h, u32 fpi)
@ -92,13 +92,13 @@ public:
const std::vector<Report> &reports() const { return reportIds; }
/**
* Get a simple internal report corresponding to the wrapper. An ekey will
* be setup as required.
* Get a simple internal report corresponding to the expression. An ekey
* will be setup if required.
*
* Note: this function may throw a CompileError if constraints on external
* match id are violated (mixed highlander status for example).
*/
Report getBasicInternalReport(const NGWrapper &g, s32 adj = 0);
Report getBasicInternalReport(const ExpressionInfo &expr, s32 adj = 0);
/** \brief Register an external report and validate that we are not
* violating highlander constraints (which will cause an exception to be

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -96,7 +96,8 @@ protected:
const CompileContext cc(true, false, target, grey);
ReportManager rm(cc.grey);
ParsedExpression parsed(0, pattern.c_str(), flags, 0);
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed);
auto built_expr = buildGraph(rm, cc, parsed);
const auto &g = built_expr.g;
ASSERT_TRUE(g != nullptr);
clearReports(*g);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -73,7 +73,8 @@ protected:
CompileContext cc(false, false, target, Grey());
ReportManager rm(cc.grey);
ParsedExpression parsed(0, expr.c_str(), flags, 0);
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed);
auto built_expr = buildGraph(rm, cc, parsed);
const auto &g = built_expr.g;
ASSERT_TRUE(g != nullptr);
clearReports(*g);
@ -306,7 +307,8 @@ protected:
CompileContext cc(false, false, get_current_target(), Grey());
ReportManager rm(cc.grey);
ParsedExpression parsed(0, expr.c_str(), flags, 0);
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed);
auto built_expr = buildGraph(rm, cc, parsed);
const auto &g = built_expr.g;
ASSERT_TRUE(g != nullptr);
clearReports(*g);
@ -365,7 +367,8 @@ protected:
CompileContext cc(true, false, get_current_target(), Grey());
ParsedExpression parsed(0, expr.c_str(), flags, 0);
ReportManager rm(cc.grey);
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed);
auto built_expr = buildGraph(rm, cc, parsed);
const auto &g = built_expr.g;
ASSERT_TRUE(g != nullptr);
clearReports(*g);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -40,18 +40,19 @@ namespace ue2 {
// Helper function: construct a graph from an expression, flags and context.
//
// Constructs a ParsedExpression from `expr` and `flags`, builds its graph
// using a function-local ReportManager created from cc.grey, and hands
// ownership of the resulting graph to the caller. The ReportManager does not
// outlive this call.
//
// NOTE(review): both an NGWrapper/buildWrapper form and an NGHolder/buildGraph
// form of this helper appear below; presumably the NGWrapper lines are the
// superseded ones -- confirm against the real header.
inline
std::unique_ptr<NGWrapper> constructGraphWithCC(const std::string &expr,
CompileContext &cc,
unsigned flags) {
std::unique_ptr<NGHolder> constructGraphWithCC(const std::string &expr,
CompileContext &cc,
unsigned flags) {
ReportManager rm(cc.grey);
ParsedExpression parsed(0, expr.c_str(), flags, 0);
return buildWrapper(rm, cc, parsed);
auto built_expr = buildGraph(rm, cc, parsed);
// Only the graph member `g` is moved out of built_expr; nothing else from it
// is returned to the caller.
return std::move(built_expr.g);
}
// Helper function: construct a graph from an expression and its flags.
//
// Convenience wrapper around constructGraphWithCC() for callers that do not
// need a particular context: it supplies a CompileContext built with
// (false, false, get_current_target(), Grey()).
//
// NOTE(review): two forms of the signature appear below (NGWrapper and
// NGHolder return types); presumably the NGWrapper one is superseded --
// confirm.
inline
std::unique_ptr<NGWrapper> constructGraph(const std::string &expr,
unsigned flags) {
std::unique_ptr<NGHolder> constructGraph(const std::string &expr,
unsigned flags) {
CompileContext cc(false, false, get_current_target(), Grey());
return constructGraphWithCC(expr, cc, flags);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -54,7 +54,7 @@ TEST(NFAGraph, RemoveEquivalence1) {
// The graph should be merged into: a(b|c)
CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("(ab|ac)", cc, 0));
auto graph(constructGraphWithCC("(ab|ac)", cc, 0));
ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph;
g.kind = NFA_SUFFIX;
@ -115,7 +115,7 @@ TEST(NFAGraph, RemoveEquivalence2) {
// The graph should be merged into: (b|c)a
CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("(ba|ca)", cc, 0));
auto graph(constructGraphWithCC("(ba|ca)", cc, 0));
ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph;
g.kind = NFA_SUFFIX;
@ -176,8 +176,7 @@ TEST(NFAGraph, RemoveEquivalence3) {
// The graph should be merged into: a(..)+(X|Y)
CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("a(..)+X|a(..)+Y", cc,
HS_FLAG_DOTALL));
auto graph(constructGraphWithCC("a(..)+X|a(..)+Y", cc, HS_FLAG_DOTALL));
ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph;
g.kind = NFA_SUFFIX;
@ -266,8 +265,7 @@ TEST(NFAGraph, RemoveEquivalence4) {
// The graph should be merged into: (X|Y)(..)+a
CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("X(..)+a|Y(..)+a", cc,
HS_FLAG_DOTALL));
auto graph(constructGraphWithCC("X(..)+a|Y(..)+a", cc, HS_FLAG_DOTALL));
ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph;
g.kind = NFA_SUFFIX;
@ -363,8 +361,7 @@ TEST(NFAGraph, RemoveEquivalence5) {
// The graph should be merged into: [^\x00]*[\x00]
CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("[^\\x00][^\\x00]*[\\x00]",
cc, 0));
auto graph(constructGraphWithCC("[^\\x00][^\\x00]*[\\x00]", cc, 0));
ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph;
g.kind = NFA_PREFIX;
@ -420,7 +417,7 @@ TEST(NFAGraph, RemoveEquivalence5) {
TEST(NFAGraph, RemoveEquivalence6) {
// Build a small graph with two redundant vertices: ^(.*|.*)a
// The graph should be merged into: a
unique_ptr<NGWrapper> graph(constructGraph("^(.*|.*)a", HS_FLAG_DOTALL));
auto graph(constructGraph("^(.*|.*)a", HS_FLAG_DOTALL));
ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph;
@ -458,7 +455,7 @@ TEST(NFAGraph, RemoveEquivalence6) {
TEST(NFAGraph, RemoveEquivalence7) {
// Build a small graph with no redundant vertices: ^.+a
// Make sure we don't merge anything
unique_ptr<NGWrapper> graph(constructGraph("^.+a", HS_FLAG_DOTALL));
auto graph(constructGraph("^.+a", HS_FLAG_DOTALL));
ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph;

View File

@ -208,7 +208,8 @@ TEST_P(MatchesTest, Check) {
CompileContext cc(false, false, get_current_target(), Grey());
ReportManager rm(cc.grey);
ParsedExpression parsed(0, t.pattern.c_str(), t.flags, 0);
auto g = buildWrapper(rm, cc, parsed);
auto built_expr = buildGraph(rm, cc, parsed);
const auto &g = built_expr.g;
bool utf8 = (t.flags & HS_FLAG_UTF8) > 0;
set<pair<size_t, size_t>> matches;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -53,7 +53,7 @@ TEST(NFAGraph, RemoveRedundancy1) {
// The character reachability should be merged into: [ab]c
CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("(a|b)c", cc, 0));
auto graph(constructGraphWithCC("(a|b)c", cc, 0));
ASSERT_TRUE(graph.get() != nullptr);
NGHolder &g = *graph;
@ -95,8 +95,7 @@ TEST(NFAGraph, RemoveRedundancy2) {
// Build a small graph with a redundant vertex: a.*b?c
// The dot-star should swallow the 'b?', leaving a.*c
CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("a.*b?c", cc,
HS_FLAG_DOTALL));
auto graph(constructGraphWithCC("a.*b?c", cc, HS_FLAG_DOTALL));
ASSERT_TRUE(graph.get() != nullptr);
NGHolder &g = *graph;
@ -152,8 +151,7 @@ TEST(NFAGraph, RemoveRedundancy2) {
TEST(NFAGraph, RemoveRedundancy3) {
CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("foobar.*(a|b)?teakettle",
cc, 0));
auto graph(constructGraphWithCC("foobar.*(a|b)?teakettle", cc, 0));
ASSERT_TRUE(graph.get() != nullptr);
unsigned countBefore = num_vertices(*graph);
@ -166,7 +164,7 @@ TEST(NFAGraph, RemoveRedundancy3) {
TEST(NFAGraph, RemoveRedundancy4) {
CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("foo([A-Z]|a|b|q)", cc, 0));
auto graph(constructGraphWithCC("foo([A-Z]|a|b|q)", cc, 0));
ASSERT_TRUE(graph.get() != nullptr);
unsigned countBefore = num_vertices(*graph);
@ -178,8 +176,7 @@ TEST(NFAGraph, RemoveRedundancy4) {
TEST(NFAGraph, RemoveRedundancy5) {
CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("[0-9]?badgerbrush",
cc, 0));
auto graph(constructGraphWithCC("[0-9]?badgerbrush", cc, 0));
ASSERT_TRUE(graph.get() != nullptr);
unsigned countBefore = num_vertices(*graph);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -79,10 +79,10 @@ INSTANTIATE_TEST_CASE_P(NFAWidth, NFAWidthTest, ValuesIn(widthTests));
// Builds the graph for the parameterised pattern (with flags == 0) and checks
// that findMinWidth() and findMaxWidth() agree with the expected values held
// in the WidthTest entry.
//
// NOTE(review): the graph is constructed and checked twice below, once as
// `w` (NGWrapper) and once as `g`; presumably the `w` lines are the superseded
// form -- confirm.
TEST_P(NFAWidthTest, Check) {
const WidthTest &t = GetParam();
// Attach the pattern text to any failure reported in this scope.
SCOPED_TRACE(testing::Message() << "Pattern: " << t.pattern);
unique_ptr<NGWrapper> w(constructGraph(t.pattern, 0));
auto g = constructGraph(t.pattern, 0);
ASSERT_EQ(t.minWidth, findMinWidth(*w));
ASSERT_EQ(t.maxWidth, findMaxWidth(*w));
ASSERT_EQ(t.minWidth, findMinWidth(*g));
ASSERT_EQ(t.maxWidth, findMaxWidth(*g));
}
// for google test

View File

@ -35,6 +35,7 @@
#include "ng_corpus_generator.h"
#include "ng_corpus_editor.h"
#include "compiler/compiler.h"
#include "nfagraph/ng.h"
#include "nfagraph/ng_util.h"
#include "ue2common.h"
@ -219,8 +220,9 @@ namespace {
/** \brief Concrete implementation */
class CorpusGeneratorImpl : public CorpusGenerator {
public:
CorpusGeneratorImpl(const NGWrapper &graph_in, CorpusProperties &props);
~CorpusGeneratorImpl() {}
CorpusGeneratorImpl(const NGHolder &graph_in, const ExpressionInfo &expr_in,
CorpusProperties &props);
~CorpusGeneratorImpl() = default;
void generateCorpus(vector<string> &data);
@ -237,6 +239,9 @@ private:
* bytes in length. */
void addRandom(const min_max &mm, string *out);
/** \brief Info about this expression. */
const ExpressionInfo &expr;
/** \brief The NFA graph we operate over. */
const NGHolder &graph;
@ -245,12 +250,13 @@ private:
CorpusProperties &cProps;
};
/** \brief Construct a generator over \a graph_in for the expression described
 * by \a expr_in.
 *
 * The arguments are bound to reference members rather than copied, so the
 * caller must keep them alive for as long as the generator is used.
 *
 * Side effect: if the expression has a non-zero edit distance and \a props
 * does not already have one set, props.editDistance is overwritten with the
 * result of props.rand(0, edit_distance + 1).
 *
 * NOTE(review): two forms of the signature, initialiser list and condition
 * appear below (edit distance read from the graph vs. from \a expr);
 * presumably the NGWrapper/graph_in form is superseded -- confirm. */
CorpusGeneratorImpl::CorpusGeneratorImpl(const NGWrapper &graph_in,
CorpusGeneratorImpl::CorpusGeneratorImpl(const NGHolder &graph_in,
const ExpressionInfo &expr_in,
CorpusProperties &props)
: graph(graph_in), cProps(props) {
: expr(expr_in), graph(graph_in), cProps(props) {
// if this pattern is to be matched approximately
// (an edit distance already present in props is left untouched)
if (graph_in.edit_distance && !props.editDistance) {
props.editDistance = props.rand(0, graph_in.edit_distance + 1);
if (expr.edit_distance && !props.editDistance) {
props.editDistance = props.rand(0, expr.edit_distance + 1);
}
}
@ -392,8 +398,9 @@ hit_limit:
/** \brief Concrete implementation for UTF-8 */
class CorpusGeneratorUtf8 : public CorpusGenerator {
public:
CorpusGeneratorUtf8(const NGWrapper &graph_in, CorpusProperties &props);
~CorpusGeneratorUtf8() {}
CorpusGeneratorUtf8(const NGHolder &graph_in, const ExpressionInfo &expr_in,
CorpusProperties &props);
~CorpusGeneratorUtf8() = default;
void generateCorpus(vector<string> &data);
@ -410,19 +417,23 @@ private:
* length. */
void addRandom(const min_max &mm, vector<unichar> *out);
/** \brief Info about this expression. */
const ExpressionInfo &expr;
/** \brief The NFA graph we operate over. */
const NGWrapper &graph;
const NGHolder &graph;
/** \brief Reference to our corpus generator properties object (stores some
* state) */
CorpusProperties &cProps;
};
CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGWrapper &graph_in,
CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGHolder &graph_in,
const ExpressionInfo &expr_in,
CorpusProperties &props)
: graph(graph_in), cProps(props) {
: expr(expr_in), graph(graph_in), cProps(props) {
// we do not support Utf8 for approximate matching
if (graph.edit_distance) {
if (expr.edit_distance) {
throw CorpusGenerationFailure("UTF-8 for edited patterns is not "
"supported.");
}
@ -681,11 +692,12 @@ CorpusGenerator::~CorpusGenerator() { }
// External entry point
//
// Chooses the concrete CorpusGenerator implementation: CorpusGeneratorUtf8
// when the utf8 flag is set, CorpusGeneratorImpl otherwise. The arguments are
// forwarded unchanged to the chosen constructor. Note that the UTF-8
// generator's constructor throws CorpusGenerationFailure when an edit distance
// is set.
//
// NOTE(review): two forms of the signature, the utf8 test and the make_unique
// calls appear below; presumably the NGWrapper/two-argument form is
// superseded -- confirm.
unique_ptr<CorpusGenerator> makeCorpusGenerator(const NGWrapper &graph,
unique_ptr<CorpusGenerator> makeCorpusGenerator(const NGHolder &graph,
const ExpressionInfo &expr,
CorpusProperties &props) {
if (graph.utf8) {
return ue2::make_unique<CorpusGeneratorUtf8>(graph, props);
if (expr.utf8) {
return ue2::make_unique<CorpusGeneratorUtf8>(graph, expr, props);
} else {
return ue2::make_unique<CorpusGeneratorImpl>(graph, props);
return ue2::make_unique<CorpusGeneratorImpl>(graph, expr, props);
}
}

View File

@ -41,7 +41,8 @@
namespace ue2 {
class NGWrapper;
class ExpressionInfo;
class NGHolder;
} // namespace ue2
@ -68,6 +69,7 @@ public:
/** \brief Build a concrete impl conforming to the \ref CorpusGenerator
* interface.
*
* \param g     The NFA graph the generator operates over.
* \param expr  Info about the expression the graph was built from.
* \param props Corpus generator properties object (stores some state).
* \return An owning pointer to the newly built generator.
*
* NOTE(review): two forms of the declaration appear below; presumably the
* NGWrapper one is superseded -- confirm. */
std::unique_ptr<CorpusGenerator>
makeCorpusGenerator(const ue2::NGWrapper &graph, CorpusProperties &props);
makeCorpusGenerator(const ue2::NGHolder &g, const ue2::ExpressionInfo &expr,
CorpusProperties &props);
#endif