ng: split NGWrapper into NGHolder, ExpressionInfo

We now use NGHolder for all graph information, while other expression
properties (report, flag information, etc.) go in the new class
ExpressionInfo.
This commit is contained in:
Justin Viiret 2017-03-16 18:18:34 +11:00 committed by Matthew Barr
parent fadfab6d8c
commit 5dfae12a62
41 changed files with 726 additions and 612 deletions

View File

@ -674,6 +674,7 @@ SET (hs_SRCS
src/compiler/compiler.h src/compiler/compiler.h
src/compiler/error.cpp src/compiler/error.cpp
src/compiler/error.h src/compiler/error.h
src/compiler/expression_info.h
src/fdr/engine_description.cpp src/fdr/engine_description.cpp
src/fdr/engine_description.h src/fdr/engine_description.h
src/fdr/fdr_compile.cpp src/fdr/fdr_compile.cpp

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -42,6 +42,8 @@
* word-to-word and word-to-nonword) are dropped. * word-to-word and word-to-nonword) are dropped.
*/ */
#include "asserts.h" #include "asserts.h"
#include "compiler/compiler.h"
#include "nfagraph/ng.h" #include "nfagraph/ng.h"
#include "nfagraph/ng_prune.h" #include "nfagraph/ng_prune.h"
#include "nfagraph/ng_redundancy.h" #include "nfagraph/ng_redundancy.h"
@ -115,8 +117,8 @@ u32 conjunct(u32 flags1, u32 flags2) {
typedef map<pair<NFAVertex, NFAVertex>, NFAEdge> edge_cache_t; typedef map<pair<NFAVertex, NFAVertex>, NFAEdge> edge_cache_t;
static static
void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, void replaceAssertVertex(NGHolder &g, NFAVertex t, const ExpressionInfo &expr,
u32 &assert_edge_count) { edge_cache_t &edge_cache, u32 &assert_edge_count) {
DEBUG_PRINTF("replacing assert vertex %zu\n", g[t].index); DEBUG_PRINTF("replacing assert vertex %zu\n", g[t].index);
const u32 flags = g[t].assert_flags; const u32 flags = g[t].assert_flags;
@ -178,8 +180,7 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
edge_cache.emplace(cache_key, e); edge_cache.emplace(cache_key, e);
g[e].assert_flags = flags; g[e].assert_flags = flags;
if (++assert_edge_count > MAX_ASSERT_EDGES) { if (++assert_edge_count > MAX_ASSERT_EDGES) {
throw CompileError(g.expressionIndex, throw CompileError(expr.index, "Pattern is too large.");
"Pattern is too large.");
} }
} else { } else {
NFAEdge e = ecit->second; NFAEdge e = ecit->second;
@ -200,21 +201,23 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
} }
static static
void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) { void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
NFAVertex v, s32 adj) {
// Don't try and set the report ID of a special vertex. // Don't try and set the report ID of a special vertex.
assert(!is_special(v, g)); assert(!is_special(v, g));
// There should be no reports set already. // There should be no reports set already.
assert(g[v].reports.empty()); assert(g[v].reports.empty());
Report r = rm.getBasicInternalReport(g, adj); Report r = rm.getBasicInternalReport(expr, adj);
g[v].reports.insert(rm.getInternalId(r)); g[v].reports.insert(rm.getInternalId(r));
DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj); DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
} }
static static
void checkForMultilineStart(ReportManager &rm, NGWrapper &g) { void checkForMultilineStart(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
vector<NFAEdge> dead; vector<NFAEdge> dead;
for (auto v : adjacent_vertices_range(g.start, g)) { for (auto v : adjacent_vertices_range(g.start, g)) {
if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) { if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) {
@ -238,7 +241,7 @@ void checkForMultilineStart(ReportManager &rm, NGWrapper &g) {
for (const auto &e : dead) { for (const auto &e : dead) {
NFAVertex dummy = add_vertex(g); NFAVertex dummy = add_vertex(g);
g[dummy].char_reach.setall(); g[dummy].char_reach.setall();
setReportId(rm, g, dummy, -1); setReportId(rm, g, expr, dummy, -1);
add_edge(source(e, g), dummy, g[e], g); add_edge(source(e, g), dummy, g[e], g);
add_edge(dummy, g.accept, g); add_edge(dummy, g.accept, g);
} }
@ -263,7 +266,8 @@ bool hasAssertVertices(const NGHolder &g) {
* Remove the horrors that are the temporary assert vertices which arise from * Remove the horrors that are the temporary assert vertices which arise from
* our construction method. Allows the rest of our code base to live in * our construction method. Allows the rest of our code base to live in
* blissful ignorance of their existence. */ * blissful ignorance of their existence. */
void removeAssertVertices(ReportManager &rm, NGWrapper &g) { void removeAssertVertices(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
size_t num = 0; size_t num = 0;
DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g)); DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g));
@ -285,12 +289,12 @@ void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
for (auto v : vertices_range(g)) { for (auto v : vertices_range(g)) {
if (g[v].assert_flags & WORDBOUNDARY_FLAGS) { if (g[v].assert_flags & WORDBOUNDARY_FLAGS) {
replaceAssertVertex(g, v, edge_cache, assert_edge_count); replaceAssertVertex(g, v, expr, edge_cache, assert_edge_count);
num++; num++;
} }
} }
checkForMultilineStart(rm, g); checkForMultilineStart(rm, g, expr);
if (num) { if (num) {
DEBUG_PRINTF("resolved %zu assert vertices\n", num); DEBUG_PRINTF("resolved %zu assert vertices\n", num);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -35,8 +35,9 @@
namespace ue2 { namespace ue2 {
class ExpressionInfo;
class ReportManager; class ReportManager;
class NGWrapper; class NGHolder;
/** \brief Convert temporary assert vertices (from construction method) to /** \brief Convert temporary assert vertices (from construction method) to
* edge-based flags. * edge-based flags.
@ -44,7 +45,8 @@ class NGWrapper;
* Remove the horrors that are the temporary assert vertices which arise from * Remove the horrors that are the temporary assert vertices which arise from
* our construction method. Allows the rest of our code base to live in * our construction method. Allows the rest of our code base to live in
* blissful ignorance of their existence. */ * blissful ignorance of their existence. */
void removeAssertVertices(ReportManager &rm, NGWrapper &g); void removeAssertVertices(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr);
} // namespace ue2 } // namespace ue2

View File

@ -73,7 +73,6 @@ using namespace std;
namespace ue2 { namespace ue2 {
static static
void validateExt(const hs_expr_ext &ext) { void validateExt(const hs_expr_ext &ext) {
static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET | static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET |
@ -100,26 +99,18 @@ void validateExt(const hs_expr_ext &ext) {
} }
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
unsigned flags, ReportID actionId, unsigned flags, ReportID report,
const hs_expr_ext *ext) const hs_expr_ext *ext)
: utf8(false), : expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH,
allow_vacuous(flags & HS_FLAG_ALLOWEMPTY), false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET,
highlander(flags & HS_FLAG_SINGLEMATCH), 0, 0) {
prefilter(flags & HS_FLAG_PREFILTER),
som(SOM_NONE),
index(index_in),
id(actionId),
min_offset(0),
max_offset(MAX_OFFSET),
min_length(0),
edit_distance(0) {
ParseMode mode(flags); ParseMode mode(flags);
component = parse(expression, mode); component = parse(expression, mode);
utf8 = mode.utf8; /* utf8 may be set by parse() */ expr.utf8 = mode.utf8; /* utf8 may be set by parse() */
if (utf8 && !isValidUtf8(expression)) { if (expr.utf8 && !isValidUtf8(expression)) {
throw ParseError("Expression is not valid UTF-8."); throw ParseError("Expression is not valid UTF-8.");
} }
@ -147,7 +138,7 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
// Set SOM type. // Set SOM type.
if (flags & HS_FLAG_SOM_LEFTMOST) { if (flags & HS_FLAG_SOM_LEFTMOST) {
som = SOM_LEFT; expr.som = SOM_LEFT;
} }
// Set extended parameters, if we have them. // Set extended parameters, if we have them.
@ -156,29 +147,29 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
validateExt(*ext); validateExt(*ext);
if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) { if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) {
min_offset = ext->min_offset; expr.min_offset = ext->min_offset;
} }
if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) { if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) {
max_offset = ext->max_offset; expr.max_offset = ext->max_offset;
} }
if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) { if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) {
min_length = ext->min_length; expr.min_length = ext->min_length;
} }
if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) { if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) {
edit_distance = ext->edit_distance; expr.edit_distance = ext->edit_distance;
} }
} }
// These are validated in validateExt, so an error will already have been // These are validated in validateExt, so an error will already have been
// thrown if these conditions don't hold. // thrown if these conditions don't hold.
assert(max_offset >= min_offset); assert(expr.max_offset >= expr.min_offset);
assert(max_offset >= min_length); assert(expr.max_offset >= expr.min_length);
// Since prefiltering and SOM aren't supported together, we must squash any // Since prefiltering and SOM aren't supported together, we must squash any
// min_length constraint as well. // min_length constraint as well.
if (flags & HS_FLAG_PREFILTER && min_length) { if (flags & HS_FLAG_PREFILTER && expr.min_length) {
DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n"); DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n");
min_length = 0; expr.min_length = 0;
} }
} }
@ -187,25 +178,25 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
* \brief Dumps the parse tree to screen in debug mode and to disk in dump * \brief Dumps the parse tree to screen in debug mode and to disk in dump
* mode. * mode.
*/ */
void dumpExpression(UNUSED const ParsedExpression &expr, void dumpExpression(UNUSED const ParsedExpression &pe,
UNUSED const char *stage, UNUSED const Grey &grey) { UNUSED const char *stage, UNUSED const Grey &grey) {
#if defined(DEBUG) #if defined(DEBUG)
DEBUG_PRINTF("===== Rule ID: %u (internalID: %u) =====\n", expr.id, DEBUG_PRINTF("===== Rule ID: %u (expression index: %u) =====\n",
expr.index); pe.expr.report, pe.expr.index);
ostringstream debug_tree; ostringstream debug_tree;
dumpTree(debug_tree, expr.component.get()); dumpTree(debug_tree, pe.component.get());
printf("%s\n", debug_tree.str().c_str()); printf("%s\n", debug_tree.str().c_str());
#endif // DEBUG #endif // DEBUG
#if defined(DUMP_SUPPORT) #if defined(DUMP_SUPPORT)
if (grey.dumpFlags & Grey::DUMP_PARSE) { if (grey.dumpFlags & Grey::DUMP_PARSE) {
stringstream ss; stringstream ss;
ss << grey.dumpPath << "Expr_" << expr.index << "_componenttree_" ss << grey.dumpPath << "Expr_" << pe.expr.index << "_componenttree_"
<< stage << ".txt"; << stage << ".txt";
ofstream out(ss.str().c_str()); ofstream out(ss.str().c_str());
out << "Component Tree for " << expr.id << endl; out << "Component Tree for " << pe.expr.report << endl;
dumpTree(out, expr.component.get()); dumpTree(out, pe.component.get());
if (expr.utf8) { if (pe.expr.utf8) {
out << "UTF8 mode" << endl; out << "UTF8 mode" << endl;
} }
} }
@ -215,13 +206,13 @@ void dumpExpression(UNUSED const ParsedExpression &expr,
/** \brief Run Component tree optimisations on \a expr. */ /** \brief Run Component tree optimisations on \a expr. */
static static
void optimise(ParsedExpression &expr) { void optimise(ParsedExpression &pe) {
if (expr.min_length || expr.som) { if (pe.expr.min_length || pe.expr.som) {
return; return;
} }
DEBUG_PRINTF("optimising\n"); DEBUG_PRINTF("optimising\n");
expr.component->optimise(true /* root is connected to sds */); pe.component->optimise(true /* root is connected to sds */);
} }
void addExpression(NG &ng, unsigned index, const char *expression, void addExpression(NG &ng, unsigned index, const char *expression,
@ -238,34 +229,34 @@ void addExpression(NG &ng, unsigned index, const char *expression,
// Do per-expression processing: errors here will result in an exception // Do per-expression processing: errors here will result in an exception
// being thrown up to our caller // being thrown up to our caller
ParsedExpression expr(index, expression, flags, id, ext); ParsedExpression pe(index, expression, flags, id, ext);
dumpExpression(expr, "orig", cc.grey); dumpExpression(pe, "orig", cc.grey);
// Apply prefiltering transformations if desired. // Apply prefiltering transformations if desired.
if (expr.prefilter) { if (pe.expr.prefilter) {
prefilterTree(expr.component, ParseMode(flags)); prefilterTree(pe.component, ParseMode(flags));
dumpExpression(expr, "prefiltered", cc.grey); dumpExpression(pe, "prefiltered", cc.grey);
} }
// Expressions containing zero-width assertions and other extended pcre // Expressions containing zero-width assertions and other extended pcre
// types aren't supported yet. This call will throw a ParseError exception // types aren't supported yet. This call will throw a ParseError exception
// if the component tree contains such a construct. // if the component tree contains such a construct.
checkUnsupported(*expr.component); checkUnsupported(*pe.component);
expr.component->checkEmbeddedStartAnchor(true); pe.component->checkEmbeddedStartAnchor(true);
expr.component->checkEmbeddedEndAnchor(true); pe.component->checkEmbeddedEndAnchor(true);
if (cc.grey.optimiseComponentTree) { if (cc.grey.optimiseComponentTree) {
optimise(expr); optimise(pe);
dumpExpression(expr, "opt", cc.grey); dumpExpression(pe, "opt", cc.grey);
} }
DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n", DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n",
expr.component.get(), expr.index, expr.id); pe.component.get(), pe.expr.index, pe.expr.report);
// You can only use the SOM flags if you've also specified an SOM // You can only use the SOM flags if you've also specified an SOM
// precision mode. // precision mode.
if (expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) { if (pe.expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) {
throw CompileError("To use a SOM expression flag in streaming mode, " throw CompileError("To use a SOM expression flag in streaming mode, "
"an SOM precision mode (e.g. " "an SOM precision mode (e.g. "
"HS_MODE_SOM_HORIZON_LARGE) must be specified."); "HS_MODE_SOM_HORIZON_LARGE) must be specified.");
@ -273,26 +264,26 @@ void addExpression(NG &ng, unsigned index, const char *expression,
// If this expression is a literal, we can feed it directly to Rose rather // If this expression is a literal, we can feed it directly to Rose rather
// than building the NFA graph. // than building the NFA graph.
if (shortcutLiteral(ng, expr)) { if (shortcutLiteral(ng, pe)) {
DEBUG_PRINTF("took literal short cut\n"); DEBUG_PRINTF("took literal short cut\n");
return; return;
} }
unique_ptr<NGWrapper> g = buildWrapper(ng.rm, cc, expr); auto built_expr = buildGraph(ng.rm, cc, pe);
if (!built_expr.g) {
if (!g) {
DEBUG_PRINTF("NFA build failed on ID %u, but no exception was " DEBUG_PRINTF("NFA build failed on ID %u, but no exception was "
"thrown.\n", expr.id); "thrown.\n", pe.expr.report);
throw CompileError("Internal error."); throw CompileError("Internal error.");
} }
if (!expr.allow_vacuous && matches_everywhere(*g)) { auto &g = *built_expr.g;
if (!pe.expr.allow_vacuous && matches_everywhere(g)) {
throw CompileError("Pattern matches empty buffer; use " throw CompileError("Pattern matches empty buffer; use "
"HS_FLAG_ALLOWEMPTY to enable support."); "HS_FLAG_ALLOWEMPTY to enable support.");
} }
if (!ng.addGraph(*g)) { if (!ng.addGraph(built_expr.expr, g)) {
DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", expr.id); DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", pe.expr.report);
throw CompileError("Error compiling expression."); throw CompileError("Error compiling expression.");
} }
} }
@ -453,41 +444,42 @@ bool isSupported(const Component &c) {
} }
#endif #endif
unique_ptr<NGWrapper> buildWrapper(ReportManager &rm, const CompileContext &cc, BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc,
const ParsedExpression &expr) { const ParsedExpression &pe) {
assert(isSupported(*expr.component)); assert(isSupported(*pe.component));
const unique_ptr<NFABuilder> builder = makeNFABuilder(rm, cc, expr); const auto builder = makeNFABuilder(rm, cc, pe);
assert(builder); assert(builder);
// Set up START and ACCEPT states; retrieve the special states // Set up START and ACCEPT states; retrieve the special states
const auto bs = makeGlushkovBuildState(*builder, expr.prefilter); const auto bs = makeGlushkovBuildState(*builder, pe.expr.prefilter);
// Map position IDs to characters/components // Map position IDs to characters/components
expr.component->notePositions(*bs); pe.component->notePositions(*bs);
// Wire the start dotstar state to the firsts // Wire the start dotstar state to the firsts
connectInitialStates(*bs, expr); connectInitialStates(*bs, pe);
DEBUG_PRINTF("wire up body of expr\n"); DEBUG_PRINTF("wire up body of expr\n");
// Build the rest of the FOLLOW set // Build the rest of the FOLLOW set
vector<PositionInfo> initials = {builder->getStartDotStar(), vector<PositionInfo> initials = {builder->getStartDotStar(),
builder->getStart()}; builder->getStart()};
expr.component->buildFollowSet(*bs, initials); pe.component->buildFollowSet(*bs, initials);
// Wire the lasts to the accept state // Wire the lasts to the accept state
connectFinalStates(*bs, expr); connectFinalStates(*bs, pe);
// Create our edges // Create our edges
bs->buildEdges(); bs->buildEdges();
auto g = builder->getGraph(); BuiltExpression built_expr = builder->getGraph();
assert(g); assert(built_expr.g);
dumpDotWrapper(*g, "00_before_asserts", cc.grey); dumpDotWrapper(*built_expr.g, built_expr.expr, "00_before_asserts",
removeAssertVertices(rm, *g); cc.grey);
removeAssertVertices(rm, *built_expr.g, built_expr.expr);
return g; return built_expr;
} }
} // namespace ue2 } // namespace ue2

View File

@ -35,8 +35,8 @@
#include "ue2common.h" #include "ue2common.h"
#include "database.h" #include "database.h"
#include "compiler/expression_info.h"
#include "parser/Component.h" #include "parser/Component.h"
#include "som/som.h"
#include <memory> #include <memory>
#include <boost/core/noncopyable.hpp> #include <boost/core/noncopyable.hpp>
@ -50,35 +50,32 @@ struct CompileContext;
struct Grey; struct Grey;
struct target_t; struct target_t;
class NG; class NG;
class NGHolder;
class ReportManager; class ReportManager;
class NGWrapper;
/** Class gathering together the pieces of a parsed expression. /** \brief Class gathering together the pieces of a parsed expression. */
* Note: Owns the provided component.
*/
class ParsedExpression : boost::noncopyable { class ParsedExpression : boost::noncopyable {
public: public:
ParsedExpression(unsigned index, const char *expression, unsigned flags, ParsedExpression(unsigned index, const char *expression, unsigned flags,
ReportID actionId, const hs_expr_ext *ext = nullptr); ReportID report, const hs_expr_ext *ext = nullptr);
bool utf8; //!< UTF-8 mode flag specified /** \brief Expression information (from flags, extparam etc) */
ExpressionInfo expr;
/** \brief root node of parsed component tree. */ /** \brief Root node of parsed component tree. */
std::unique_ptr<ue2::Component> component; std::unique_ptr<Component> component;
};
const bool allow_vacuous; //!< HS_FLAG_ALLOWEMPTY specified /**
const bool highlander; //!< HS_FLAG_SINGLEMATCH specified * \brief Class gathering together the pieces of an expression that has been
const bool prefilter; //!< HS_FLAG_PREFILTER specified * built into an NFA graph.
som_type som; //!< chosen SOM mode, or SOM_NONE */
struct BuiltExpression {
/** \brief Expression information (from flags, extparam etc) */
ExpressionInfo expr;
/** \brief index in expressions array passed to \ref hs_compile_multi */ /** \brief Built Glushkov NFA graph. */
const unsigned index; std::unique_ptr<NGHolder> g;
const ReportID id; //!< user-specified pattern ID
u64a min_offset; //!< 0 if not used
u64a max_offset; //!< MAX_OFFSET if not used
u64a min_length; //!< 0 if not used
u32 edit_distance; //!< 0 if not used
}; };
/** /**
@ -95,12 +92,12 @@ public:
* @param ext * @param ext
* Struct containing extra parameters for this expression, or NULL if * Struct containing extra parameters for this expression, or NULL if
* none. * none.
* @param actionId * @param report
* The identifier to associate with the expression; returned by engine on * The identifier to associate with the expression; returned by engine on
* match. * match.
*/ */
void addExpression(NG &ng, unsigned index, const char *expression, void addExpression(NG &ng, unsigned index, const char *expression,
unsigned flags, const hs_expr_ext *ext, ReportID actionId); unsigned flags, const hs_expr_ext *ext, ReportID report);
/** /**
* Build a Hyperscan database out of the expressions we've been given. A * Build a Hyperscan database out of the expressions we've been given. A
@ -128,9 +125,8 @@ struct hs_database *build(NG &ng, unsigned int *length);
* @return * @return
* nullptr on error. * nullptr on error.
*/ */
std::unique_ptr<NGWrapper> buildWrapper(ReportManager &rm, BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc,
const CompileContext &cc, const ParsedExpression &expr);
const ParsedExpression &expr);
/** /**
* Build a platform_t out of a target_t. * Build a platform_t out of a target_t.

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \file
* \brief ExpressionInfo class for storing the properties of an expression.
*/
#ifndef COMPILER_EXPRESSION_INFO_H
#define COMPILER_EXPRESSION_INFO_H
#include "ue2common.h"
#include "som/som.h"
namespace ue2 {
/**
* \brief Properties of an expression.
*
* Plain value class bundling the per-expression settings: the expression
* index, the user-supplied report ID, the flag-derived booleans, the
* start-of-match type and the extended parameters. It carries no graph data.
* All members are public and non-const, so they can be adjusted after
* construction.
*/
class ExpressionInfo {
public:
/**
* \brief Construct with an explicit value for every property.
*
* Each member is initialised from the matching \c *_in argument. Note that
* the argument order is not the member declaration order: \a report_in
* follows \a som_in here, whereas \c report is declared second.
*/
ExpressionInfo(unsigned int index_in, bool allow_vacuous_in,
bool highlander_in, bool utf8_in, bool prefilter_in,
som_type som_in, ReportID report_in, u64a min_offset_in,
u64a max_offset_in, u64a min_length_in, u32 edit_distance_in)
: index(index_in), report(report_in), allow_vacuous(allow_vacuous_in),
highlander(highlander_in), utf8(utf8_in), prefilter(prefilter_in),
som(som_in), min_offset(min_offset_in), max_offset(max_offset_in),
min_length(min_length_in), edit_distance(edit_distance_in) {}
/**
* \brief Index of the expression that these properties describe.
*
* Used:
* - down the track in error handling;
* - for identifying parts of an expression in highlander mode.
*/
unsigned int index;
/** \brief Report ID specified by the user. */
ReportID report;
/** \brief Vacuous pattern is allowed. (HS_FLAG_ALLOWEMPTY) */
bool allow_vacuous;
/** \brief "Highlander" (single match) pattern. (HS_FLAG_SINGLEMATCH) */
bool highlander;
/** \brief UTF-8 pattern. (HS_FLAG_UTF8) */
bool utf8;
/** \brief Prefiltering pattern. (HS_FLAG_PREFILTER) */
bool prefilter;
/** \brief Start-of-match type requested, or SOM_NONE. */
som_type som;
/** \brief Minimum match offset extended parameter. 0 if not used. */
u64a min_offset;
/**
* \brief Maximum match offset extended parameter.
* MAX_OFFSET if not used.
*/
u64a max_offset;
/** \brief Minimum match length extended parameter. 0 if not used. */
u64a min_length;
/**
* \brief Approximate matching edit distance extended parameter.
* 0 if not used.
*/
u32 edit_distance;
};
}
#endif // COMPILER_EXPRESSION_INFO_H

View File

@ -369,11 +369,13 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
assert(pe.component); assert(pe.component);
// Apply prefiltering transformations if desired. // Apply prefiltering transformations if desired.
if (pe.prefilter) { if (pe.expr.prefilter) {
prefilterTree(pe.component, ParseMode(flags)); prefilterTree(pe.component, ParseMode(flags));
} }
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, pe); auto built_expr = buildGraph(rm, cc, pe);
unique_ptr<NGHolder> &g = built_expr.g;
ExpressionInfo &expr = built_expr.expr;
if (!g) { if (!g) {
DEBUG_PRINTF("NFA build failed, but no exception was thrown.\n"); DEBUG_PRINTF("NFA build failed, but no exception was thrown.\n");
@ -381,13 +383,13 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
} }
// validate graph's suitability for fuzzing // validate graph's suitability for fuzzing
validate_fuzzy_compile(*g, g->edit_distance, g->utf8, cc.grey); validate_fuzzy_compile(*g, expr.edit_distance, expr.utf8, cc.grey);
// fuzz graph - this must happen before any transformations are made // fuzz graph - this must happen before any transformations are made
make_fuzzy(*g, g->edit_distance, cc.grey); make_fuzzy(*g, expr.edit_distance, cc.grey);
handleExtendedParams(rm, *g, cc); handleExtendedParams(rm, *g, expr, cc);
fillExpressionInfo(rm, *g, &local_info); fillExpressionInfo(rm, *g, expr, &local_info);
} }
catch (const CompileError &e) { catch (const CompileError &e) {
// Compiler error occurred // Compiler error occurred

View File

@ -27,10 +27,11 @@
*/ */
/** \file /** \file
* \brief NG, NGHolder, NGWrapper and graph handling. * \brief NG and graph handling.
*/ */
#include "grey.h"
#include "ng.h" #include "ng.h"
#include "grey.h"
#include "ng_anchored_acyclic.h" #include "ng_anchored_acyclic.h"
#include "ng_anchored_dots.h" #include "ng_anchored_dots.h"
#include "ng_asserts.h" #include "ng_asserts.h"
@ -62,6 +63,7 @@
#include "ng_util.h" #include "ng_util.h"
#include "ng_width.h" #include "ng_width.h"
#include "ue2common.h" #include "ue2common.h"
#include "compiler/compiler.h"
#include "nfa/goughcompile.h" #include "nfa/goughcompile.h"
#include "rose/rose_build.h" #include "rose/rose_build.h"
#include "smallwrite/smallwrite_build.h" #include "smallwrite/smallwrite_build.h"
@ -100,16 +102,16 @@ NG::~NG() {
* \throw CompileError if SOM cannot be supported for the component. * \throw CompileError if SOM cannot be supported for the component.
*/ */
static static
bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w, bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
const som_type som, const u32 comp_id) { const som_type som, const u32 comp_id) {
DEBUG_PRINTF("doing som\n"); DEBUG_PRINTF("doing som\n");
dumpComponent(g, "03_presom", w.expressionIndex, comp_id, ng.cc.grey); dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey);
assert(hasCorrectlyNumberedVertices(g)); assert(hasCorrectlyNumberedVertices(g));
assert(allMatchStatesHaveReports(w)); assert(allMatchStatesHaveReports(g));
// First, we try the "SOM chain" support in ng_som.cpp. // First, we try the "SOM chain" support in ng_som.cpp.
sombe_rv rv = doSom(ng, g, w, comp_id, som); sombe_rv rv = doSom(ng, g, expr, comp_id, som);
if (rv == SOMBE_HANDLED_INTERNAL) { if (rv == SOMBE_HANDLED_INTERNAL) {
return false; return false;
} else if (rv == SOMBE_HANDLED_ALL) { } else if (rv == SOMBE_HANDLED_ALL) {
@ -118,7 +120,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
assert(rv == SOMBE_FAIL); assert(rv == SOMBE_FAIL);
/* Next, Sombe style approaches */ /* Next, Sombe style approaches */
rv = doSomWithHaig(ng, g, w, comp_id, som); rv = doSomWithHaig(ng, g, expr, comp_id, som);
if (rv == SOMBE_HANDLED_INTERNAL) { if (rv == SOMBE_HANDLED_INTERNAL) {
return false; return false;
} else if (rv == SOMBE_HANDLED_ALL) { } else if (rv == SOMBE_HANDLED_ALL) {
@ -132,7 +134,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
vector<vector<CharReach> > triggers; /* empty for outfix */ vector<vector<CharReach> > triggers; /* empty for outfix */
assert(g.kind == NFA_OUTFIX); assert(g.kind == NFA_OUTFIX);
dumpComponent(g, "haig", w.expressionIndex, comp_id, ng.cc.grey); dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey);
makeReportsSomPass(ng.rm, g); makeReportsSomPass(ng.rm, g);
auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers, auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers,
ng.cc.grey); ng.cc.grey);
@ -145,7 +147,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
/* Our various strategies for supporting SOM for this pattern have failed. /* Our various strategies for supporting SOM for this pattern have failed.
* Provide a generic pattern not supported/too large return value as it is * Provide a generic pattern not supported/too large return value as it is
* unclear what the meaning of a specific SOM error would be */ * unclear what the meaning of a specific SOM error would be */
throw CompileError(w.expressionIndex, "Pattern is too large."); throw CompileError(expr.index, "Pattern is too large.");
assert(0); // unreachable assert(0); // unreachable
return false; return false;
@ -200,21 +202,21 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8,
} }
static static
bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr,
const u32 comp_id) { const som_type som, const u32 comp_id) {
const CompileContext &cc = ng.cc; const CompileContext &cc = ng.cc;
assert(hasCorrectlyNumberedVertices(g)); assert(hasCorrectlyNumberedVertices(g));
DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n", DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n",
w.expressionIndex, comp_id, num_vertices(g), num_edges(g)); expr.index, comp_id, num_vertices(g), num_edges(g));
dumpComponent(g, "01_begin", w.expressionIndex, comp_id, ng.cc.grey); dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey);
assert(allMatchStatesHaveReports(w)); assert(allMatchStatesHaveReports(g));
reduceGraph(g, som, w.utf8, cc); reduceGraph(g, som, expr.utf8, cc);
dumpComponent(g, "02_reduced", w.expressionIndex, comp_id, ng.cc.grey); dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey);
// There may be redundant regions that we can remove // There may be redundant regions that we can remove
if (cc.grey.performGraphSimplification) { if (cc.grey.performGraphSimplification) {
@ -231,12 +233,12 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
// Start Of Match handling. // Start Of Match handling.
if (som) { if (som) {
if (addComponentSom(ng, g, w, som, comp_id)) { if (addComponentSom(ng, g, expr, som, comp_id)) {
return true; return true;
} }
} }
assert(allMatchStatesHaveReports(w)); assert(allMatchStatesHaveReports(g));
if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) { if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) {
return true; return true;
@ -251,11 +253,11 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
return true; return true;
} }
if (doViolet(*ng.rose, g, w.prefilter, false, ng.rm, cc)) { if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) {
return true; return true;
} }
if (splitOffPuffs(*ng.rose, ng.rm, g, w.prefilter, cc)) { if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) {
return true; return true;
} }
@ -268,7 +270,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
return true; return true;
} }
if (doViolet(*ng.rose, g, w.prefilter, true, ng.rm, cc)) { if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) {
return true; return true;
} }
@ -283,7 +285,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
// Returns true if all components have been added. // Returns true if all components have been added.
static static
bool processComponents(NG &ng, NGWrapper &w, bool processComponents(NG &ng, ExpressionInfo &expr,
deque<unique_ptr<NGHolder>> &g_comp, deque<unique_ptr<NGHolder>> &g_comp,
const som_type som) { const som_type som) {
const u32 num_components = g_comp.size(); const u32 num_components = g_comp.size();
@ -293,7 +295,7 @@ bool processComponents(NG &ng, NGWrapper &w,
if (!g_comp[i]) { if (!g_comp[i]) {
continue; continue;
} }
if (addComponent(ng, *g_comp[i], w, som, i)) { if (addComponent(ng, *g_comp[i], expr, som, i)) {
g_comp[i].reset(); g_comp[i].reset();
continue; continue;
} }
@ -313,48 +315,48 @@ bool processComponents(NG &ng, NGWrapper &w,
return false; return false;
} }
bool NG::addGraph(NGWrapper &w) { bool NG::addGraph(ExpressionInfo &expr, NGHolder &g) {
// remove reports that aren't on vertices connected to accept. // remove reports that aren't on vertices connected to accept.
clearReports(w); clearReports(g);
som_type som = w.som; som_type som = expr.som;
if (som && isVacuous(w)) { if (som && isVacuous(g)) {
throw CompileError(w.expressionIndex, "Start of match is not " throw CompileError(expr.index, "Start of match is not "
"currently supported for patterns which match an " "currently supported for patterns which match an "
"empty buffer."); "empty buffer.");
} }
dumpDotWrapper(w, "01_initial", cc.grey); dumpDotWrapper(g, expr, "01_initial", cc.grey);
assert(allMatchStatesHaveReports(w)); assert(allMatchStatesHaveReports(g));
/* ensure utf8 starts at cp boundary */ /* ensure utf8 starts at cp boundary */
ensureCodePointStart(rm, w); ensureCodePointStart(rm, g, expr);
if (can_never_match(w)) { if (can_never_match(g)) {
throw CompileError(w.expressionIndex, "Pattern can never match."); throw CompileError(expr.index, "Pattern can never match.");
} }
// validate graph's suitability for fuzzing before resolving asserts // validate graph's suitability for fuzzing before resolving asserts
validate_fuzzy_compile(w, w.edit_distance, w.utf8, cc.grey); validate_fuzzy_compile(g, expr.edit_distance, expr.utf8, cc.grey);
resolveAsserts(rm, w); resolveAsserts(rm, g, expr);
dumpDotWrapper(w, "02_post_assert_resolve", cc.grey); dumpDotWrapper(g, expr, "02_post_assert_resolve", cc.grey);
assert(allMatchStatesHaveReports(w)); assert(allMatchStatesHaveReports(g));
make_fuzzy(w, w.edit_distance, cc.grey); make_fuzzy(g, expr.edit_distance, cc.grey);
dumpDotWrapper(w, "02a_post_fuzz", cc.grey); dumpDotWrapper(g, expr, "02a_post_fuzz", cc.grey);
pruneUseless(w); pruneUseless(g);
pruneEmptyVertices(w); pruneEmptyVertices(g);
if (can_never_match(w)) { if (can_never_match(g)) {
throw CompileError(w.expressionIndex, "Pattern can never match."); throw CompileError(expr.index, "Pattern can never match.");
} }
optimiseVirtualStarts(w); /* good for som */ optimiseVirtualStarts(g); /* good for som */
handleExtendedParams(rm, w, cc); handleExtendedParams(rm, g, expr, cc);
if (w.min_length) { if (expr.min_length) {
// We have a minimum length constraint, which we currently use SOM to // We have a minimum length constraint, which we currently use SOM to
// satisfy. // satisfy.
som = SOM_LEFT; som = SOM_LEFT;
@ -368,70 +370,70 @@ bool NG::addGraph(NGWrapper &w) {
// first, we can perform graph work that can be done on an individual // first, we can perform graph work that can be done on an individual
// expression basis. // expression basis.
if (w.utf8) { if (expr.utf8) {
relaxForbiddenUtf8(w); relaxForbiddenUtf8(g, expr);
} }
if (w.highlander && !w.min_length && !w.min_offset) { if (expr.highlander && !expr.min_length && !expr.min_offset) {
// In highlander mode: if we don't have constraints on our reports that // In highlander mode: if we don't have constraints on our reports that
// may prevent us accepting our first match (i.e. extended params) we // may prevent us accepting our first match (i.e. extended params) we
// can prune the other out-edges of all vertices connected to accept. // can prune the other out-edges of all vertices connected to accept.
pruneHighlanderAccepts(w, rm); pruneHighlanderAccepts(g, rm);
} }
dumpDotWrapper(w, "02b_fairly_early", cc.grey); dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey);
// If we're a vacuous pattern, we can handle this early. // If we're a vacuous pattern, we can handle this early.
if (splitOffVacuous(boundary, rm, w)) { if (splitOffVacuous(boundary, rm, g, expr)) {
DEBUG_PRINTF("split off vacuous\n"); DEBUG_PRINTF("split off vacuous\n");
} }
// We might be done at this point: if we've run out of vertices, we can // We might be done at this point: if we've run out of vertices, we can
// stop processing. // stop processing.
if (num_vertices(w) == N_SPECIALS) { if (num_vertices(g) == N_SPECIALS) {
DEBUG_PRINTF("all vertices claimed by vacuous handling\n"); DEBUG_PRINTF("all vertices claimed by vacuous handling\n");
return true; return true;
} }
// Now that vacuous edges have been removed, update the min width exclusive // Now that vacuous edges have been removed, update the min width exclusive
// of boundary reports. // of boundary reports.
minWidth = min(minWidth, findMinWidth(w)); minWidth = min(minWidth, findMinWidth(g));
// Add the pattern to the small write builder. // Add the pattern to the small write builder.
smwr->add(w); smwr->add(g, expr);
if (!som) { if (!som) {
removeSiblingsOfStartDotStar(w); removeSiblingsOfStartDotStar(g);
} }
dumpDotWrapper(w, "03_early", cc.grey); dumpDotWrapper(g, expr, "03_early", cc.grey);
// Perform a reduction pass to merge sibling character classes together. // Perform a reduction pass to merge sibling character classes together.
if (cc.grey.performGraphSimplification) { if (cc.grey.performGraphSimplification) {
removeRedundancy(w, som); removeRedundancy(g, som);
prunePathsRedundantWithSuccessorOfCyclics(w, som); prunePathsRedundantWithSuccessorOfCyclics(g, som);
} }
dumpDotWrapper(w, "04_reduced", cc.grey); dumpDotWrapper(g, expr, "04_reduced", cc.grey);
// If we've got some literals that span the graph from start to accept, we // If we've got some literals that span the graph from start to accept, we
// can split them off into Rose from here. // can split them off into Rose from here.
if (!som) { if (!som) {
if (splitOffLiterals(*this, w)) { if (splitOffLiterals(*this, g)) {
DEBUG_PRINTF("some vertices claimed by literals\n"); DEBUG_PRINTF("some vertices claimed by literals\n");
} }
} }
// We might be done at this point: if we've run out of vertices, we can // We might be done at this point: if we've run out of vertices, we can
// stop processing. // stop processing.
if (num_vertices(w) == N_SPECIALS) { if (num_vertices(g) == N_SPECIALS) {
DEBUG_PRINTF("all vertices claimed before calc components\n"); DEBUG_PRINTF("all vertices claimed before calc components\n");
return true; return true;
} }
// Split the graph into a set of connected components. // Split the graph into a set of connected components.
deque<unique_ptr<NGHolder>> g_comp = calcComponents(w); deque<unique_ptr<NGHolder>> g_comp = calcComponents(g);
assert(!g_comp.empty()); assert(!g_comp.empty());
if (!som) { if (!som) {
@ -443,14 +445,14 @@ bool NG::addGraph(NGWrapper &w) {
recalcComponents(g_comp); recalcComponents(g_comp);
} }
if (processComponents(*this, w, g_comp, som)) { if (processComponents(*this, expr, g_comp, som)) {
return true; return true;
} }
// If we're in prefiltering mode, we can run the prefilter reductions and // If we're in prefiltering mode, we can run the prefilter reductions and
// have another shot at accepting the graph. // have another shot at accepting the graph.
if (cc.grey.prefilterReductions && w.prefilter) { if (cc.grey.prefilterReductions && expr.prefilter) {
for (u32 i = 0; i < g_comp.size(); i++) { for (u32 i = 0; i < g_comp.size(); i++) {
if (!g_comp[i]) { if (!g_comp[i]) {
continue; continue;
@ -459,7 +461,7 @@ bool NG::addGraph(NGWrapper &w) {
prefilterReductions(*g_comp[i], cc); prefilterReductions(*g_comp[i], cc);
} }
if (processComponents(*this, w, g_comp, som)) { if (processComponents(*this, expr, g_comp, som)) {
return true; return true;
} }
} }
@ -469,7 +471,7 @@ bool NG::addGraph(NGWrapper &w) {
if (g_comp[i]) { if (g_comp[i]) {
DEBUG_PRINTF("could not compile component %u with %zu vertices\n", DEBUG_PRINTF("could not compile component %u with %zu vertices\n",
i, num_vertices(*g_comp[i])); i, num_vertices(*g_comp[i]));
throw CompileError(w.expressionIndex, "Pattern is too large."); throw CompileError(expr.index, "Pattern is too large.");
} }
} }
@ -478,60 +480,60 @@ bool NG::addGraph(NGWrapper &w) {
} }
/** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */ /** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */
bool NG::addHolder(NGHolder &w) { bool NG::addHolder(NGHolder &g) {
DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(w)); DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g));
assert(allMatchStatesHaveReports(w)); assert(allMatchStatesHaveReports(g));
assert(hasCorrectlyNumberedVertices(w)); assert(hasCorrectlyNumberedVertices(g));
/* We don't update the global minWidth here as we care about the min width /* We don't update the global minWidth here as we care about the min width
* of the whole pattern - not just a prefix of it. */ * of the whole pattern - not just a prefix of it. */
bool prefilter = false; bool prefilter = false;
//dumpDotComp(comp, w, *this, 20, "prefix_init"); //dumpDotComp(comp, g, *this, 20, "prefix_init");
som_type som = SOM_NONE; /* the prefixes created by the SOM code do not som_type som = SOM_NONE; /* the prefixes created by the SOM code do not
themselves track som */ themselves track som */
bool utf8 = false; // handling done earlier bool utf8 = false; // handling done earlier
reduceGraph(w, som, utf8, cc); reduceGraph(g, som, utf8, cc);
// There may be redundant regions that we can remove // There may be redundant regions that we can remove
if (cc.grey.performGraphSimplification) { if (cc.grey.performGraphSimplification) {
removeRegionRedundancy(w, som); removeRegionRedundancy(g, som);
} }
// "Short Exhaustible Passthrough" patterns always become outfixes. // "Short Exhaustible Passthrough" patterns always become outfixes.
if (isSEP(w, rm, cc.grey)) { if (isSEP(g, rm, cc.grey)) {
DEBUG_PRINTF("graph is SEP\n"); DEBUG_PRINTF("graph is SEP\n");
if (rose->addOutfix(w)) { if (rose->addOutfix(g)) {
return true; return true;
} }
} }
if (splitOffAnchoredAcyclic(*rose, w, cc)) { if (splitOffAnchoredAcyclic(*rose, g, cc)) {
return true; return true;
} }
if (handleSmallLiteralSets(*rose, w, cc) if (handleSmallLiteralSets(*rose, g, cc)
|| handleFixedWidth(*rose, w, cc.grey)) { || handleFixedWidth(*rose, g, cc.grey)) {
return true; return true;
} }
if (handleDecoratedLiterals(*rose, w, cc)) { if (handleDecoratedLiterals(*rose, g, cc)) {
return true; return true;
} }
if (doViolet(*rose, w, prefilter, false, rm, cc)) { if (doViolet(*rose, g, prefilter, false, rm, cc)) {
return true; return true;
} }
if (splitOffPuffs(*rose, rm, w, prefilter, cc)) { if (splitOffPuffs(*rose, rm, g, prefilter, cc)) {
return true; return true;
} }
if (doViolet(*rose, w, prefilter, true, rm, cc)) { if (doViolet(*rose, g, prefilter, true, rm, cc)) {
return true; return true;
} }
DEBUG_PRINTF("trying for outfix\n"); DEBUG_PRINTF("trying for outfix\n");
if (rose->addOutfix(w)) { if (rose->addOutfix(g)) {
DEBUG_PRINTF("ok\n"); DEBUG_PRINTF("ok\n");
return true; return true;
} }
@ -586,26 +588,4 @@ bool NG::addLiteral(const ue2_literal &literal, u32 expr_index,
return true; return true;
} }
NGWrapper::NGWrapper(unsigned int ei, bool highlander_in, bool utf8_in,
bool prefilter_in, som_type som_in, ReportID r,
u64a min_offset_in, u64a max_offset_in, u64a min_length_in,
u32 edit_distance_in)
: expressionIndex(ei), reportId(r), highlander(highlander_in),
utf8(utf8_in), prefilter(prefilter_in), som(som_in),
min_offset(min_offset_in), max_offset(max_offset_in),
min_length(min_length_in), edit_distance(edit_distance_in) {
// All special nodes/edges are added in NGHolder's constructor.
DEBUG_PRINTF("built %p: expr=%u report=%u%s%s%s%s "
"min_offset=%llu max_offset=%llu min_length=%llu "
"edit_distance=%u\n",
this, expressionIndex, reportId,
highlander ? " highlander" : "",
utf8 ? " utf8" : "",
prefilter ? " prefilter" : "",
(som != SOM_NONE) ? " som" : "",
min_offset, max_offset, min_length, edit_distance);
}
NGWrapper::~NGWrapper() {}
} // namespace ue2 } // namespace ue2

View File

@ -27,7 +27,7 @@
*/ */
/** \file /** \file
* \brief NG, NGHolder, NGWrapper declarations. * \brief NG declaration.
*/ */
#ifndef NG_H #ifndef NG_H
@ -58,31 +58,7 @@ namespace ue2 {
struct CompileContext; struct CompileContext;
struct ue2_literal; struct ue2_literal;
class NGWrapper : public NGHolder { class ExpressionInfo;
public:
NGWrapper(unsigned int expressionIndex, bool highlander, bool utf8,
bool prefilter, const som_type som, ReportID rid, u64a min_offset,
u64a max_offset, u64a min_length, u32 edit_distance);
~NGWrapper() override;
/** index of the expression represented by this graph, used
* - down the track in error handling
* - identifying parts of an expression in highlander mode
*/
const unsigned int expressionIndex;
const ReportID reportId; /**< user-visible report id */
const bool highlander; /**< user-specified single match only */
const bool utf8; /**< UTF-8 mode */
const bool prefilter; /**< prefiltering mode */
const som_type som; /**< SOM type requested */
u64a min_offset; /**< extparam min_offset value */
u64a max_offset; /**< extparam max_offset value */
u64a min_length; /**< extparam min_length value */
u32 edit_distance; /**< extparam edit_distance value */
};
class RoseBuild; class RoseBuild;
class SmallWriteBuild; class SmallWriteBuild;
@ -94,14 +70,14 @@ public:
/** \brief Consumes a pattern, returns false or throws a CompileError /** \brief Consumes a pattern, returns false or throws a CompileError
* exception if the graph cannot be consumed. */ * exception if the graph cannot be consumed. */
bool addGraph(NGWrapper &w); bool addGraph(ExpressionInfo &expr, NGHolder &h);
/** \brief Consumes a graph, cut-down version of addGraph for use by SOM /** \brief Consumes a graph, cut-down version of addGraph for use by SOM
* processing. */ * processing. */
bool addHolder(NGHolder &h); bool addHolder(NGHolder &h);
/** \brief Adds a literal to Rose, used by literal shortcut passes (instead of /** \brief Adds a literal to Rose, used by literal shortcut passes (instead
* using \ref addGraph) */ * of using \ref addGraph) */
bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report, bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report,
bool highlander, som_type som); bool highlander, som_type som);
@ -128,7 +104,8 @@ public:
* *
* Shared with the small write compiler. * Shared with the small write compiler.
*/ */
void reduceGraph(NGHolder &g, som_type som, bool utf8, const CompileContext &cc); void reduceGraph(NGHolder &g, som_type som, bool utf8,
const CompileContext &cc);
} // namespace ue2 } // namespace ue2

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -47,6 +47,7 @@
#include "ng_prune.h" #include "ng_prune.h"
#include "ng_redundancy.h" #include "ng_redundancy.h"
#include "ng_util.h" #include "ng_util.h"
#include "compiler/compiler.h"
#include "parser/position.h" // for POS flags #include "parser/position.h" // for POS flags
#include "util/bitutils.h" // for findAndClearLSB_32 #include "util/bitutils.h" // for findAndClearLSB_32
#include "util/boundary_reports.h" #include "util/boundary_reports.h"
@ -184,43 +185,45 @@ void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts,
} }
static static
void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) { void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
NFAVertex v, s32 adj) {
// Don't try and set the report ID of a special vertex. // Don't try and set the report ID of a special vertex.
assert(!is_special(v, g)); assert(!is_special(v, g));
// If there's a report set already, we're replacing it. // If there's a report set already, we're replacing it.
g[v].reports.clear(); g[v].reports.clear();
Report ir = rm.getBasicInternalReport(g, adj); Report ir = rm.getBasicInternalReport(expr, adj);
g[v].reports.insert(rm.getInternalId(ir)); g[v].reports.insert(rm.getInternalId(ir));
DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj); DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
} }
static static
NFAVertex makeClone(ReportManager &rm, NGWrapper &g, NFAVertex v, NFAVertex makeClone(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
const CharReach &cr_mask) { NFAVertex v, const CharReach &cr_mask) {
NFAVertex clone = clone_vertex(g, v); NFAVertex clone = clone_vertex(g, v);
g[clone].char_reach &= cr_mask; g[clone].char_reach &= cr_mask;
clone_out_edges(g, v, clone); clone_out_edges(g, v, clone);
clone_in_edges(g, v, clone); clone_in_edges(g, v, clone);
if (v == g.startDs) { if (v == g.startDs) {
if (g.utf8) { if (expr.utf8) {
g[clone].char_reach &= ~UTF_START_CR; g[clone].char_reach &= ~UTF_START_CR;
} }
DEBUG_PRINTF("marked as virt\n"); DEBUG_PRINTF("marked as virt\n");
g[clone].assert_flags = POS_FLAG_VIRTUAL_START; g[clone].assert_flags = POS_FLAG_VIRTUAL_START;
setReportId(rm, g, clone, 0); setReportId(rm, g, expr, clone, 0);
} }
return clone; return clone;
} }
static static
void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) { void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
NFAVertex v, bool ucp) {
assert(v != g.start); assert(v != g.start);
assert(v != g.accept); assert(v != g.accept);
assert(v != g.acceptEod); assert(v != g.acceptEod);
@ -232,14 +235,14 @@ void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; }; auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; };
// Split v into word/nonword vertices with only asserting out-edges. // Split v into word/nonword vertices with only asserting out-edges.
NFAVertex w_out = makeClone(rm, g, v, cr_word); NFAVertex w_out = makeClone(rm, g, expr, v, cr_word);
NFAVertex nw_out = makeClone(rm, g, v, cr_nonword); NFAVertex nw_out = makeClone(rm, g, expr, v, cr_nonword);
remove_out_edge_if(w_out, has_no_assert, g); remove_out_edge_if(w_out, has_no_assert, g);
remove_out_edge_if(nw_out, has_no_assert, g); remove_out_edge_if(nw_out, has_no_assert, g);
// Split v into word/nonword vertices with only asserting in-edges. // Split v into word/nonword vertices with only asserting in-edges.
NFAVertex w_in = makeClone(rm, g, v, cr_word); NFAVertex w_in = makeClone(rm, g, expr, v, cr_word);
NFAVertex nw_in = makeClone(rm, g, v, cr_nonword); NFAVertex nw_in = makeClone(rm, g, expr, v, cr_nonword);
remove_in_edge_if(w_in, has_no_assert, g); remove_in_edge_if(w_in, has_no_assert, g);
remove_in_edge_if(nw_in, has_no_assert, g); remove_in_edge_if(nw_in, has_no_assert, g);
@ -250,7 +253,8 @@ void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
} }
static static
void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) { void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
set<NFAEdge> *dead) {
for (const auto &e : edges_range(g)) { for (const auto &e : edges_range(g)) {
u32 flags = g[e].assert_flags; u32 flags = g[e].assert_flags;
if (!flags) { if (!flags) {
@ -363,7 +367,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
} else if (v_w) { } else if (v_w) {
/* need to add a word byte */ /* need to add a word byte */
NFAVertex vv = add_vertex(g); NFAVertex vv = add_vertex(g);
setReportId(rm, g, vv, -1); setReportId(rm, g, expr, vv, -1);
g[vv].char_reach = CHARREACH_WORD; g[vv].char_reach = CHARREACH_WORD;
add_edge(vv, g.accept, g); add_edge(vv, g.accept, g);
g[e].assert_flags = 0; g[e].assert_flags = 0;
@ -372,7 +376,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
} else { } else {
/* need to add a non word byte or see eod */ /* need to add a non word byte or see eod */
NFAVertex vv = add_vertex(g); NFAVertex vv = add_vertex(g);
setReportId(rm, g, vv, -1); setReportId(rm, g, expr, vv, -1);
g[vv].char_reach = CHARREACH_NONWORD; g[vv].char_reach = CHARREACH_NONWORD;
add_edge(vv, g.accept, g); add_edge(vv, g.accept, g);
g[e].assert_flags = 0; g[e].assert_flags = 0;
@ -416,7 +420,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
} else if (v_w) { } else if (v_w) {
/* need to add a word byte */ /* need to add a word byte */
NFAVertex vv = add_vertex(g); NFAVertex vv = add_vertex(g);
setReportId(rm, g, vv, -1); setReportId(rm, g, expr, vv, -1);
g[vv].char_reach = CHARREACH_WORD_UCP_PRE; g[vv].char_reach = CHARREACH_WORD_UCP_PRE;
add_edge(vv, g.accept, g); add_edge(vv, g.accept, g);
g[e].assert_flags = 0; g[e].assert_flags = 0;
@ -425,7 +429,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
} else { } else {
/* need to add a non word byte or see eod */ /* need to add a non word byte or see eod */
NFAVertex vv = add_vertex(g); NFAVertex vv = add_vertex(g);
setReportId(rm, g, vv, -1); setReportId(rm, g, expr, vv, -1);
g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE; g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE;
add_edge(vv, g.accept, g); add_edge(vv, g.accept, g);
g[e].assert_flags = 0; g[e].assert_flags = 0;
@ -450,7 +454,8 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
} }
} }
void resolveAsserts(ReportManager &rm, NGWrapper &g) { void resolveAsserts(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
vector<NFAEdge> asserts = getAsserts(g); vector<NFAEdge> asserts = getAsserts(g);
if (asserts.empty()) { if (asserts.empty()) {
return; return;
@ -460,20 +465,20 @@ void resolveAsserts(ReportManager &rm, NGWrapper &g) {
map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */ map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */
findSplitters(g, asserts, &to_split, &to_split_ucp); findSplitters(g, asserts, &to_split, &to_split_ucp);
if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) { if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) {
throw CompileError(g.expressionIndex, "Pattern is too large."); throw CompileError(expr.index, "Pattern is too large.");
} }
for (const auto &m : to_split) { for (const auto &m : to_split) {
assert(!contains(to_split_ucp, m.first)); assert(!contains(to_split_ucp, m.first));
splitVertex(rm, g, m.second, false); splitVertex(rm, g, expr, m.second, false);
} }
for (const auto &m : to_split_ucp) { for (const auto &m : to_split_ucp) {
splitVertex(rm, g, m.second, true); splitVertex(rm, g, expr, m.second, true);
} }
set<NFAEdge> dead; set<NFAEdge> dead;
resolveEdges(rm, g, &dead); resolveEdges(rm, g, expr, &dead);
remove_edges(dead, g); remove_edges(dead, g);
renumber_vertices(g); renumber_vertices(g);
@ -485,15 +490,16 @@ void resolveAsserts(ReportManager &rm, NGWrapper &g) {
clearReports(g); clearReports(g);
} }
void ensureCodePointStart(ReportManager &rm, NGWrapper &g) { void ensureCodePointStart(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr) {
/* In utf8 mode there is an implicit assertion that we start at codepoint /* In utf8 mode there is an implicit assertion that we start at codepoint
* boundaries. Assert resolution handles the badness coming from asserts. * boundaries. Assert resolution handles the badness coming from asserts.
* The only other source of trouble is startDs->accept connections. * The only other source of trouble is startDs->accept connections.
*/ */
NFAEdge orig = edge(g.startDs, g.accept, g); NFAEdge orig = edge(g.startDs, g.accept, g);
if (g.utf8 && orig) { if (expr.utf8 && orig) {
DEBUG_PRINTF("rectifying %u\n", g.reportId); DEBUG_PRINTF("rectifying %u\n", expr.report);
Report ir = rm.getBasicInternalReport(g); Report ir = rm.getBasicInternalReport(expr);
ReportID rep = rm.getInternalId(ir); ReportID rep = rm.getInternalId(ir);
NFAVertex v_a = add_vertex(g); NFAVertex v_a = add_vertex(g);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -36,12 +36,14 @@
namespace ue2 { namespace ue2 {
struct BoundaryReports; struct BoundaryReports;
class NGWrapper; class ExpressionInfo;
class NGHolder;
class ReportManager; class ReportManager;
void resolveAsserts(ReportManager &rm, NGWrapper &g); void resolveAsserts(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr);
void ensureCodePointStart(ReportManager &rm, NGWrapper &g); void ensureCodePointStart(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr);
} // namespace ue2 } // namespace ue2

View File

@ -28,11 +28,13 @@
/** \file /** \file
* \brief: NFA Graph Builder: used by Glushkov construction to construct an * \brief: NFA Graph Builder: used by Glushkov construction to construct an
* NGWrapper from a parsed expression. * NGHolder from a parsed expression.
*/ */
#include "ng_builder.h"
#include "grey.h" #include "grey.h"
#include "ng.h" #include "ng.h"
#include "ng_builder.h"
#include "ng_util.h" #include "ng_util.h"
#include "ue2common.h" #include "ue2common.h"
#include "compiler/compiler.h" // for ParsedExpression #include "compiler/compiler.h" // for ParsedExpression
@ -79,7 +81,7 @@ public:
void cloneRegion(Position first, Position last, void cloneRegion(Position first, Position last,
unsigned posOffset) override; unsigned posOffset) override;
unique_ptr<NGWrapper> getGraph() override; BuiltExpression getGraph() override;
private: private:
/** fetch a vertex given its Position ID. */ /** fetch a vertex given its Position ID. */
@ -94,8 +96,11 @@ private:
/** \brief Greybox: used for resource limits. */ /** \brief Greybox: used for resource limits. */
const Grey &grey; const Grey &grey;
/** \brief Underlying NGWrapper graph. */ /** \brief Underlying graph. */
unique_ptr<NGWrapper> graph; unique_ptr<NGHolder> graph;
/** \brief Underlying expression info. */
ExpressionInfo expr;
/** \brief mapping from position to vertex. Use \ref getVertex for access. /** \brief mapping from position to vertex. Use \ref getVertex for access.
* */ * */
@ -108,13 +113,9 @@ private:
} // namespace } // namespace
NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in, NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in,
const ParsedExpression &expr) const ParsedExpression &parsed)
: rm(rm_in), grey(grey_in), : rm(rm_in), grey(grey_in), graph(ue2::make_unique<NGHolder>()),
graph(ue2::make_unique<NGWrapper>( expr(parsed.expr), vertIdx(N_SPECIALS) {
expr.index, expr.highlander, expr.utf8, expr.prefilter, expr.som,
expr.id, expr.min_offset, expr.max_offset, expr.min_length,
expr.edit_distance)),
vertIdx(N_SPECIALS) {
// Reserve space for a reasonably-sized NFA // Reserve space for a reasonably-sized NFA
id2vertex.reserve(64); id2vertex.reserve(64);
@ -151,7 +152,7 @@ void NFABuilderImpl::addVertex(Position pos) {
(*graph)[v].index = pos; (*graph)[v].index = pos;
} }
unique_ptr<NGWrapper> NFABuilderImpl::getGraph() { BuiltExpression NFABuilderImpl::getGraph() {
DEBUG_PRINTF("built graph has %zu vertices and %zu edges\n", DEBUG_PRINTF("built graph has %zu vertices and %zu edges\n",
num_vertices(*graph), num_edges(*graph)); num_vertices(*graph), num_edges(*graph));
@ -162,13 +163,13 @@ unique_ptr<NGWrapper> NFABuilderImpl::getGraph() {
throw CompileError("Pattern too large."); throw CompileError("Pattern too large.");
} }
return move(graph); return { expr, move(graph) };
} }
void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) { void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) {
Report ir = rm.getBasicInternalReport(*graph, offsetAdjust); Report ir = rm.getBasicInternalReport(expr, offsetAdjust);
DEBUG_PRINTF("setting report id on %u = (%u, %d, %u)\n", DEBUG_PRINTF("setting report id on %u = (%u, %d, %u)\n",
pos, graph->reportId, offsetAdjust, ir.ekey); pos, expr.report, offsetAdjust, ir.ekey);
NFAVertex v = getVertex(pos); NFAVertex v = getVertex(pos);
auto &reports = (*graph)[v].reports; auto &reports = (*graph)[v].reports;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -28,7 +28,7 @@
/** \file /** \file
* \brief: NFA Graph Builder: used by Glushkov construction to construct an * \brief: NFA Graph Builder: used by Glushkov construction to construct an
* NGWrapper from a parsed expression. * NGHolder from a parsed expression.
*/ */
#ifndef NG_BUILDER_H #ifndef NG_BUILDER_H
@ -44,8 +44,8 @@
namespace ue2 { namespace ue2 {
class CharReach; class CharReach;
class NGWrapper;
class ReportManager; class ReportManager;
struct BuiltExpression;
struct CompileContext; struct CompileContext;
class ParsedExpression; class ParsedExpression;
@ -83,10 +83,10 @@ public:
unsigned posOffset) = 0; unsigned posOffset) = 0;
/** /**
* \brief Returns the built NGWrapper graph. * \brief Returns the built NGHolder graph and ExpressionInfo.
* Note that this builder cannot be used after this call. * Note that this builder cannot be used after this call.
*/ */
virtual std::unique_ptr<NGWrapper> getGraph() = 0; virtual BuiltExpression getGraph() = 0;
}; };
/** Construct a usable NFABuilder. */ /** Construct a usable NFABuilder. */

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -35,24 +35,25 @@
#include "config.h" #include "config.h"
#include "ng_dump.h" #include "nfagraph/ng_dump.h"
#include "hwlm/hwlm_build.h" #include "hs_compile.h" /* for HS_MODE_* flags */
#include "ng.h"
#include "ng_util.h"
#include "parser/position.h"
#include "ue2common.h" #include "ue2common.h"
#include "compiler/compiler.h"
#include "hwlm/hwlm_build.h"
#include "nfa/accel.h" #include "nfa/accel.h"
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX #include "nfa/nfa_internal.h" // for MO_INVALID_IDX
#include "smallwrite/smallwrite_dump.h" #include "nfagraph/ng.h"
#include "nfagraph/ng_util.h"
#include "parser/position.h"
#include "rose/rose_build.h" #include "rose/rose_build.h"
#include "rose/rose_internal.h" #include "rose/rose_internal.h"
#include "smallwrite/smallwrite_dump.h"
#include "util/bitutils.h" #include "util/bitutils.h"
#include "util/dump_charclass.h" #include "util/dump_charclass.h"
#include "util/report.h" #include "util/report.h"
#include "util/report_manager.h" #include "util/report_manager.h"
#include "util/ue2string.h" #include "util/ue2string.h"
#include "hs_compile.h" /* for HS_MODE_* flags */
#include <cmath> #include <cmath>
#include <fstream> #include <fstream>
@ -287,13 +288,13 @@ void dumpGraphImpl(const char *name, const GraphT &g,
// manual instantiation of templated dumpGraph above. // manual instantiation of templated dumpGraph above.
template void dumpGraphImpl(const char *, const NGHolder &); template void dumpGraphImpl(const char *, const NGHolder &);
void dumpDotWrapperImpl(const NGWrapper &nw, const char *name, void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr,
const Grey &grey) { const char *name, const Grey &grey) {
if (grey.dumpFlags & Grey::DUMP_INT_GRAPH) { if (grey.dumpFlags & Grey::DUMP_INT_GRAPH) {
stringstream ss; stringstream ss;
ss << grey.dumpPath << "Expr_" << nw.expressionIndex << "_" << name << ".dot"; ss << grey.dumpPath << "Expr_" << expr.index << "_" << name << ".dot";
DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str()); DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str());
dumpGraphImpl(ss.str().c_str(), nw); dumpGraphImpl(ss.str().c_str(), g);
} }
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -48,7 +48,7 @@ namespace ue2 {
class NGHolder; class NGHolder;
class NG; class NG;
class NGWrapper; class ExpressionInfo;
class ReportManager; class ReportManager;
// Implementations for stubs below -- all have the suffix "Impl". // Implementations for stubs below -- all have the suffix "Impl".
@ -61,7 +61,8 @@ void dumpGraphImpl(const char *name, const GraphT &g);
template <typename GraphT> template <typename GraphT>
void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm); void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm);
void dumpDotWrapperImpl(const NGWrapper &w, const char *name, const Grey &grey); void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr,
const char *name, const Grey &grey);
void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, u32 comp, void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, u32 comp,
const Grey &grey); const Grey &grey);
@ -88,10 +89,10 @@ static inline void dumpGraph(UNUSED const char *name, UNUSED const GraphT &g) {
// Stubs which call through to dump code if compiled in. // Stubs which call through to dump code if compiled in.
UNUSED static inline UNUSED static inline
void dumpDotWrapper(UNUSED const NGWrapper &w, UNUSED const char *name, void dumpDotWrapper(UNUSED const NGHolder &g, UNUSED const ExpressionInfo &expr,
UNUSED const Grey &grey) { UNUSED const char *name, UNUSED const Grey &grey) {
#ifdef DUMP_SUPPORT #ifdef DUMP_SUPPORT
dumpDotWrapperImpl(w, name, grey); dumpDotWrapperImpl(g, expr, name, grey);
#endif #endif
} }

View File

@ -27,8 +27,8 @@
*/ */
/** \file /** \file
* \brief Code for discovering properties of an NGWrapper used by * \brief Code for discovering properties of an NFA graph used by
* hs_expression_info. * hs_expression_info().
*/ */
#include "ng_expr_info.h" #include "ng_expr_info.h"
@ -58,42 +58,42 @@ namespace ue2 {
/* get rid of leading \b and multiline ^ vertices */ /* get rid of leading \b and multiline ^ vertices */
static static
void removeLeadingVirtualVerticesFromRoot(NGWrapper &w, NFAVertex root) { void removeLeadingVirtualVerticesFromRoot(NGHolder &g, NFAVertex root) {
vector<NFAVertex> victims; vector<NFAVertex> victims;
for (auto v : adjacent_vertices_range(root, w)) { for (auto v : adjacent_vertices_range(root, g)) {
if (w[v].assert_flags & POS_FLAG_VIRTUAL_START) { if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n"); DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n");
victims.push_back(v); victims.push_back(v);
} }
} }
for (auto u : victims) { for (auto u : victims) {
for (auto v : adjacent_vertices_range(u, w)) { for (auto v : adjacent_vertices_range(u, g)) {
add_edge_if_not_present(root, v, w); add_edge_if_not_present(root, v, g);
} }
} }
remove_vertices(victims, w); remove_vertices(victims, g);
} }
static static
void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v, void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v,
const vector<DepthMinMax> &depths, DepthMinMax &info) { const vector<DepthMinMax> &depths, DepthMinMax &info) {
if (is_any_accept(v, w)) { if (is_any_accept(v, g)) {
return; return;
} }
if (is_any_start(v, w)) { if (is_any_start(v, g)) {
info.min = 0; info.min = 0;
info.max = max(info.max, depth(0)); info.max = max(info.max, depth(0));
return; return;
} }
u32 idx = w[v].index; u32 idx = g[v].index;
assert(idx < depths.size()); assert(idx < depths.size());
const DepthMinMax &d = depths.at(idx); const DepthMinMax &d = depths.at(idx);
for (ReportID report_id : w[v].reports) { for (ReportID report_id : g[v].reports) {
const Report &report = rm.getReport(report_id); const Report &report = rm.getReport(report_id);
assert(report.type == EXTERNAL_CALLBACK); assert(report.type == EXTERNAL_CALLBACK);
@ -118,7 +118,7 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v,
rd.max = min(rd.max, max_offset); rd.max = min(rd.max, max_offset);
} }
DEBUG_PRINTF("vertex %zu report %u: %s\n", w[v].index, report_id, DEBUG_PRINTF("vertex %zu report %u: %s\n", g[v].index, report_id,
rd.str().c_str()); rd.str().c_str());
info = unionDepthMinMax(info, rd); info = unionDepthMinMax(info, rd);
@ -126,8 +126,8 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v,
} }
static static
bool hasOffsetAdjust(const ReportManager &rm, const NGWrapper &w) { bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g) {
for (const auto &report_id : all_reports(w)) { for (const auto &report_id : all_reports(g)) {
if (rm.getReport(report_id).offsetAdjust) { if (rm.getReport(report_id).offsetAdjust) {
return true; return true;
} }
@ -135,28 +135,29 @@ bool hasOffsetAdjust(const ReportManager &rm, const NGWrapper &w) {
return false; return false;
} }
void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info) { void fillExpressionInfo(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr, hs_expr_info *info) {
assert(info); assert(info);
/* ensure utf8 starts at cp boundary */ /* ensure utf8 starts at cp boundary */
ensureCodePointStart(rm, w); ensureCodePointStart(rm, g, expr);
resolveAsserts(rm, w); resolveAsserts(rm, g, expr);
optimiseVirtualStarts(w); optimiseVirtualStarts(g);
removeLeadingVirtualVerticesFromRoot(w, w.start); removeLeadingVirtualVerticesFromRoot(g, g.start);
removeLeadingVirtualVerticesFromRoot(w, w.startDs); removeLeadingVirtualVerticesFromRoot(g, g.startDs);
vector<DepthMinMax> depths; vector<DepthMinMax> depths;
calcDepthsFrom(w, w.start, depths); calcDepthsFrom(g, g.start, depths);
DepthMinMax d; DepthMinMax d;
for (auto u : inv_adjacent_vertices_range(w.accept, w)) { for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
checkVertex(rm, w, u, depths, d); checkVertex(rm, g, u, depths, d);
} }
for (auto u : inv_adjacent_vertices_range(w.acceptEod, w)) { for (auto u : inv_adjacent_vertices_range(g.acceptEod, g)) {
checkVertex(rm, w, u, depths, d); checkVertex(rm, g, u, depths, d);
} }
if (d.max.is_finite()) { if (d.max.is_finite()) {
@ -170,9 +171,9 @@ void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info) {
info->min_width = UINT_MAX; info->min_width = UINT_MAX;
} }
info->unordered_matches = hasOffsetAdjust(rm, w); info->unordered_matches = hasOffsetAdjust(rm, g);
info->matches_at_eod = can_match_at_eod(w); info->matches_at_eod = can_match_at_eod(g);
info->matches_only_at_eod = can_only_match_at_eod(w); info->matches_only_at_eod = can_only_match_at_eod(g);
} }
} // namespace ue2 } // namespace ue2

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -27,7 +27,7 @@
*/ */
/** \file /** \file
* \brief Code for discovering properties of an NGWrapper used by * \brief Code for discovering properties of an expression used by
* hs_expression_info. * hs_expression_info.
*/ */
@ -36,14 +36,14 @@
struct hs_expr_info; struct hs_expr_info;
#include "ue2common.h"
namespace ue2 { namespace ue2 {
class NGWrapper; class ExpressionInfo;
class NGHolder;
class ReportManager; class ReportManager;
void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info); void fillExpressionInfo(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr, hs_expr_info *info);
} // namespace ue2 } // namespace ue2

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -38,16 +38,19 @@
* match given these constraints, or transform the graph in order to make a * match given these constraints, or transform the graph in order to make a
* constraint implicit. * constraint implicit.
*/ */
#include "ng_extparam.h"
#include "ng.h" #include "ng.h"
#include "ng_depth.h" #include "ng_depth.h"
#include "ng_dump.h" #include "ng_dump.h"
#include "ng_extparam.h"
#include "ng_prune.h" #include "ng_prune.h"
#include "ng_reports.h" #include "ng_reports.h"
#include "ng_som_util.h" #include "ng_som_util.h"
#include "ng_width.h" #include "ng_width.h"
#include "ng_util.h" #include "ng_util.h"
#include "ue2common.h" #include "ue2common.h"
#include "compiler/compiler.h"
#include "parser/position.h" #include "parser/position.h"
#include "util/compile_context.h" #include "util/compile_context.h"
#include "util/compile_error.h" #include "util/compile_error.h"
@ -129,7 +132,8 @@ DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) {
/** \brief Replace the graph's reports with new reports that specify bounds. */ /** \brief Replace the graph's reports with new reports that specify bounds. */
static static
void updateReportBounds(ReportManager &rm, NGWrapper &g, NFAVertex accept, void updateReportBounds(ReportManager &rm, NGHolder &g,
const ExpressionInfo &expr, NFAVertex accept,
set<NFAVertex> &done) { set<NFAVertex> &done) {
for (auto v : inv_adjacent_vertices_range(accept, g)) { for (auto v : inv_adjacent_vertices_range(accept, g)) {
// Don't operate on g.accept itself. // Don't operate on g.accept itself.
@ -153,16 +157,16 @@ void updateReportBounds(ReportManager &rm, NGWrapper &g, NFAVertex accept,
// Note that we need to cope with offset adjustment here. // Note that we need to cope with offset adjustment here.
ir.minOffset = g.min_offset - ir.offsetAdjust; ir.minOffset = expr.min_offset - ir.offsetAdjust;
if (g.max_offset == MAX_OFFSET) { if (expr.max_offset == MAX_OFFSET) {
ir.maxOffset = MAX_OFFSET; ir.maxOffset = MAX_OFFSET;
} else { } else {
ir.maxOffset = g.max_offset - ir.offsetAdjust; ir.maxOffset = expr.max_offset - ir.offsetAdjust;
} }
assert(ir.maxOffset >= ir.minOffset); assert(ir.maxOffset >= ir.minOffset);
ir.minLength = g.min_length; ir.minLength = expr.min_length;
if (g.min_length && !g.som) { if (expr.min_length && !expr.som) {
ir.quashSom = true; ir.quashSom = true;
} }
@ -196,22 +200,23 @@ bool hasVirtualStarts(const NGHolder &g) {
* anchored and unanchored paths, but it's too tricky for the moment. * anchored and unanchored paths, but it's too tricky for the moment.
*/ */
static static
bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth, bool anchorPatternWithBoundedRepeat(NGHolder &g, const ExpressionInfo &expr,
const depth &minWidth,
const depth &maxWidth) { const depth &maxWidth) {
assert(!g.som); assert(!expr.som);
assert(g.max_offset != MAX_OFFSET); assert(expr.max_offset != MAX_OFFSET);
assert(minWidth <= maxWidth); assert(minWidth <= maxWidth);
assert(maxWidth.is_reachable()); assert(maxWidth.is_reachable());
DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n", DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n",
minWidth.str().c_str(), maxWidth.str().c_str(), g.min_offset, minWidth.str().c_str(), maxWidth.str().c_str(),
g.max_offset); expr.min_offset, expr.max_offset);
if (g.max_offset > MAX_MAXOFFSET_TO_ANCHOR) { if (expr.max_offset > MAX_MAXOFFSET_TO_ANCHOR) {
return false; return false;
} }
if (g.max_offset < minWidth) { if (expr.max_offset < minWidth) {
assert(0); assert(0);
return false; return false;
} }
@ -232,10 +237,10 @@ bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth,
u32 min_bound, max_bound; u32 min_bound, max_bound;
if (maxWidth.is_infinite()) { if (maxWidth.is_infinite()) {
min_bound = 0; min_bound = 0;
max_bound = g.max_offset - minWidth; max_bound = expr.max_offset - minWidth;
} else { } else {
min_bound = g.min_offset > maxWidth ? g.min_offset - maxWidth : 0; min_bound = expr.min_offset > maxWidth ? expr.min_offset - maxWidth : 0;
max_bound = g.max_offset - minWidth; max_bound = expr.max_offset - minWidth;
} }
DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound); DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound);
@ -315,7 +320,7 @@ NFAVertex findSingleCyclic(const NGHolder &g) {
} }
static static
bool hasOffsetAdjust(const ReportManager &rm, NGWrapper &g, bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g,
int *adjust) { int *adjust) {
const auto &reports = all_reports(g); const auto &reports = all_reports(g);
if (reports.empty()) { if (reports.empty()) {
@ -342,10 +347,11 @@ bool hasOffsetAdjust(const ReportManager &rm, NGWrapper &g,
* /foo.*bar/{min_length=100} --> /foo.{94,}bar/ * /foo.*bar/{min_length=100} --> /foo.{94,}bar/
*/ */
static static
bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { bool transformMinLengthToRepeat(const ReportManager &rm, NGHolder &g,
assert(g.min_length); ExpressionInfo &expr) {
assert(expr.min_length);
if (g.min_length > MAX_MINLENGTH_TO_CONVERT) { if (expr.min_length > MAX_MINLENGTH_TO_CONVERT) {
return false; return false;
} }
@ -437,10 +443,10 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) {
DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width, DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width,
g[cyclic].index); g[cyclic].index);
if (width >= g.min_length) { if (width >= expr.min_length) {
DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n", DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n",
g.min_length, width); expr.min_length, width);
g.min_length = 0; expr.min_length = 0;
return true; return true;
} }
@ -468,7 +474,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) {
const CharReach &cr = g[cyclic].char_reach; const CharReach &cr = g[cyclic].char_reach;
for (u32 i = 0; i < g.min_length - width - 1; ++i) { for (u32 i = 0; i < expr.min_length - width - 1; ++i) {
v = add_vertex(g); v = add_vertex(g);
g[v].char_reach = cr; g[v].char_reach = cr;
@ -487,19 +493,19 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) {
renumber_edges(g); renumber_edges(g);
clearReports(g); clearReports(g);
g.min_length = 0; expr.min_length = 0;
return true; return true;
} }
static static
bool hasExtParams(const NGWrapper &g) { bool hasExtParams(const ExpressionInfo &expr) {
if (g.min_length != 0) { if (expr.min_length != 0) {
return true; return true;
} }
if (g.min_offset != 0) { if (expr.min_offset != 0) {
return true; return true;
} }
if (g.max_offset != MAX_OFFSET) { if (expr.max_offset != MAX_OFFSET) {
return true; return true;
} }
return false; return false;
@ -535,7 +541,7 @@ const depth& minDistToAccept(const NFAVertexBidiDepth &d) {
} }
static static
bool isEdgePrunable(const NGWrapper &g, bool isEdgePrunable(const NGHolder &g, const ExpressionInfo &expr,
const vector<NFAVertexBidiDepth> &depths, const vector<NFAVertexBidiDepth> &depths,
const NFAEdge &e) { const NFAEdge &e) {
const NFAVertex u = source(e, g); const NFAVertex u = source(e, g);
@ -564,29 +570,29 @@ bool isEdgePrunable(const NGWrapper &g,
const NFAVertexBidiDepth &du = depths.at(u_idx); const NFAVertexBidiDepth &du = depths.at(u_idx);
const NFAVertexBidiDepth &dv = depths.at(v_idx); const NFAVertexBidiDepth &dv = depths.at(v_idx);
if (g.min_offset) { if (expr.min_offset) {
depth max_offset = maxDistFromStart(du) + maxDistToAccept(dv); depth max_offset = maxDistFromStart(du) + maxDistToAccept(dv);
if (max_offset.is_finite() && max_offset < g.min_offset) { if (max_offset.is_finite() && max_offset < expr.min_offset) {
DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str()); DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str());
return true; return true;
} }
} }
if (g.max_offset != MAX_OFFSET) { if (expr.max_offset != MAX_OFFSET) {
depth min_offset = minDistFromStart(du) + minDistToAccept(dv); depth min_offset = minDistFromStart(du) + minDistToAccept(dv);
assert(min_offset.is_finite()); assert(min_offset.is_finite());
if (min_offset > g.max_offset) { if (min_offset > expr.max_offset) {
DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str()); DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str());
return true; return true;
} }
} }
if (g.min_length && is_any_accept(v, g)) { if (expr.min_length && is_any_accept(v, g)) {
// Simple take on min_length. If we're an edge to accept and our max // Simple take on min_length. If we're an edge to accept and our max
// dist from start is too small, we can be pruned. // dist from start is too small, we can be pruned.
const depth &width = du.fromStart.max; const depth &width = du.fromStart.max;
if (width.is_finite() && width < g.min_length) { if (width.is_finite() && width < expr.min_length) {
DEBUG_PRINTF("max width %s from start too small for min_length\n", DEBUG_PRINTF("max width %s from start too small for min_length\n",
width.str().c_str()); width.str().c_str());
return true; return true;
@ -597,14 +603,14 @@ bool isEdgePrunable(const NGWrapper &g,
} }
static static
void pruneExtUnreachable(NGWrapper &g) { void pruneExtUnreachable(NGHolder &g, const ExpressionInfo &expr) {
vector<NFAVertexBidiDepth> depths; vector<NFAVertexBidiDepth> depths;
calcDepths(g, depths); calcDepths(g, depths);
vector<NFAEdge> dead; vector<NFAEdge> dead;
for (const auto &e : edges_range(g)) { for (const auto &e : edges_range(g)) {
if (isEdgePrunable(g, depths, e)) { if (isEdgePrunable(g, expr, depths, e)) {
DEBUG_PRINTF("pruning\n"); DEBUG_PRINTF("pruning\n");
dead.push_back(e); dead.push_back(e);
} }
@ -621,8 +627,8 @@ void pruneExtUnreachable(NGWrapper &g) {
/** Remove vacuous edges in graphs where the min_offset or min_length /** Remove vacuous edges in graphs where the min_offset or min_length
* constraints dictate that they can never produce a match. */ * constraints dictate that they can never produce a match. */
static static
void pruneVacuousEdges(NGWrapper &g) { void pruneVacuousEdges(NGHolder &g, const ExpressionInfo &expr) {
if (!g.min_length && !g.min_offset) { if (!expr.min_length && !expr.min_offset) {
return; return;
} }
@ -634,14 +640,14 @@ void pruneVacuousEdges(NGWrapper &g) {
// Special case: Crudely remove vacuous edges from start in graphs with a // Special case: Crudely remove vacuous edges from start in graphs with a
// min_offset. // min_offset.
if (g.min_offset && u == g.start && is_any_accept(v, g)) { if (expr.min_offset && u == g.start && is_any_accept(v, g)) {
DEBUG_PRINTF("vacuous edge in graph with min_offset!\n"); DEBUG_PRINTF("vacuous edge in graph with min_offset!\n");
dead.push_back(e); dead.push_back(e);
continue; continue;
} }
// If a min_length is set, vacuous edges can be removed. // If a min_length is set, vacuous edges can be removed.
if (g.min_length && is_any_start(u, g) && is_any_accept(v, g)) { if (expr.min_length && is_any_start(u, g) && is_any_accept(v, g)) {
DEBUG_PRINTF("vacuous edge in graph with min_length!\n"); DEBUG_PRINTF("vacuous edge in graph with min_length!\n");
dead.push_back(e); dead.push_back(e);
continue; continue;
@ -657,7 +663,8 @@ void pruneVacuousEdges(NGWrapper &g) {
} }
static static
void pruneUnmatchable(NGWrapper &g, const vector<DepthMinMax> &depths, void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr,
const vector<DepthMinMax> &depths,
const ReportManager &rm, NFAVertex accept) { const ReportManager &rm, NFAVertex accept) {
vector<NFAEdge> dead; vector<NFAEdge> dead;
@ -676,16 +683,16 @@ void pruneUnmatchable(NGWrapper &g, const vector<DepthMinMax> &depths,
d.min += adj.first; d.min += adj.first;
d.max += adj.second; d.max += adj.second;
if (d.max.is_finite() && d.max < g.min_length) { if (d.max.is_finite() && d.max < expr.min_length) {
DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n", DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n",
d.max.str().c_str(), g.min_length); d.max.str().c_str(), expr.min_length);
dead.push_back(e); dead.push_back(e);
continue; continue;
} }
if (g.max_offset != MAX_OFFSET && d.min > g.max_offset) { if (expr.max_offset != MAX_OFFSET && d.min > expr.max_offset) {
DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n", DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n",
d.min.str().c_str(), g.max_offset); d.min.str().c_str(), expr.max_offset);
dead.push_back(e); dead.push_back(e);
continue; continue;
} }
@ -697,15 +704,16 @@ void pruneUnmatchable(NGWrapper &g, const vector<DepthMinMax> &depths,
/** Remove edges to accepts that can never produce a match long enough to /** Remove edges to accepts that can never produce a match long enough to
* satisfy our min_length and max_offset constraints. */ * satisfy our min_length and max_offset constraints. */
static static
void pruneUnmatchable(NGWrapper &g, const ReportManager &rm) { void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr,
if (!g.min_length) { const ReportManager &rm) {
if (!expr.min_length) {
return; return;
} }
vector<DepthMinMax> depths = getDistancesFromSOM(g); vector<DepthMinMax> depths = getDistancesFromSOM(g);
pruneUnmatchable(g, depths, rm, g.accept); pruneUnmatchable(g, expr, depths, rm, g.accept);
pruneUnmatchable(g, depths, rm, g.acceptEod); pruneUnmatchable(g, expr, depths, rm, g.acceptEod);
pruneUseless(g); pruneUseless(g);
} }
@ -732,9 +740,9 @@ bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) {
return false; return false;
} }
void handleExtendedParams(ReportManager &rm, NGWrapper &g, void handleExtendedParams(ReportManager &rm, NGHolder &g, ExpressionInfo &expr,
UNUSED const CompileContext &cc) { UNUSED const CompileContext &cc) {
if (!hasExtParams(g)) { if (!hasExtParams(expr)) {
return; return;
} }
@ -751,50 +759,50 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g,
DepthMinMax match_depths = findMatchLengths(rm, g); DepthMinMax match_depths = findMatchLengths(rm, g);
DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str()); DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str());
if (is_anchored && maxWidth.is_finite() && g.min_offset > maxWidth) { if (is_anchored && maxWidth.is_finite() && expr.min_offset > maxWidth) {
ostringstream oss; ostringstream oss;
oss << "Expression is anchored and cannot satisfy min_offset=" oss << "Expression is anchored and cannot satisfy min_offset="
<< g.min_offset << " as it can only produce matches of length " << expr.min_offset << " as it can only produce matches of length "
<< maxWidth << " bytes at most."; << maxWidth << " bytes at most.";
throw CompileError(g.expressionIndex, oss.str()); throw CompileError(expr.index, oss.str());
} }
if (minWidth > g.max_offset) { if (minWidth > expr.max_offset) {
ostringstream oss; ostringstream oss;
oss << "Expression has max_offset=" << g.max_offset << " but requires " oss << "Expression has max_offset=" << expr.max_offset
<< minWidth << " bytes to match."; << " but requires " << minWidth << " bytes to match.";
throw CompileError(g.expressionIndex, oss.str()); throw CompileError(expr.index, oss.str());
} }
if (maxWidth.is_finite() && match_depths.max < g.min_length) { if (maxWidth.is_finite() && match_depths.max < expr.min_length) {
ostringstream oss; ostringstream oss;
oss << "Expression has min_length=" << g.min_length << " but can " oss << "Expression has min_length=" << expr.min_length << " but can "
"only produce matches of length " << match_depths.max << "only produce matches of length " << match_depths.max <<
" bytes at most."; " bytes at most.";
throw CompileError(g.expressionIndex, oss.str()); throw CompileError(expr.index, oss.str());
} }
if (g.min_length && g.min_length <= match_depths.min) { if (expr.min_length && expr.min_length <= match_depths.min) {
DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n", DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n",
g.min_length); expr.min_length);
g.min_length = 0; expr.min_length = 0;
} }
if (!hasExtParams(g)) { if (!hasExtParams(expr)) {
return; return;
} }
pruneVacuousEdges(g); pruneVacuousEdges(g, expr);
pruneUnmatchable(g, rm); pruneUnmatchable(g, expr, rm);
if (!has_offset_adj) { if (!has_offset_adj) {
pruneExtUnreachable(g); pruneExtUnreachable(g, expr);
} }
// We may have removed all the edges to accept, in which case this // We may have removed all the edges to accept, in which case this
// expression cannot match. // expression cannot match.
if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) { if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) {
throw CompileError(g.expressionIndex, "Extended parameter " throw CompileError(expr.index, "Extended parameter "
"constraints can not be satisfied for any match from " "constraints can not be satisfied for any match from "
"this expression."); "this expression.");
} }
@ -812,27 +820,28 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g,
// If the pattern is completely anchored and has a min_length set, this can // If the pattern is completely anchored and has a min_length set, this can
// be converted to a min_offset. // be converted to a min_offset.
if (g.min_length && (g.min_offset <= g.min_length) && is_anchored) { if (expr.min_length && (expr.min_offset <= expr.min_length) &&
DEBUG_PRINTF("converting min_length to min_offset=%llu for " is_anchored) {
"anchored case\n", g.min_length); DEBUG_PRINTF("converting min_length to min_offset=%llu for "
g.min_offset = g.min_length; "anchored case\n", expr.min_length);
g.min_length = 0; expr.min_offset = expr.min_length;
expr.min_length = 0;
} }
if (g.min_offset && g.min_offset <= minWidth && !has_offset_adj) { if (expr.min_offset && expr.min_offset <= minWidth && !has_offset_adj) {
DEBUG_PRINTF("min_offset=%llu constraint is unnecessary\n", DEBUG_PRINTF("min_offset=%llu constraint is unnecessary\n",
g.min_offset); expr.min_offset);
g.min_offset = 0; expr.min_offset = 0;
} }
if (!hasExtParams(g)) { if (!hasExtParams(expr)) {
return; return;
} }
// If the pattern has a min_length and is of "ratchet" form with one // If the pattern has a min_length and is of "ratchet" form with one
// unbounded repeat, that repeat can become a bounded repeat. // unbounded repeat, that repeat can become a bounded repeat.
// e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/ // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/
if (g.min_length && transformMinLengthToRepeat(rm, g)) { if (expr.min_length && transformMinLengthToRepeat(rm, g, expr)) {
DEBUG_PRINTF("converted min_length to bounded repeat\n"); DEBUG_PRINTF("converted min_length to bounded repeat\n");
// recalc // recalc
minWidth = findMinWidth(g); minWidth = findMinWidth(g);
@ -846,28 +855,28 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g,
// Note that it is possible to handle graphs that have a combination of // Note that it is possible to handle graphs that have a combination of
// anchored and unanchored paths, but it's too tricky for the moment. // anchored and unanchored paths, but it's too tricky for the moment.
if (g.max_offset != MAX_OFFSET && !g.som && !g.min_length && if (expr.max_offset != MAX_OFFSET && !expr.som && !expr.min_length &&
!has_offset_adj && isUnanchored(g)) { !has_offset_adj && isUnanchored(g)) {
if (anchorPatternWithBoundedRepeat(g, minWidth, maxWidth)) { if (anchorPatternWithBoundedRepeat(g, expr, minWidth, maxWidth)) {
DEBUG_PRINTF("minWidth=%s, maxWidth=%s\n", minWidth.str().c_str(), DEBUG_PRINTF("minWidth=%s, maxWidth=%s\n", minWidth.str().c_str(),
maxWidth.str().c_str()); maxWidth.str().c_str());
if (minWidth == maxWidth) { if (minWidth == maxWidth) {
// For a fixed width pattern, we can retire the offsets as they // For a fixed width pattern, we can retire the offsets as they
// are implicit in the graph now. // are implicit in the graph now.
g.min_offset = 0; expr.min_offset = 0;
g.max_offset = MAX_OFFSET; expr.max_offset = MAX_OFFSET;
} }
} }
} }
//dumpGraph("final.dot", g); //dumpGraph("final.dot", g);
if (!hasExtParams(g)) { if (!hasExtParams(expr)) {
return; return;
} }
set<NFAVertex> done; set<NFAVertex> done;
updateReportBounds(rm, g, g.accept, done); updateReportBounds(rm, g, expr, g.accept, done);
updateReportBounds(rm, g, g.acceptEod, done); updateReportBounds(rm, g, expr, g.acceptEod, done);
} }
} // namespace ue2 } // namespace ue2

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -37,10 +37,11 @@
namespace ue2 { namespace ue2 {
struct CompileContext; struct CompileContext;
class NGWrapper; class ExpressionInfo;
class NGHolder;
class ReportManager; class ReportManager;
void handleExtendedParams(ReportManager &rm, NGWrapper &g, void handleExtendedParams(ReportManager &rm, NGHolder &g, ExpressionInfo &expr,
const CompileContext &cc); const CompileContext &cc);
} // namespace ue2 } // namespace ue2

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -30,12 +30,15 @@
* \brief Literal Component Splitting. Identifies literals that span the * \brief Literal Component Splitting. Identifies literals that span the
* graph and moves them into Rose. * graph and moves them into Rose.
*/ */
#include "ng_literal_component.h"
#include "grey.h" #include "grey.h"
#include "ng.h" #include "ng.h"
#include "ng_literal_component.h"
#include "ng_prune.h" #include "ng_prune.h"
#include "ng_util.h" #include "ng_util.h"
#include "ue2common.h" #include "ue2common.h"
#include "compiler/compiler.h"
#include "rose/rose_build.h" #include "rose/rose_build.h"
#include "util/container.h" #include "util/container.h"
#include "util/graph.h" #include "util/graph.h"
@ -47,8 +50,8 @@ using namespace std;
namespace ue2 { namespace ue2 {
static static
bool isLiteralChar(const NGWrapper &g, NFAVertex v, bool isLiteralChar(const NGHolder &g, NFAVertex v, bool &nocase,
bool &nocase, bool &casefixed) { bool &casefixed) {
const CharReach &cr = g[v].char_reach; const CharReach &cr = g[v].char_reach;
const size_t num = cr.count(); const size_t num = cr.count();
if (num > 2) { if (num > 2) {
@ -93,7 +96,7 @@ void addToString(string &s, const NGHolder &g, NFAVertex v) {
} }
static static
bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored, bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored,
set<NFAVertex> &dead) { set<NFAVertex> &dead) {
DEBUG_PRINTF("examine vertex %zu\n", g[v].index); DEBUG_PRINTF("examine vertex %zu\n", g[v].index);
bool nocase = false, casefixed = false; bool nocase = false, casefixed = false;
@ -185,7 +188,7 @@ bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored,
} }
/** \brief Split off literals. True if any changes were made to the graph. */ /** \brief Split off literals. True if any changes were made to the graph. */
bool splitOffLiterals(NG &ng, NGWrapper &g) { bool splitOffLiterals(NG &ng, NGHolder &g) {
if (!ng.cc.grey.allowLiteral) { if (!ng.cc.grey.allowLiteral) {
return false; return false;
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -37,10 +37,10 @@
namespace ue2 { namespace ue2 {
class NG; class NG;
class NGWrapper; class NGHolder;
/** \brief Split off literals. True if any changes were made to the graph. */ /** \brief Split off literals. True if any changes were made to the graph. */
bool splitOffLiterals(NG &ng, NGWrapper &graph); bool splitOffLiterals(NG &ng, NGHolder &g);
} // namespace ue2 } // namespace ue2

View File

@ -29,6 +29,9 @@
/** \file /** \file
* \brief SOM ("Start of Match") analysis. * \brief SOM ("Start of Match") analysis.
*/ */
#include "ng_som.h"
#include "ng.h" #include "ng.h"
#include "ng_dump.h" #include "ng_dump.h"
#include "ng_equivalence.h" #include "ng_equivalence.h"
@ -40,7 +43,6 @@
#include "ng_redundancy.h" #include "ng_redundancy.h"
#include "ng_region.h" #include "ng_region.h"
#include "ng_reports.h" #include "ng_reports.h"
#include "ng_som.h"
#include "ng_som_add_redundancy.h" #include "ng_som_add_redundancy.h"
#include "ng_som_util.h" #include "ng_som_util.h"
#include "ng_split.h" #include "ng_split.h"
@ -49,6 +51,7 @@
#include "ng_width.h" #include "ng_width.h"
#include "grey.h" #include "grey.h"
#include "ue2common.h" #include "ue2common.h"
#include "compiler/compiler.h"
#include "nfa/goughcompile.h" #include "nfa/goughcompile.h"
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX #include "nfa/nfa_internal.h" // for MO_INVALID_IDX
#include "parser/position.h" #include "parser/position.h"
@ -1584,8 +1587,9 @@ void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p,
* implement the full pattern. * implement the full pattern.
*/ */
static static
void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id,
vector<som_plan> &plan, const u32 first_som_slot) { NGHolder &g, vector<som_plan> &plan,
const u32 first_som_slot) {
ReportManager &rm = ng.rm; ReportManager &rm = ng.rm;
SomSlotManager &ssm = ng.ssm; SomSlotManager &ssm = ng.ssm;
@ -1598,14 +1602,14 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g,
// Root plan, which already has a SOM slot assigned (first_som_slot). // Root plan, which already has a SOM slot assigned (first_som_slot).
dumpSomPlan(g, plan.front(), 0); dumpSomPlan(g, plan.front(), 0);
dumpSomSubComponent(*plan.front().prefix, "04_som", w.expressionIndex, dumpSomSubComponent(*plan.front().prefix, "04_som", expr.index, comp_id, 0,
comp_id, 0, ng.cc.grey); ng.cc.grey);
assert(plan.front().prefix); assert(plan.front().prefix);
if (plan.front().escapes.any() && !plan.front().is_reset) { if (plan.front().escapes.any() && !plan.front().is_reset) {
/* setup escaper for first som location */ /* setup escaper for first som location */
if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes, if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes,
first_som_slot)) { first_som_slot)) {
throw CompileError(w.expressionIndex, "Pattern is too large."); throw CompileError(expr.index, "Pattern is too large.");
} }
} }
@ -1617,7 +1621,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g,
for (++it; it != plan.end(); ++it) { for (++it; it != plan.end(); ++it) {
const u32 plan_num = it - plan.begin(); const u32 plan_num = it - plan.begin();
dumpSomPlan(g, *it, plan_num); dumpSomPlan(g, *it, plan_num);
dumpSomSubComponent(*it->prefix, "04_som", w.expressionIndex, comp_id, dumpSomSubComponent(*it->prefix, "04_som", expr.index, comp_id,
plan_num, ng.cc.grey); plan_num, ng.cc.grey);
assert(it->parent < plan_num); assert(it->parent < plan_num);
@ -1628,7 +1632,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g,
assert(!it->no_implement); assert(!it->no_implement);
if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) { if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) {
throw CompileError(w.expressionIndex, "Pattern is too large."); throw CompileError(expr.index, "Pattern is too large.");
} }
updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in); updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in);
updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out); updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out);
@ -1639,7 +1643,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g,
renumber_vertices(*plan.front().prefix); renumber_vertices(*plan.front().prefix);
assert(plan.front().prefix->kind == NFA_OUTFIX); assert(plan.front().prefix->kind == NFA_OUTFIX);
if (!ng.addHolder(*plan.front().prefix)) { if (!ng.addHolder(*plan.front().prefix)) {
throw CompileError(w.expressionIndex, "Pattern is too large."); throw CompileError(expr.index, "Pattern is too large.");
} }
} }
} }
@ -1852,7 +1856,7 @@ bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) {
} }
static static
u32 doSomRevNfaPrefix(NG &ng, const NGWrapper &w, NGHolder &g, u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g,
const CompileContext &cc) { const CompileContext &cc) {
depth maxWidth = findMaxWidth(g); depth maxWidth = findMaxWidth(g);
@ -1861,7 +1865,7 @@ u32 doSomRevNfaPrefix(NG &ng, const NGWrapper &w, NGHolder &g,
auto nfa = makeBareSomRevNfa(g, cc); auto nfa = makeBareSomRevNfa(g, cc);
if (!nfa) { if (!nfa) {
throw CompileError(w.expressionIndex, "Pattern is too large."); throw CompileError(expr.index, "Pattern is too large.");
} }
if (ng.cc.streaming) { if (ng.cc.streaming) {
@ -2055,8 +2059,8 @@ void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr<NGHolder> &prefix,
} }
static static
sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
som_type som, u32 comp_id, som_type som,
const ue2::unordered_map<NFAVertex, u32> &regions, const ue2::unordered_map<NFAVertex, u32> &regions,
const map<u32, region_info> &info, const map<u32, region_info> &info,
map<u32, region_info>::const_iterator lower_bound) { map<u32, region_info>::const_iterator lower_bound) {
@ -2077,7 +2081,7 @@ sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
// This is an optimisation: if we can't build a Haig from a portion of // This is an optimisation: if we can't build a Haig from a portion of
// the graph, then we won't be able to manage it as an outfix either // the graph, then we won't be able to manage it as an outfix either
// when we fall back. // when we fall back.
throw CompileError(w.expressionIndex, "Pattern is too large."); throw CompileError(expr.index, "Pattern is too large.");
} }
while (1) { while (1) {
@ -2152,7 +2156,7 @@ sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
goto next_try; goto next_try;
} }
implementSomPlan(ng, w, comp_id, g, plan, som_loc); implementSomPlan(ng, expr, comp_id, g, plan, som_loc);
Report ir = makeCallback(0U, 0); Report ir = makeCallback(0U, 0);
assert(!plan.empty()); assert(!plan.empty());
@ -2877,7 +2881,7 @@ unique_ptr<NGHolder> makePrefixForChain(NGHolder &g,
return prefix; return prefix;
} }
sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id,
som_type som) { som_type som) {
assert(som); assert(som);
DEBUG_PRINTF("som hello\n"); DEBUG_PRINTF("som hello\n");
@ -3001,7 +3005,7 @@ sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
/* create prefix to set the som_loc */ /* create prefix to set the som_loc */
updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET); updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET);
if (prefix_by_rev) { if (prefix_by_rev) {
u32 rev_comp_id = doSomRevNfaPrefix(ng, w, *prefix, cc); u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc);
updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
} }
renumber_vertices(*prefix); renumber_vertices(*prefix);
@ -3084,18 +3088,18 @@ sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET); updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET);
} }
if (prefix_by_rev && !plan.front().no_implement) { if (prefix_by_rev && !plan.front().no_implement) {
u32 rev_comp_id = doSomRevNfaPrefix(ng, w, *prefix, cc); u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc);
updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
} }
implementSomPlan(ng, w, comp_id, g, plan, som_loc); implementSomPlan(ng, expr, comp_id, g, plan, som_loc);
DEBUG_PRINTF("success\n"); DEBUG_PRINTF("success\n");
return SOMBE_HANDLED_INTERNAL; return SOMBE_HANDLED_INTERNAL;
} }
sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr,
som_type som) { u32 comp_id, som_type som) {
assert(som); assert(som);
DEBUG_PRINTF("som+haig hello\n"); DEBUG_PRINTF("som+haig hello\n");
@ -3132,7 +3136,7 @@ sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id,
buildRegionMapping(g, regions, info, true); buildRegionMapping(g, regions, info, true);
sombe_rv rv = sombe_rv rv =
doHaigLitSom(ng, g, w, comp_id, som, regions, info, info.begin()); doHaigLitSom(ng, g, expr, comp_id, som, regions, info, info.begin());
if (rv == SOMBE_FAIL) { if (rv == SOMBE_FAIL) {
clear_graph(g); clear_graph(g);
cloneHolder(g, g_pristine); cloneHolder(g, g_pristine);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -34,12 +34,14 @@
#define NG_SOM_H #define NG_SOM_H
#include "som/som.h" #include "som/som.h"
#include "ue2common.h"
namespace ue2 { namespace ue2 {
class ExpressionInfo;
class NG; class NG;
class NGHolder; class NGHolder;
class NGWrapper; class ReportManager;
struct Grey; struct Grey;
enum sombe_rv { enum sombe_rv {
@ -63,14 +65,14 @@ enum sombe_rv {
* May throw a "Pattern too large" exception if prefixes of the * May throw a "Pattern too large" exception if prefixes of the
* pattern are too large to compile. * pattern are too large to compile.
*/ */
sombe_rv doSom(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id, sombe_rv doSom(NG &ng, NGHolder &h, const ExpressionInfo &expr, u32 comp_id,
som_type som); som_type som);
/** Returns SOMBE_FAIL (and the original graph) if SOM cannot be established. /** Returns SOMBE_FAIL (and the original graph) if SOM cannot be established.
* May also throw pattern too large if prefixes of the pattern are too large to * May also throw pattern too large if prefixes of the pattern are too large to
* compile. */ * compile. */
sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id, sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const ExpressionInfo &expr,
som_type som); u32 comp_id, som_type som);
void makeReportsSomPass(ReportManager &rm, NGHolder &g); void makeReportsSomPass(ReportManager &rm, NGHolder &g);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -34,6 +34,7 @@
#include "ng.h" #include "ng.h"
#include "ng_prune.h" #include "ng_prune.h"
#include "ng_util.h" #include "ng_util.h"
#include "compiler/compiler.h"
#include "util/graph_range.h" #include "util/graph_range.h"
#include "util/unicode_def.h" #include "util/unicode_def.h"
@ -45,14 +46,14 @@ using namespace std;
namespace ue2 { namespace ue2 {
static static
void allowIllegal(NGWrapper &w, NFAVertex v, u8 pred_char) { void allowIllegal(NGHolder &g, NFAVertex v, u8 pred_char) {
if (in_degree(v, w) != 1) { if (in_degree(v, g) != 1) {
DEBUG_PRINTF("unexpected pred\n"); DEBUG_PRINTF("unexpected pred\n");
assert(0); /* should be true due to the early stage of this analysis */ assert(0); /* should be true due to the early stage of this analysis */
return; return;
} }
CharReach &cr = w[v].char_reach; CharReach &cr = g[v].char_reach;
if (pred_char == 0xe0) { if (pred_char == 0xe0) {
assert(cr.isSubsetOf(CharReach(0xa0, 0xbf))); assert(cr.isSubsetOf(CharReach(0xa0, 0xbf)));
if (cr == CharReach(0xa0, 0xbf)) { if (cr == CharReach(0xa0, 0xbf)) {
@ -79,8 +80,8 @@ void allowIllegal(NGWrapper &w, NFAVertex v, u8 pred_char) {
* above \\x{10ffff} or they represent overlong encodings. As we require valid * above \\x{10ffff} or they represent overlong encodings. As we require valid
* UTF-8 input, we have no defined behaviour in these cases, as a result we can * UTF-8 input, we have no defined behaviour in these cases, as a result we can
* accept them if it simplifies the graph. */ * accept them if it simplifies the graph. */
void relaxForbiddenUtf8(NGWrapper &w) { void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr) {
if (!w.utf8) { if (!expr.utf8) {
return; return;
} }
@ -88,12 +89,12 @@ void relaxForbiddenUtf8(NGWrapper &w) {
const CharReach f0(0xf0); const CharReach f0(0xf0);
const CharReach f4(0xf4); const CharReach f4(0xf4);
for (auto v : vertices_range(w)) { for (auto v : vertices_range(g)) {
const CharReach &cr = w[v].char_reach; const CharReach &cr = g[v].char_reach;
if (cr == e0 || cr == f0 || cr == f4) { if (cr == e0 || cr == f0 || cr == f4) {
u8 pred_char = cr.find_first(); u8 pred_char = cr.find_first();
for (auto t : adjacent_vertices_range(v, w)) { for (auto t : adjacent_vertices_range(v, g)) {
allowIllegal(w, t, pred_char); allowIllegal(g, t, pred_char);
} }
} }
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -35,7 +35,7 @@
namespace ue2 { namespace ue2 {
class NGWrapper; class ExpressionInfo;
class NGHolder; class NGHolder;
/** \brief Relax forbidden UTF-8 sequences. /** \brief Relax forbidden UTF-8 sequences.
@ -44,7 +44,7 @@ class NGHolder;
* above \\x{10ffff} or they represent overlong encodings. As we require valid * above \\x{10ffff} or they represent overlong encodings. As we require valid
* UTF-8 input, we have no defined behaviour in these cases, as a result we can * UTF-8 input, we have no defined behaviour in these cases, as a result we can
* accept them if it simplifies the graph. */ * accept them if it simplifies the graph. */
void relaxForbiddenUtf8(NGWrapper &w); void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr);
/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex /** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex
* where possible, based on the assumption that we will always be matching * where possible, based on the assumption that we will always be matching

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -34,29 +34,31 @@
#include "grey.h" #include "grey.h"
#include "ng.h" #include "ng.h"
#include "ng_util.h" #include "ng_util.h"
#include "compiler/compiler.h"
using namespace std; using namespace std;
namespace ue2 { namespace ue2 {
static static
ReportID getInternalId(ReportManager &rm, const NGWrapper &graph) { ReportID getInternalId(ReportManager &rm, const ExpressionInfo &expr) {
Report ir = rm.getBasicInternalReport(graph); Report ir = rm.getBasicInternalReport(expr);
// Apply any extended params. // Apply any extended params.
if (graph.min_offset || graph.max_offset != MAX_OFFSET) { if (expr.min_offset || expr.max_offset != MAX_OFFSET) {
ir.minOffset = graph.min_offset; ir.minOffset = expr.min_offset;
ir.maxOffset = graph.max_offset; ir.maxOffset = expr.max_offset;
} }
assert(!graph.min_length); // should be handled elsewhere. assert(!expr.min_length); // should be handled elsewhere.
return rm.getInternalId(ir); return rm.getInternalId(ir);
} }
static static
void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGWrapper &g) { void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGHolder &g,
const ReportID r = getInternalId(rm, g); const ExpressionInfo &expr) {
const ReportID r = getInternalId(rm, expr);
boundary.report_at_0_eod.insert(r); boundary.report_at_0_eod.insert(r);
boundary.report_at_0.insert(r); boundary.report_at_0.insert(r);
@ -81,8 +83,8 @@ void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGWrapper &g) {
static static
void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
NGWrapper &g) { NGHolder &g, const ExpressionInfo &expr) {
boundary.report_at_0.insert(getInternalId(rm, g)); boundary.report_at_0.insert(getInternalId(rm, expr));
remove_edge(g.start, g.accept, g); remove_edge(g.start, g.accept, g);
remove_edge(g.start, g.acceptEod, g); remove_edge(g.start, g.acceptEod, g);
g[g.start].reports.clear(); g[g.start].reports.clear();
@ -90,8 +92,8 @@ void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
static static
void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
NGWrapper &g) { NGHolder &g, const ExpressionInfo &expr) {
boundary.report_at_eod.insert(getInternalId(rm, g)); boundary.report_at_eod.insert(getInternalId(rm, expr));
remove_edge(g.startDs, g.acceptEod, g); remove_edge(g.startDs, g.acceptEod, g);
remove_edge(g.start, g.acceptEod, g); remove_edge(g.start, g.acceptEod, g);
g[g.start].reports.clear(); g[g.start].reports.clear();
@ -100,18 +102,18 @@ void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm,
static static
void makeNothingAcceptor(BoundaryReports &boundary, ReportManager &rm, void makeNothingAcceptor(BoundaryReports &boundary, ReportManager &rm,
NGWrapper &g) { NGHolder &g, const ExpressionInfo &expr) {
boundary.report_at_0_eod.insert(getInternalId(rm, g)); boundary.report_at_0_eod.insert(getInternalId(rm, expr));
remove_edge(g.start, g.acceptEod, g); remove_edge(g.start, g.acceptEod, g);
g[g.start].reports.clear(); g[g.start].reports.clear();
} }
bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
NGWrapper &g) { NGHolder &g, const ExpressionInfo &expr) {
if (edge(g.startDs, g.accept, g).second) { if (edge(g.startDs, g.accept, g).second) {
// e.g. '.*'; match "between" every byte // e.g. '.*'; match "between" every byte
DEBUG_PRINTF("graph is firehose\n"); DEBUG_PRINTF("graph is firehose\n");
makeFirehose(boundary, rm, g); makeFirehose(boundary, rm, g, expr);
return true; return true;
} }
@ -119,19 +121,19 @@ bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
if (edge(g.start, g.accept, g).second) { if (edge(g.start, g.accept, g).second) {
DEBUG_PRINTF("creating anchored acceptor\n"); DEBUG_PRINTF("creating anchored acceptor\n");
makeAnchoredAcceptor(boundary, rm, g); makeAnchoredAcceptor(boundary, rm, g, expr);
work_done = true; work_done = true;
} }
if (edge(g.startDs, g.acceptEod, g).second) { if (edge(g.startDs, g.acceptEod, g).second) {
DEBUG_PRINTF("creating end-anchored acceptor\n"); DEBUG_PRINTF("creating end-anchored acceptor\n");
makeEndAnchoredAcceptor(boundary, rm, g); makeEndAnchoredAcceptor(boundary, rm, g, expr);
work_done = true; work_done = true;
} }
if (edge(g.start, g.acceptEod, g).second) { if (edge(g.start, g.acceptEod, g).second) {
DEBUG_PRINTF("creating nothing acceptor\n"); DEBUG_PRINTF("creating nothing acceptor\n");
makeNothingAcceptor(boundary, rm, g); makeNothingAcceptor(boundary, rm, g, expr);
work_done = true; work_done = true;
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -36,12 +36,13 @@
namespace ue2 { namespace ue2 {
struct BoundaryReports; struct BoundaryReports;
class NGWrapper; class ExpressionInfo;
class NGHolder;
class ReportManager; class ReportManager;
// Returns true if a "vacuous" reporter was created. // Returns true if a "vacuous" reporter was created.
bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm,
NGWrapper &graph); NGHolder &g, const ExpressionInfo &expr);
} // namespace ue2 } // namespace ue2

View File

@ -159,13 +159,15 @@ public:
ConstructLiteralVisitor::~ConstructLiteralVisitor() {} ConstructLiteralVisitor::~ConstructLiteralVisitor() {}
/** \brief True if the literal expression \a expr could be added to Rose. */ /** \brief True if the literal expression \a expr could be added to Rose. */
bool shortcutLiteral(NG &ng, const ParsedExpression &expr) { bool shortcutLiteral(NG &ng, const ParsedExpression &pe) {
assert(expr.component); assert(pe.component);
if (!ng.cc.grey.allowLiteral) { if (!ng.cc.grey.allowLiteral) {
return false; return false;
} }
const auto &expr = pe.expr;
// XXX: don't shortcut literals with extended params (yet) // XXX: don't shortcut literals with extended params (yet)
if (expr.min_offset || expr.max_offset != MAX_OFFSET || expr.min_length || if (expr.min_offset || expr.max_offset != MAX_OFFSET || expr.min_length ||
expr.edit_distance) { expr.edit_distance) {
@ -175,8 +177,8 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &expr) {
ConstructLiteralVisitor vis; ConstructLiteralVisitor vis;
try { try {
assert(expr.component); assert(pe.component);
expr.component->accept(vis); pe.component->accept(vis);
assert(vis.repeat_stack.empty()); assert(vis.repeat_stack.empty());
} catch (const ConstructLiteralVisitor::NotLiteral&) { } catch (const ConstructLiteralVisitor::NotLiteral&) {
DEBUG_PRINTF("not a literal\n"); DEBUG_PRINTF("not a literal\n");
@ -196,7 +198,8 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &expr) {
} }
DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str()); DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str());
return ng.addLiteral(lit, expr.index, expr.id, expr.highlander, expr.som); return ng.addLiteral(lit, expr.index, expr.report, expr.highlander,
expr.som);
} }
} // namespace ue2 } // namespace ue2

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -30,6 +30,7 @@
#include "grey.h" #include "grey.h"
#include "ue2common.h" #include "ue2common.h"
#include "compiler/compiler.h"
#include "nfa/dfa_min.h" #include "nfa/dfa_min.h"
#include "nfa/mcclellancompile.h" #include "nfa/mcclellancompile.h"
#include "nfa/mcclellancompile_util.h" #include "nfa/mcclellancompile_util.h"
@ -74,7 +75,7 @@ public:
// Construct a runtime implementation. // Construct a runtime implementation.
aligned_unique_ptr<SmallWriteEngine> build(u32 roseQuality) override; aligned_unique_ptr<SmallWriteEngine> build(u32 roseQuality) override;
void add(const NGWrapper &w) override; void add(const NGHolder &g, const ExpressionInfo &expr) override;
void add(const ue2_literal &literal, ReportID r) override; void add(const ue2_literal &literal, ReportID r) override;
set<ReportID> all_reports() const override; set<ReportID> all_reports() const override;
@ -171,26 +172,26 @@ bool pruneOverlong(NGHolder &g, const depth &max_depth,
return modified; return modified;
} }
void SmallWriteBuildImpl::add(const NGWrapper &w) { void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) {
// If the graph is poisoned (i.e. we can't build a SmallWrite version), // If the graph is poisoned (i.e. we can't build a SmallWrite version),
// we don't even try. // we don't even try.
if (poisoned) { if (poisoned) {
return; return;
} }
if (w.som || w.min_length || isVacuous(w)) { /* cannot support in smwr */ if (expr.som || expr.min_length || isVacuous(g)) {
poisoned = true; poisoned = true; /* cannot support in smwr */
return; return;
} }
DEBUG_PRINTF("w=%p\n", &w); DEBUG_PRINTF("g=%p\n", &g);
// make a copy of the graph so that we can modify it for our purposes // make a copy of the graph so that we can modify it for our purposes
unique_ptr<NGHolder> h = cloneHolder(w); unique_ptr<NGHolder> h = cloneHolder(g);
pruneOverlong(*h, depth(cc.grey.smallWriteLargestBuffer), rm); pruneOverlong(*h, depth(cc.grey.smallWriteLargestBuffer), rm);
reduceGraph(*h, SOM_NONE, w.utf8, cc); reduceGraph(*h, SOM_NONE, expr.utf8, cc);
if (can_never_match(*h)) { if (can_never_match(*h)) {
DEBUG_PRINTF("graph can never match in small block\n"); DEBUG_PRINTF("graph can never match in small block\n");

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -48,8 +48,9 @@ namespace ue2 {
struct CompileContext; struct CompileContext;
struct ue2_literal; struct ue2_literal;
class NGWrapper; class ExpressionInfo;
class ReportManager; class NGHolder;
class ReportManager;
// Abstract interface intended for callers from elsewhere in the tree, real // Abstract interface intended for callers from elsewhere in the tree, real
// underlying implementation is SmallWriteBuildImpl in smwr_build_impl.h. // underlying implementation is SmallWriteBuildImpl in smwr_build_impl.h.
@ -61,16 +62,16 @@ public:
// Construct a runtime implementation. // Construct a runtime implementation.
virtual ue2::aligned_unique_ptr<SmallWriteEngine> build(u32 roseQuality) = 0; virtual ue2::aligned_unique_ptr<SmallWriteEngine> build(u32 roseQuality) = 0;
virtual void add(const NGWrapper &w) = 0; virtual void add(const NGHolder &g, const ExpressionInfo &expr) = 0;
virtual void add(const ue2_literal &literal, ReportID r) = 0; virtual void add(const ue2_literal &literal, ReportID r) = 0;
virtual std::set<ReportID> all_reports() const = 0; virtual std::set<ReportID> all_reports() const = 0;
}; };
// Construct a usable SmallWrite builder. // Construct a usable SmallWrite builder.
std::unique_ptr<SmallWriteBuild> makeSmallWriteBuilder(size_t num_patterns, std::unique_ptr<SmallWriteBuild>
const ReportManager &rm, makeSmallWriteBuilder(size_t num_patterns, const ReportManager &rm,
const CompileContext &cc); const CompileContext &cc);
size_t smwrSize(const SmallWriteEngine *t); size_t smwrSize(const SmallWriteEngine *t);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -29,9 +29,12 @@
/** \file /** \file
* \brief ReportManager: tracks Report structures, exhaustion and dedupe keys. * \brief ReportManager: tracks Report structures, exhaustion and dedupe keys.
*/ */
#include "grey.h"
#include "report_manager.h" #include "report_manager.h"
#include "grey.h"
#include "ue2common.h" #include "ue2common.h"
#include "compiler/compiler.h"
#include "nfagraph/ng.h" #include "nfagraph/ng.h"
#include "rose/rose_build.h" #include "rose/rose_build.h"
#include "util/compile_error.h" #include "util/compile_error.h"
@ -201,20 +204,21 @@ void ReportManager::registerExtReport(ReportID id,
} }
} }
Report ReportManager::getBasicInternalReport(const NGWrapper &g, s32 adj) { Report ReportManager::getBasicInternalReport(const ExpressionInfo &expr,
s32 adj) {
/* validate that we are not violating highlander constraints, this will /* validate that we are not violating highlander constraints, this will
* throw a CompileError if so. */ * throw a CompileError if so. */
registerExtReport(g.reportId, registerExtReport(expr.report,
external_report_info(g.highlander, g.expressionIndex)); external_report_info(expr.highlander, expr.index));
/* create the internal report */ /* create the internal report */
u32 ekey = INVALID_EKEY; u32 ekey = INVALID_EKEY;
if (g.highlander) { if (expr.highlander) {
/* all patterns with the same report id share an ekey */ /* all patterns with the same report id share an ekey */
ekey = getExhaustibleKey(g.reportId); ekey = getExhaustibleKey(expr.report);
} }
return makeECallback(g.reportId, adj, ekey); return makeECallback(expr.report, adj, ekey);
} }
void ReportManager::setProgramOffset(ReportID id, u32 programOffset) { void ReportManager::setProgramOffset(ReportID id, u32 programOffset) {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -47,7 +47,7 @@ namespace ue2 {
struct Grey; struct Grey;
class RoseBuild; class RoseBuild;
class NGWrapper; class ExpressionInfo;
struct external_report_info { struct external_report_info {
external_report_info(bool h, u32 fpi) external_report_info(bool h, u32 fpi)
@ -92,13 +92,13 @@ public:
const std::vector<Report> &reports() const { return reportIds; } const std::vector<Report> &reports() const { return reportIds; }
/** /**
* Get a simple internal report corresponding to the wrapper. An ekey will * Get a simple internal report corresponding to the expression. An ekey
* be setup as required. * will be setup if required.
* *
* Note: this function may throw a CompileError if constraints on external * Note: this function may throw a CompileError if constraints on external
* match id are violated (mixed highlander status for example). * match id are violated (mixed highlander status for example).
*/ */
Report getBasicInternalReport(const NGWrapper &g, s32 adj = 0); Report getBasicInternalReport(const ExpressionInfo &expr, s32 adj = 0);
/** \brief Register an external report and validate that we are not /** \brief Register an external report and validate that we are not
* violating highlander constraints (which will cause an exception to be * violating highlander constraints (which will cause an exception to be

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -96,7 +96,8 @@ protected:
const CompileContext cc(true, false, target, grey); const CompileContext cc(true, false, target, grey);
ReportManager rm(cc.grey); ReportManager rm(cc.grey);
ParsedExpression parsed(0, pattern.c_str(), flags, 0); ParsedExpression parsed(0, pattern.c_str(), flags, 0);
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed); auto built_expr = buildGraph(rm, cc, parsed);
const auto &g = built_expr.g;
ASSERT_TRUE(g != nullptr); ASSERT_TRUE(g != nullptr);
clearReports(*g); clearReports(*g);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -73,7 +73,8 @@ protected:
CompileContext cc(false, false, target, Grey()); CompileContext cc(false, false, target, Grey());
ReportManager rm(cc.grey); ReportManager rm(cc.grey);
ParsedExpression parsed(0, expr.c_str(), flags, 0); ParsedExpression parsed(0, expr.c_str(), flags, 0);
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed); auto built_expr = buildGraph(rm, cc, parsed);
const auto &g = built_expr.g;
ASSERT_TRUE(g != nullptr); ASSERT_TRUE(g != nullptr);
clearReports(*g); clearReports(*g);
@ -306,7 +307,8 @@ protected:
CompileContext cc(false, false, get_current_target(), Grey()); CompileContext cc(false, false, get_current_target(), Grey());
ReportManager rm(cc.grey); ReportManager rm(cc.grey);
ParsedExpression parsed(0, expr.c_str(), flags, 0); ParsedExpression parsed(0, expr.c_str(), flags, 0);
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed); auto built_expr = buildGraph(rm, cc, parsed);
const auto &g = built_expr.g;
ASSERT_TRUE(g != nullptr); ASSERT_TRUE(g != nullptr);
clearReports(*g); clearReports(*g);
@ -365,7 +367,8 @@ protected:
CompileContext cc(true, false, get_current_target(), Grey()); CompileContext cc(true, false, get_current_target(), Grey());
ParsedExpression parsed(0, expr.c_str(), flags, 0); ParsedExpression parsed(0, expr.c_str(), flags, 0);
ReportManager rm(cc.grey); ReportManager rm(cc.grey);
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, parsed); auto built_expr = buildGraph(rm, cc, parsed);
const auto &g = built_expr.g;
ASSERT_TRUE(g != nullptr); ASSERT_TRUE(g != nullptr);
clearReports(*g); clearReports(*g);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -40,18 +40,19 @@ namespace ue2 {
// Helper function: construct a graph from an expression, flags and context. // Helper function: construct a graph from an expression, flags and context.
inline inline
std::unique_ptr<NGWrapper> constructGraphWithCC(const std::string &expr, std::unique_ptr<NGHolder> constructGraphWithCC(const std::string &expr,
CompileContext &cc, CompileContext &cc,
unsigned flags) { unsigned flags) {
ReportManager rm(cc.grey); ReportManager rm(cc.grey);
ParsedExpression parsed(0, expr.c_str(), flags, 0); ParsedExpression parsed(0, expr.c_str(), flags, 0);
return buildWrapper(rm, cc, parsed); auto built_expr = buildGraph(rm, cc, parsed);
return std::move(built_expr.g);
} }
// Helper function: construct a graph from an expression and its flags. // Helper function: construct a graph from an expression and its flags.
inline inline
std::unique_ptr<NGWrapper> constructGraph(const std::string &expr, std::unique_ptr<NGHolder> constructGraph(const std::string &expr,
unsigned flags) { unsigned flags) {
CompileContext cc(false, false, get_current_target(), Grey()); CompileContext cc(false, false, get_current_target(), Grey());
return constructGraphWithCC(expr, cc, flags); return constructGraphWithCC(expr, cc, flags);
} }

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -54,7 +54,7 @@ TEST(NFAGraph, RemoveEquivalence1) {
// The graph should be merged into: a(b|c) // The graph should be merged into: a(b|c)
CompileContext cc(false, false, get_current_target(), Grey()); CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("(ab|ac)", cc, 0)); auto graph(constructGraphWithCC("(ab|ac)", cc, 0));
ASSERT_TRUE(graph != nullptr); ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph; NGHolder &g = *graph;
g.kind = NFA_SUFFIX; g.kind = NFA_SUFFIX;
@ -115,7 +115,7 @@ TEST(NFAGraph, RemoveEquivalence2) {
// The graph should be merged into: (b|c)a // The graph should be merged into: (b|c)a
CompileContext cc(false, false, get_current_target(), Grey()); CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("(ba|ca)", cc, 0)); auto graph(constructGraphWithCC("(ba|ca)", cc, 0));
ASSERT_TRUE(graph != nullptr); ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph; NGHolder &g = *graph;
g.kind = NFA_SUFFIX; g.kind = NFA_SUFFIX;
@ -176,8 +176,7 @@ TEST(NFAGraph, RemoveEquivalence3) {
// The graph should be merged into: a(..)+(X|Y) // The graph should be merged into: a(..)+(X|Y)
CompileContext cc(false, false, get_current_target(), Grey()); CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("a(..)+X|a(..)+Y", cc, auto graph(constructGraphWithCC("a(..)+X|a(..)+Y", cc, HS_FLAG_DOTALL));
HS_FLAG_DOTALL));
ASSERT_TRUE(graph != nullptr); ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph; NGHolder &g = *graph;
g.kind = NFA_SUFFIX; g.kind = NFA_SUFFIX;
@ -266,8 +265,7 @@ TEST(NFAGraph, RemoveEquivalence4) {
// The graph should be merged into: (X|Y)(..)+a // The graph should be merged into: (X|Y)(..)+a
CompileContext cc(false, false, get_current_target(), Grey()); CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("X(..)+a|Y(..)+a", cc, auto graph(constructGraphWithCC("X(..)+a|Y(..)+a", cc, HS_FLAG_DOTALL));
HS_FLAG_DOTALL));
ASSERT_TRUE(graph != nullptr); ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph; NGHolder &g = *graph;
g.kind = NFA_SUFFIX; g.kind = NFA_SUFFIX;
@ -363,8 +361,7 @@ TEST(NFAGraph, RemoveEquivalence5) {
// The graph should be merged into: [^\x00]*[\x00] // The graph should be merged into: [^\x00]*[\x00]
CompileContext cc(false, false, get_current_target(), Grey()); CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("[^\\x00][^\\x00]*[\\x00]", auto graph(constructGraphWithCC("[^\\x00][^\\x00]*[\\x00]", cc, 0));
cc, 0));
ASSERT_TRUE(graph != nullptr); ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph; NGHolder &g = *graph;
g.kind = NFA_PREFIX; g.kind = NFA_PREFIX;
@ -420,7 +417,7 @@ TEST(NFAGraph, RemoveEquivalence5) {
TEST(NFAGraph, RemoveEquivalence6) { TEST(NFAGraph, RemoveEquivalence6) {
// Build a small graph with two redundant vertices: ^(.*|.*)a // Build a small graph with two redundant vertices: ^(.*|.*)a
// The graph should be merged into: a // The graph should be merged into: a
unique_ptr<NGWrapper> graph(constructGraph("^(.*|.*)a", HS_FLAG_DOTALL)); auto graph(constructGraph("^(.*|.*)a", HS_FLAG_DOTALL));
ASSERT_TRUE(graph != nullptr); ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph; NGHolder &g = *graph;
@ -458,7 +455,7 @@ TEST(NFAGraph, RemoveEquivalence6) {
TEST(NFAGraph, RemoveEquivalence7) { TEST(NFAGraph, RemoveEquivalence7) {
// Build a small graph with no redundant vertices: ^.+a // Build a small graph with no redundant vertices: ^.+a
// Make sure we don't merge anything // Make sure we don't merge anything
unique_ptr<NGWrapper> graph(constructGraph("^.+a", HS_FLAG_DOTALL)); auto graph(constructGraph("^.+a", HS_FLAG_DOTALL));
ASSERT_TRUE(graph != nullptr); ASSERT_TRUE(graph != nullptr);
NGHolder &g = *graph; NGHolder &g = *graph;

View File

@ -208,7 +208,8 @@ TEST_P(MatchesTest, Check) {
CompileContext cc(false, false, get_current_target(), Grey()); CompileContext cc(false, false, get_current_target(), Grey());
ReportManager rm(cc.grey); ReportManager rm(cc.grey);
ParsedExpression parsed(0, t.pattern.c_str(), t.flags, 0); ParsedExpression parsed(0, t.pattern.c_str(), t.flags, 0);
auto g = buildWrapper(rm, cc, parsed); auto built_expr = buildGraph(rm, cc, parsed);
const auto &g = built_expr.g;
bool utf8 = (t.flags & HS_FLAG_UTF8) > 0; bool utf8 = (t.flags & HS_FLAG_UTF8) > 0;
set<pair<size_t, size_t>> matches; set<pair<size_t, size_t>> matches;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015-2016, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -53,7 +53,7 @@ TEST(NFAGraph, RemoveRedundancy1) {
// The character reachability should be merged into: [ab]c // The character reachability should be merged into: [ab]c
CompileContext cc(false, false, get_current_target(), Grey()); CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("(a|b)c", cc, 0)); auto graph(constructGraphWithCC("(a|b)c", cc, 0));
ASSERT_TRUE(graph.get() != nullptr); ASSERT_TRUE(graph.get() != nullptr);
NGHolder &g = *graph; NGHolder &g = *graph;
@ -95,8 +95,7 @@ TEST(NFAGraph, RemoveRedundancy2) {
// Build a small graph with a redundant vertex: a.*b?c // Build a small graph with a redundant vertex: a.*b?c
// The dot-star should swallow the 'b?', leaving a.*c // The dot-star should swallow the 'b?', leaving a.*c
CompileContext cc(false, false, get_current_target(), Grey()); CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("a.*b?c", cc, auto graph(constructGraphWithCC("a.*b?c", cc, HS_FLAG_DOTALL));
HS_FLAG_DOTALL));
ASSERT_TRUE(graph.get() != nullptr); ASSERT_TRUE(graph.get() != nullptr);
NGHolder &g = *graph; NGHolder &g = *graph;
@ -152,8 +151,7 @@ TEST(NFAGraph, RemoveRedundancy2) {
TEST(NFAGraph, RemoveRedundancy3) { TEST(NFAGraph, RemoveRedundancy3) {
CompileContext cc(false, false, get_current_target(), Grey()); CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("foobar.*(a|b)?teakettle", auto graph(constructGraphWithCC("foobar.*(a|b)?teakettle", cc, 0));
cc, 0));
ASSERT_TRUE(graph.get() != nullptr); ASSERT_TRUE(graph.get() != nullptr);
unsigned countBefore = num_vertices(*graph); unsigned countBefore = num_vertices(*graph);
@ -166,7 +164,7 @@ TEST(NFAGraph, RemoveRedundancy3) {
TEST(NFAGraph, RemoveRedundancy4) { TEST(NFAGraph, RemoveRedundancy4) {
CompileContext cc(false, false, get_current_target(), Grey()); CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("foo([A-Z]|a|b|q)", cc, 0)); auto graph(constructGraphWithCC("foo([A-Z]|a|b|q)", cc, 0));
ASSERT_TRUE(graph.get() != nullptr); ASSERT_TRUE(graph.get() != nullptr);
unsigned countBefore = num_vertices(*graph); unsigned countBefore = num_vertices(*graph);
@ -178,8 +176,7 @@ TEST(NFAGraph, RemoveRedundancy4) {
TEST(NFAGraph, RemoveRedundancy5) { TEST(NFAGraph, RemoveRedundancy5) {
CompileContext cc(false, false, get_current_target(), Grey()); CompileContext cc(false, false, get_current_target(), Grey());
unique_ptr<NGWrapper> graph(constructGraphWithCC("[0-9]?badgerbrush", auto graph(constructGraphWithCC("[0-9]?badgerbrush", cc, 0));
cc, 0));
ASSERT_TRUE(graph.get() != nullptr); ASSERT_TRUE(graph.get() != nullptr);
unsigned countBefore = num_vertices(*graph); unsigned countBefore = num_vertices(*graph);

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2015, Intel Corporation * Copyright (c) 2015-2017, Intel Corporation
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
@ -79,10 +79,10 @@ INSTANTIATE_TEST_CASE_P(NFAWidth, NFAWidthTest, ValuesIn(widthTests));
TEST_P(NFAWidthTest, Check) { TEST_P(NFAWidthTest, Check) {
const WidthTest &t = GetParam(); const WidthTest &t = GetParam();
SCOPED_TRACE(testing::Message() << "Pattern: " << t.pattern); SCOPED_TRACE(testing::Message() << "Pattern: " << t.pattern);
unique_ptr<NGWrapper> w(constructGraph(t.pattern, 0)); auto g = constructGraph(t.pattern, 0);
ASSERT_EQ(t.minWidth, findMinWidth(*w)); ASSERT_EQ(t.minWidth, findMinWidth(*g));
ASSERT_EQ(t.maxWidth, findMaxWidth(*w)); ASSERT_EQ(t.maxWidth, findMaxWidth(*g));
} }
// for google test // for google test

View File

@ -35,6 +35,7 @@
#include "ng_corpus_generator.h" #include "ng_corpus_generator.h"
#include "ng_corpus_editor.h" #include "ng_corpus_editor.h"
#include "compiler/compiler.h"
#include "nfagraph/ng.h" #include "nfagraph/ng.h"
#include "nfagraph/ng_util.h" #include "nfagraph/ng_util.h"
#include "ue2common.h" #include "ue2common.h"
@ -219,8 +220,9 @@ namespace {
/** \brief Concrete implementation */ /** \brief Concrete implementation */
class CorpusGeneratorImpl : public CorpusGenerator { class CorpusGeneratorImpl : public CorpusGenerator {
public: public:
CorpusGeneratorImpl(const NGWrapper &graph_in, CorpusProperties &props); CorpusGeneratorImpl(const NGHolder &graph_in, const ExpressionInfo &expr_in,
~CorpusGeneratorImpl() {} CorpusProperties &props);
~CorpusGeneratorImpl() = default;
void generateCorpus(vector<string> &data); void generateCorpus(vector<string> &data);
@ -237,6 +239,9 @@ private:
* bytes in length. */ * bytes in length. */
void addRandom(const min_max &mm, string *out); void addRandom(const min_max &mm, string *out);
/** \brief Info about this expression. */
const ExpressionInfo &expr;
/** \brief The NFA graph we operate over. */ /** \brief The NFA graph we operate over. */
const NGHolder &graph; const NGHolder &graph;
@ -245,12 +250,13 @@ private:
CorpusProperties &cProps; CorpusProperties &cProps;
}; };
CorpusGeneratorImpl::CorpusGeneratorImpl(const NGWrapper &graph_in, CorpusGeneratorImpl::CorpusGeneratorImpl(const NGHolder &graph_in,
const ExpressionInfo &expr_in,
CorpusProperties &props) CorpusProperties &props)
: graph(graph_in), cProps(props) { : expr(expr_in), graph(graph_in), cProps(props) {
// if this pattern is to be matched approximately // if this pattern is to be matched approximately
if (graph_in.edit_distance && !props.editDistance) { if (expr.edit_distance && !props.editDistance) {
props.editDistance = props.rand(0, graph_in.edit_distance + 1); props.editDistance = props.rand(0, expr.edit_distance + 1);
} }
} }
@ -392,8 +398,9 @@ hit_limit:
/** \brief Concrete implementation for UTF-8 */ /** \brief Concrete implementation for UTF-8 */
class CorpusGeneratorUtf8 : public CorpusGenerator { class CorpusGeneratorUtf8 : public CorpusGenerator {
public: public:
CorpusGeneratorUtf8(const NGWrapper &graph_in, CorpusProperties &props); CorpusGeneratorUtf8(const NGHolder &graph_in, const ExpressionInfo &expr_in,
~CorpusGeneratorUtf8() {} CorpusProperties &props);
~CorpusGeneratorUtf8() = default;
void generateCorpus(vector<string> &data); void generateCorpus(vector<string> &data);
@ -410,19 +417,23 @@ private:
* length. */ * length. */
void addRandom(const min_max &mm, vector<unichar> *out); void addRandom(const min_max &mm, vector<unichar> *out);
/** \brief Info about this expression. */
const ExpressionInfo &expr;
/** \brief The NFA graph we operate over. */ /** \brief The NFA graph we operate over. */
const NGWrapper &graph; const NGHolder &graph;
/** \brief Reference to our corpus generator properties object (stores some /** \brief Reference to our corpus generator properties object (stores some
* state) */ * state) */
CorpusProperties &cProps; CorpusProperties &cProps;
}; };
CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGWrapper &graph_in, CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGHolder &graph_in,
const ExpressionInfo &expr_in,
CorpusProperties &props) CorpusProperties &props)
: graph(graph_in), cProps(props) { : expr(expr_in), graph(graph_in), cProps(props) {
// we do not support Utf8 for approximate matching // we do not support Utf8 for approximate matching
if (graph.edit_distance) { if (expr.edit_distance) {
throw CorpusGenerationFailure("UTF-8 for edited patterns is not " throw CorpusGenerationFailure("UTF-8 for edited patterns is not "
"supported."); "supported.");
} }
@ -681,11 +692,12 @@ CorpusGenerator::~CorpusGenerator() { }
// External entry point // External entry point
unique_ptr<CorpusGenerator> makeCorpusGenerator(const NGWrapper &graph, unique_ptr<CorpusGenerator> makeCorpusGenerator(const NGHolder &graph,
const ExpressionInfo &expr,
CorpusProperties &props) { CorpusProperties &props) {
if (graph.utf8) { if (expr.utf8) {
return ue2::make_unique<CorpusGeneratorUtf8>(graph, props); return ue2::make_unique<CorpusGeneratorUtf8>(graph, expr, props);
} else { } else {
return ue2::make_unique<CorpusGeneratorImpl>(graph, props); return ue2::make_unique<CorpusGeneratorImpl>(graph, expr, props);
} }
} }

View File

@ -41,7 +41,8 @@
namespace ue2 { namespace ue2 {
class NGWrapper; class ExpressionInfo;
class NGHolder;
} // namespace ue2 } // namespace ue2
@ -68,6 +69,7 @@ public:
/** \brief Build a concrete impl conforming to the \ref CorpusGenerator /** \brief Build a concrete impl conforming to the \ref CorpusGenerator
* interface. */ * interface. */
std::unique_ptr<CorpusGenerator> std::unique_ptr<CorpusGenerator>
makeCorpusGenerator(const ue2::NGWrapper &graph, CorpusProperties &props); makeCorpusGenerator(const ue2::NGHolder &g, const ue2::ExpressionInfo &expr,
CorpusProperties &props);
#endif #endif