diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a60142f..732a73b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -674,6 +674,7 @@ SET (hs_SRCS src/compiler/compiler.h src/compiler/error.cpp src/compiler/error.h + src/compiler/expression_info.h src/fdr/engine_description.cpp src/fdr/engine_description.h src/fdr/fdr_compile.cpp diff --git a/src/compiler/asserts.cpp b/src/compiler/asserts.cpp index be836b06..44442226 100644 --- a/src/compiler/asserts.cpp +++ b/src/compiler/asserts.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,6 +42,8 @@ * word-to-word and word-to-nonword) are dropped. */ #include "asserts.h" + +#include "compiler/compiler.h" #include "nfagraph/ng.h" #include "nfagraph/ng_prune.h" #include "nfagraph/ng_redundancy.h" @@ -115,8 +117,8 @@ u32 conjunct(u32 flags1, u32 flags2) { typedef map, NFAEdge> edge_cache_t; static -void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, - u32 &assert_edge_count) { +void replaceAssertVertex(NGHolder &g, NFAVertex t, const ExpressionInfo &expr, + edge_cache_t &edge_cache, u32 &assert_edge_count) { DEBUG_PRINTF("replacing assert vertex %zu\n", g[t].index); const u32 flags = g[t].assert_flags; @@ -178,8 +180,7 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, edge_cache.emplace(cache_key, e); g[e].assert_flags = flags; if (++assert_edge_count > MAX_ASSERT_EDGES) { - throw CompileError(g.expressionIndex, - "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } } else { NFAEdge e = ecit->second; @@ -200,21 +201,23 @@ void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache, } static -void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) { +void setReportId(ReportManager 
&rm, NGHolder &g, const ExpressionInfo &expr, + NFAVertex v, s32 adj) { // Don't try and set the report ID of a special vertex. assert(!is_special(v, g)); // There should be no reports set already. assert(g[v].reports.empty()); - Report r = rm.getBasicInternalReport(g, adj); + Report r = rm.getBasicInternalReport(expr, adj); g[v].reports.insert(rm.getInternalId(r)); DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj); } static -void checkForMultilineStart(ReportManager &rm, NGWrapper &g) { +void checkForMultilineStart(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { vector dead; for (auto v : adjacent_vertices_range(g.start, g)) { if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) { @@ -238,7 +241,7 @@ void checkForMultilineStart(ReportManager &rm, NGWrapper &g) { for (const auto &e : dead) { NFAVertex dummy = add_vertex(g); g[dummy].char_reach.setall(); - setReportId(rm, g, dummy, -1); + setReportId(rm, g, expr, dummy, -1); add_edge(source(e, g), dummy, g[e], g); add_edge(dummy, g.accept, g); } @@ -263,7 +266,8 @@ bool hasAssertVertices(const NGHolder &g) { * Remove the horrors that are the temporary assert vertices which arise from * our construction method. Allows the rest of our code base to live in * blissful ignorance of their existence. 
*/ -void removeAssertVertices(ReportManager &rm, NGWrapper &g) { +void removeAssertVertices(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { size_t num = 0; DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g)); @@ -285,12 +289,12 @@ void removeAssertVertices(ReportManager &rm, NGWrapper &g) { for (auto v : vertices_range(g)) { if (g[v].assert_flags & WORDBOUNDARY_FLAGS) { - replaceAssertVertex(g, v, edge_cache, assert_edge_count); + replaceAssertVertex(g, v, expr, edge_cache, assert_edge_count); num++; } } - checkForMultilineStart(rm, g); + checkForMultilineStart(rm, g, expr); if (num) { DEBUG_PRINTF("resolved %zu assert vertices\n", num); diff --git a/src/compiler/asserts.h b/src/compiler/asserts.h index b9ec80c7..b4d64c6c 100644 --- a/src/compiler/asserts.h +++ b/src/compiler/asserts.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,8 +35,9 @@ namespace ue2 { +class ExpressionInfo; class ReportManager; -class NGWrapper; +class NGHolder; /** \brief Convert temporary assert vertices (from construction method) to * edge-based flags. @@ -44,7 +45,8 @@ class NGWrapper; * Remove the horrors that are the temporary assert vertices which arise from * our construction method. Allows the rest of our code base to live in * blissful ignorance of their existence. 
*/ -void removeAssertVertices(ReportManager &rm, NGWrapper &g); +void removeAssertVertices(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr); } // namespace ue2 diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp index 56ed5f41..49ed88f4 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -73,7 +73,6 @@ using namespace std; namespace ue2 { - static void validateExt(const hs_expr_ext &ext) { static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET | @@ -100,26 +99,18 @@ void validateExt(const hs_expr_ext &ext) { } ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, - unsigned flags, ReportID actionId, + unsigned flags, ReportID report, const hs_expr_ext *ext) - : utf8(false), - allow_vacuous(flags & HS_FLAG_ALLOWEMPTY), - highlander(flags & HS_FLAG_SINGLEMATCH), - prefilter(flags & HS_FLAG_PREFILTER), - som(SOM_NONE), - index(index_in), - id(actionId), - min_offset(0), - max_offset(MAX_OFFSET), - min_length(0), - edit_distance(0) { + : expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH, + false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET, + 0, 0) { ParseMode mode(flags); component = parse(expression, mode); - utf8 = mode.utf8; /* utf8 may be set by parse() */ + expr.utf8 = mode.utf8; /* utf8 may be set by parse() */ - if (utf8 && !isValidUtf8(expression)) { + if (expr.utf8 && !isValidUtf8(expression)) { throw ParseError("Expression is not valid UTF-8."); } @@ -147,7 +138,7 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, // Set SOM type. if (flags & HS_FLAG_SOM_LEFTMOST) { - som = SOM_LEFT; + expr.som = SOM_LEFT; } // Set extended parameters, if we have them. 
@@ -156,29 +147,29 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, validateExt(*ext); if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) { - min_offset = ext->min_offset; + expr.min_offset = ext->min_offset; } if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) { - max_offset = ext->max_offset; + expr.max_offset = ext->max_offset; } if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) { - min_length = ext->min_length; + expr.min_length = ext->min_length; } if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) { - edit_distance = ext->edit_distance; + expr.edit_distance = ext->edit_distance; } } // These are validated in validateExt, so an error will already have been // thrown if these conditions don't hold. - assert(max_offset >= min_offset); - assert(max_offset >= min_length); + assert(expr.max_offset >= expr.min_offset); + assert(expr.max_offset >= expr.min_length); // Since prefiltering and SOM aren't supported together, we must squash any // min_length constraint as well. - if (flags & HS_FLAG_PREFILTER && min_length) { + if (flags & HS_FLAG_PREFILTER && expr.min_length) { DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n"); - min_length = 0; + expr.min_length = 0; } } @@ -187,25 +178,25 @@ ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, * \brief Dumps the parse tree to screen in debug mode and to disk in dump * mode. 
*/ -void dumpExpression(UNUSED const ParsedExpression &expr, +void dumpExpression(UNUSED const ParsedExpression &pe, UNUSED const char *stage, UNUSED const Grey &grey) { #if defined(DEBUG) - DEBUG_PRINTF("===== Rule ID: %u (internalID: %u) =====\n", expr.id, - expr.index); + DEBUG_PRINTF("===== Rule ID: %u (expression index: %u) =====\n", + pe.expr.report, pe.expr.index); ostringstream debug_tree; - dumpTree(debug_tree, expr.component.get()); + dumpTree(debug_tree, pe.component.get()); printf("%s\n", debug_tree.str().c_str()); #endif // DEBUG #if defined(DUMP_SUPPORT) if (grey.dumpFlags & Grey::DUMP_PARSE) { stringstream ss; - ss << grey.dumpPath << "Expr_" << expr.index << "_componenttree_" + ss << grey.dumpPath << "Expr_" << pe.expr.index << "_componenttree_" << stage << ".txt"; ofstream out(ss.str().c_str()); - out << "Component Tree for " << expr.id << endl; - dumpTree(out, expr.component.get()); - if (expr.utf8) { + out << "Component Tree for " << pe.expr.report << endl; + dumpTree(out, pe.component.get()); + if (pe.expr.utf8) { out << "UTF8 mode" << endl; } } @@ -215,13 +206,13 @@ void dumpExpression(UNUSED const ParsedExpression &expr, /** \brief Run Component tree optimisations on \a expr. 
*/ static -void optimise(ParsedExpression &expr) { - if (expr.min_length || expr.som) { +void optimise(ParsedExpression &pe) { + if (pe.expr.min_length || pe.expr.som) { return; } DEBUG_PRINTF("optimising\n"); - expr.component->optimise(true /* root is connected to sds */); + pe.component->optimise(true /* root is connected to sds */); } void addExpression(NG &ng, unsigned index, const char *expression, @@ -238,34 +229,34 @@ void addExpression(NG &ng, unsigned index, const char *expression, // Do per-expression processing: errors here will result in an exception // being thrown up to our caller - ParsedExpression expr(index, expression, flags, id, ext); - dumpExpression(expr, "orig", cc.grey); + ParsedExpression pe(index, expression, flags, id, ext); + dumpExpression(pe, "orig", cc.grey); // Apply prefiltering transformations if desired. - if (expr.prefilter) { - prefilterTree(expr.component, ParseMode(flags)); - dumpExpression(expr, "prefiltered", cc.grey); + if (pe.expr.prefilter) { + prefilterTree(pe.component, ParseMode(flags)); + dumpExpression(pe, "prefiltered", cc.grey); } // Expressions containing zero-width assertions and other extended pcre // types aren't supported yet. This call will throw a ParseError exception // if the component tree contains such a construct. - checkUnsupported(*expr.component); + checkUnsupported(*pe.component); - expr.component->checkEmbeddedStartAnchor(true); - expr.component->checkEmbeddedEndAnchor(true); + pe.component->checkEmbeddedStartAnchor(true); + pe.component->checkEmbeddedEndAnchor(true); if (cc.grey.optimiseComponentTree) { - optimise(expr); - dumpExpression(expr, "opt", cc.grey); + optimise(pe); + dumpExpression(pe, "opt", cc.grey); } DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n", - expr.component.get(), expr.index, expr.id); + pe.component.get(), pe.expr.index, pe.expr.report); // You can only use the SOM flags if you've also specified an SOM // precision mode. 
- if (expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) { + if (pe.expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) { throw CompileError("To use a SOM expression flag in streaming mode, " "an SOM precision mode (e.g. " "HS_MODE_SOM_HORIZON_LARGE) must be specified."); @@ -273,26 +264,26 @@ void addExpression(NG &ng, unsigned index, const char *expression, // If this expression is a literal, we can feed it directly to Rose rather // than building the NFA graph. - if (shortcutLiteral(ng, expr)) { + if (shortcutLiteral(ng, pe)) { DEBUG_PRINTF("took literal short cut\n"); return; } - unique_ptr g = buildWrapper(ng.rm, cc, expr); - - if (!g) { + auto built_expr = buildGraph(ng.rm, cc, pe); + if (!built_expr.g) { DEBUG_PRINTF("NFA build failed on ID %u, but no exception was " - "thrown.\n", expr.id); + "thrown.\n", pe.expr.report); throw CompileError("Internal error."); } - if (!expr.allow_vacuous && matches_everywhere(*g)) { + auto &g = *built_expr.g; + if (!pe.expr.allow_vacuous && matches_everywhere(g)) { throw CompileError("Pattern matches empty buffer; use " "HS_FLAG_ALLOWEMPTY to enable support."); } - if (!ng.addGraph(*g)) { - DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", expr.id); + if (!ng.addGraph(built_expr.expr, g)) { + DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", pe.expr.report); throw CompileError("Error compiling expression."); } } @@ -453,41 +444,42 @@ bool isSupported(const Component &c) { } #endif -unique_ptr buildWrapper(ReportManager &rm, const CompileContext &cc, - const ParsedExpression &expr) { - assert(isSupported(*expr.component)); +BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc, + const ParsedExpression &pe) { + assert(isSupported(*pe.component)); - const unique_ptr builder = makeNFABuilder(rm, cc, expr); + const auto builder = makeNFABuilder(rm, cc, pe); assert(builder); // Set up START and ACCEPT states; retrieve the special states - const auto bs = makeGlushkovBuildState(*builder, 
expr.prefilter); + const auto bs = makeGlushkovBuildState(*builder, pe.expr.prefilter); // Map position IDs to characters/components - expr.component->notePositions(*bs); + pe.component->notePositions(*bs); // Wire the start dotstar state to the firsts - connectInitialStates(*bs, expr); + connectInitialStates(*bs, pe); DEBUG_PRINTF("wire up body of expr\n"); // Build the rest of the FOLLOW set vector initials = {builder->getStartDotStar(), builder->getStart()}; - expr.component->buildFollowSet(*bs, initials); + pe.component->buildFollowSet(*bs, initials); // Wire the lasts to the accept state - connectFinalStates(*bs, expr); + connectFinalStates(*bs, pe); // Create our edges bs->buildEdges(); - auto g = builder->getGraph(); - assert(g); + BuiltExpression built_expr = builder->getGraph(); + assert(built_expr.g); - dumpDotWrapper(*g, "00_before_asserts", cc.grey); - removeAssertVertices(rm, *g); + dumpDotWrapper(*built_expr.g, built_expr.expr, "00_before_asserts", + cc.grey); + removeAssertVertices(rm, *built_expr.g, built_expr.expr); - return g; + return built_expr; } } // namespace ue2 diff --git a/src/compiler/compiler.h b/src/compiler/compiler.h index 48987fc3..8f5f9b65 100644 --- a/src/compiler/compiler.h +++ b/src/compiler/compiler.h @@ -35,8 +35,8 @@ #include "ue2common.h" #include "database.h" +#include "compiler/expression_info.h" #include "parser/Component.h" -#include "som/som.h" #include #include @@ -50,35 +50,32 @@ struct CompileContext; struct Grey; struct target_t; class NG; +class NGHolder; class ReportManager; -class NGWrapper; -/** Class gathering together the pieces of a parsed expression. - * Note: Owns the provided component. - */ +/** \brief Class gathering together the pieces of a parsed expression. 
*/ class ParsedExpression : boost::noncopyable { public: ParsedExpression(unsigned index, const char *expression, unsigned flags, - ReportID actionId, const hs_expr_ext *ext = nullptr); + ReportID report, const hs_expr_ext *ext = nullptr); - bool utf8; //!< UTF-8 mode flag specified + /** \brief Expression information (from flags, extparam etc) */ + ExpressionInfo expr; - /** \brief root node of parsed component tree. */ - std::unique_ptr<Component> component; + /** \brief Root node of parsed component tree. */ + std::unique_ptr<Component> component; +}; - const bool allow_vacuous; //!< HS_FLAG_ALLOWEMPTY specified - const bool highlander; //!< HS_FLAG_SINGLEMATCH specified - const bool prefilter; //!< HS_FLAG_PREFILTER specified - som_type som; //!< chosen SOM mode, or SOM_NONE +/** + * \brief Class gathering together the pieces of an expression that has been + * built into an NFA graph. + */ +struct BuiltExpression { + /** \brief Expression information (from flags, extparam etc) */ + ExpressionInfo expr; - /** \brief index in expressions array passed to \ref hs_compile_multi */ - const unsigned index; - - const ReportID id; //!< user-specified pattern ID - u64a min_offset; //!< 0 if not used - u64a max_offset; //!< MAX_OFFSET if not used - u64a min_length; //!< 0 if not used - u32 edit_distance; //!< 0 if not used + /** \brief Built Glushkov NFA graph. */ + std::unique_ptr<NGHolder> g; }; /** @@ -95,12 +92,12 @@ public: * @param ext * Struct containing extra parameters for this expression, or NULL if * none. - * @param actionId + * @param report * The identifier to associate with the expression; returned by engine on * match. */ void addExpression(NG &ng, unsigned index, const char *expression, - unsigned flags, const hs_expr_ext *ext, ReportID actionId); + unsigned flags, const hs_expr_ext *ext, ReportID report); /** * Build a Hyperscan database out of the expressions we've been given. A @@ -128,9 +125,8 @@ struct hs_database *build(NG &ng, unsigned int *length); * @return * nullptr on error.
*/ -std::unique_ptr<NGWrapper> buildWrapper(ReportManager &rm, - const CompileContext &cc, - const ParsedExpression &expr); +BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc, + const ParsedExpression &expr); /** * Build a platform_t out of a target_t. diff --git a/src/compiler/expression_info.h b/src/compiler/expression_info.h new file mode 100644 index 00000000..7775f59e --- /dev/null +++ b/src/compiler/expression_info.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief ExpressionInfo class for storing the properties of an expression. + */ + +#ifndef COMPILER_EXPRESSION_INFO_H +#define COMPILER_EXPRESSION_INFO_H + +#include "ue2common.h" +#include "som/som.h" + +namespace ue2 { + +/** \brief Properties of an expression. */ +class ExpressionInfo { +public: + ExpressionInfo(unsigned int index_in, bool allow_vacuous_in, + bool highlander_in, bool utf8_in, bool prefilter_in, + som_type som_in, ReportID report_in, u64a min_offset_in, + u64a max_offset_in, u64a min_length_in, u32 edit_distance_in) + : index(index_in), report(report_in), allow_vacuous(allow_vacuous_in), + highlander(highlander_in), utf8(utf8_in), prefilter(prefilter_in), + som(som_in), min_offset(min_offset_in), max_offset(max_offset_in), + min_length(min_length_in), edit_distance(edit_distance_in) {} + + /** + * \brief Index of this expression in the array passed to hs_compile_multi. + * + * Used: + * - down the track in error handling; + * - for identifying parts of an expression in highlander mode. + */ + unsigned int index; + + /** \brief Report ID specified by the user. */ + ReportID report; + + /** \brief Vacuous pattern is allowed. (HS_FLAG_ALLOWEMPTY) */ + bool allow_vacuous; + + /** \brief "Highlander" (single match) pattern. (HS_FLAG_SINGLEMATCH) */ + bool highlander; + + /** \brief UTF-8 pattern. (HS_FLAG_UTF8) */ + bool utf8; + + /** \brief Prefiltering pattern.
(HS_FLAG_PREFILTER) */ + bool prefilter; + + /** \brief Start-of-match type requested, or SOM_NONE. */ + som_type som; + + /** \brief Minimum match offset extended parameter. 0 if not used. */ + u64a min_offset; + + /** + * \brief Maximum match offset extended parameter. + * MAX_OFFSET if not used. + */ + u64a max_offset; + + /** \brief Minimum match length extended parameter. 0 if not used. */ + u64a min_length; + + /** + * \brief Approximate matching edit distance extended parameter. + * 0 if not used. + */ + u32 edit_distance; +}; + +} + +#endif // COMPILER_EXPRESSION_INFO_H diff --git a/src/hs.cpp b/src/hs.cpp index 6cd3a3ee..c1e1cdce 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -369,11 +369,13 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, assert(pe.component); // Apply prefiltering transformations if desired. - if (pe.prefilter) { + if (pe.expr.prefilter) { prefilterTree(pe.component, ParseMode(flags)); } - unique_ptr<NGWrapper> g = buildWrapper(rm, cc, pe); + auto built_expr = buildGraph(rm, cc, pe); + unique_ptr<NGHolder> &g = built_expr.g; + ExpressionInfo &expr = built_expr.expr; if (!g) { DEBUG_PRINTF("NFA build failed, but no exception was thrown.\n"); @@ -381,13 +383,13 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, } // validate graph's suitability for fuzzing - validate_fuzzy_compile(*g, g->edit_distance, g->utf8, cc.grey); + validate_fuzzy_compile(*g, expr.edit_distance, expr.utf8, cc.grey); // fuzz graph - this must happen before any transformations are made - make_fuzzy(*g, g->edit_distance, cc.grey); + make_fuzzy(*g, expr.edit_distance, cc.grey); - handleExtendedParams(rm, *g, cc); - fillExpressionInfo(rm, *g, &local_info); + handleExtendedParams(rm, *g, expr, cc); + fillExpressionInfo(rm, *g, expr, &local_info); } catch (const CompileError &e) { // Compiler error occurred diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index 2f6d8cd7..eded7af2 100644 --- a/src/nfagraph/ng.cpp +++
b/src/nfagraph/ng.cpp @@ -27,10 +27,11 @@ */ /** \file - * \brief NG, NGHolder, NGWrapper and graph handling. + * \brief NG and graph handling. */ -#include "grey.h" #include "ng.h" + +#include "grey.h" #include "ng_anchored_acyclic.h" #include "ng_anchored_dots.h" #include "ng_asserts.h" @@ -62,6 +63,7 @@ #include "ng_util.h" #include "ng_width.h" #include "ue2common.h" +#include "compiler/compiler.h" #include "nfa/goughcompile.h" #include "rose/rose_build.h" #include "smallwrite/smallwrite_build.h" @@ -100,16 +102,16 @@ NG::~NG() { * \throw CompileError if SOM cannot be supported for the component. */ static -bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w, +bool addComponentSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, const som_type som, const u32 comp_id) { DEBUG_PRINTF("doing som\n"); - dumpComponent(g, "03_presom", w.expressionIndex, comp_id, ng.cc.grey); + dumpComponent(g, "03_presom", expr.index, comp_id, ng.cc.grey); assert(hasCorrectlyNumberedVertices(g)); - assert(allMatchStatesHaveReports(w)); + assert(allMatchStatesHaveReports(g)); // First, we try the "SOM chain" support in ng_som.cpp. 
- sombe_rv rv = doSom(ng, g, w, comp_id, som); + sombe_rv rv = doSom(ng, g, expr, comp_id, som); if (rv == SOMBE_HANDLED_INTERNAL) { return false; } else if (rv == SOMBE_HANDLED_ALL) { @@ -118,7 +120,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w, assert(rv == SOMBE_FAIL); /* Next, Sombe style approaches */ - rv = doSomWithHaig(ng, g, w, comp_id, som); + rv = doSomWithHaig(ng, g, expr, comp_id, som); if (rv == SOMBE_HANDLED_INTERNAL) { return false; } else if (rv == SOMBE_HANDLED_ALL) { @@ -132,7 +134,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w, vector > triggers; /* empty for outfix */ assert(g.kind == NFA_OUTFIX); - dumpComponent(g, "haig", w.expressionIndex, comp_id, ng.cc.grey); + dumpComponent(g, "haig", expr.index, comp_id, ng.cc.grey); makeReportsSomPass(ng.rm, g); auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers, ng.cc.grey); @@ -145,7 +147,7 @@ bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w, /* Our various strategies for supporting SOM for this pattern have failed. 
* Provide a generic pattern not supported/too large return value as it is * unclear what the meaning of a specific SOM error would be */ - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); assert(0); // unreachable return false; @@ -200,21 +202,21 @@ void reduceGraph(NGHolder &g, som_type som, bool utf8, } static -bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, - const u32 comp_id) { +bool addComponent(NG &ng, NGHolder &g, const ExpressionInfo &expr, + const som_type som, const u32 comp_id) { const CompileContext &cc = ng.cc; assert(hasCorrectlyNumberedVertices(g)); DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n", - w.expressionIndex, comp_id, num_vertices(g), num_edges(g)); + expr.index, comp_id, num_vertices(g), num_edges(g)); - dumpComponent(g, "01_begin", w.expressionIndex, comp_id, ng.cc.grey); + dumpComponent(g, "01_begin", expr.index, comp_id, ng.cc.grey); - assert(allMatchStatesHaveReports(w)); + assert(allMatchStatesHaveReports(g)); - reduceGraph(g, som, w.utf8, cc); + reduceGraph(g, som, expr.utf8, cc); - dumpComponent(g, "02_reduced", w.expressionIndex, comp_id, ng.cc.grey); + dumpComponent(g, "02_reduced", expr.index, comp_id, ng.cc.grey); // There may be redundant regions that we can remove if (cc.grey.performGraphSimplification) { @@ -231,12 +233,12 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, // Start Of Match handling. 
if (som) { - if (addComponentSom(ng, g, w, som, comp_id)) { + if (addComponentSom(ng, g, expr, som, comp_id)) { return true; } } - assert(allMatchStatesHaveReports(w)); + assert(allMatchStatesHaveReports(g)); if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) { return true; @@ -251,11 +253,11 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, return true; } - if (doViolet(*ng.rose, g, w.prefilter, false, ng.rm, cc)) { + if (doViolet(*ng.rose, g, expr.prefilter, false, ng.rm, cc)) { return true; } - if (splitOffPuffs(*ng.rose, ng.rm, g, w.prefilter, cc)) { + if (splitOffPuffs(*ng.rose, ng.rm, g, expr.prefilter, cc)) { return true; } @@ -268,7 +270,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, return true; } - if (doViolet(*ng.rose, g, w.prefilter, true, ng.rm, cc)) { + if (doViolet(*ng.rose, g, expr.prefilter, true, ng.rm, cc)) { return true; } @@ -283,7 +285,7 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, // Returns true if all components have been added. static -bool processComponents(NG &ng, NGWrapper &w, +bool processComponents(NG &ng, ExpressionInfo &expr, deque> &g_comp, const som_type som) { const u32 num_components = g_comp.size(); @@ -293,7 +295,7 @@ bool processComponents(NG &ng, NGWrapper &w, if (!g_comp[i]) { continue; } - if (addComponent(ng, *g_comp[i], w, som, i)) { + if (addComponent(ng, *g_comp[i], expr, som, i)) { g_comp[i].reset(); continue; } @@ -313,48 +315,48 @@ bool processComponents(NG &ng, NGWrapper &w, return false; } -bool NG::addGraph(NGWrapper &w) { +bool NG::addGraph(ExpressionInfo &expr, NGHolder &g) { // remove reports that aren't on vertices connected to accept. 
- clearReports(w); + clearReports(g); - som_type som = w.som; - if (som && isVacuous(w)) { - throw CompileError(w.expressionIndex, "Start of match is not " + som_type som = expr.som; + if (som && isVacuous(g)) { + throw CompileError(expr.index, "Start of match is not " "currently supported for patterns which match an " "empty buffer."); } - dumpDotWrapper(w, "01_initial", cc.grey); - assert(allMatchStatesHaveReports(w)); + dumpDotWrapper(g, expr, "01_initial", cc.grey); + assert(allMatchStatesHaveReports(g)); /* ensure utf8 starts at cp boundary */ - ensureCodePointStart(rm, w); + ensureCodePointStart(rm, g, expr); - if (can_never_match(w)) { - throw CompileError(w.expressionIndex, "Pattern can never match."); + if (can_never_match(g)) { + throw CompileError(expr.index, "Pattern can never match."); } // validate graph's suitability for fuzzing before resolving asserts - validate_fuzzy_compile(w, w.edit_distance, w.utf8, cc.grey); + validate_fuzzy_compile(g, expr.edit_distance, expr.utf8, cc.grey); - resolveAsserts(rm, w); - dumpDotWrapper(w, "02_post_assert_resolve", cc.grey); - assert(allMatchStatesHaveReports(w)); + resolveAsserts(rm, g, expr); + dumpDotWrapper(g, expr, "02_post_assert_resolve", cc.grey); + assert(allMatchStatesHaveReports(g)); - make_fuzzy(w, w.edit_distance, cc.grey); - dumpDotWrapper(w, "02a_post_fuzz", cc.grey); + make_fuzzy(g, expr.edit_distance, cc.grey); + dumpDotWrapper(g, expr, "02a_post_fuzz", cc.grey); - pruneUseless(w); - pruneEmptyVertices(w); + pruneUseless(g); + pruneEmptyVertices(g); - if (can_never_match(w)) { - throw CompileError(w.expressionIndex, "Pattern can never match."); + if (can_never_match(g)) { + throw CompileError(expr.index, "Pattern can never match."); } - optimiseVirtualStarts(w); /* good for som */ + optimiseVirtualStarts(g); /* good for som */ - handleExtendedParams(rm, w, cc); - if (w.min_length) { + handleExtendedParams(rm, g, expr, cc); + if (expr.min_length) { // We have a minimum length constraint, which we 
currently use SOM to // satisfy. som = SOM_LEFT; @@ -368,70 +370,70 @@ bool NG::addGraph(NGWrapper &w) { // first, we can perform graph work that can be done on an individual // expression basis. - if (w.utf8) { - relaxForbiddenUtf8(w); + if (expr.utf8) { + relaxForbiddenUtf8(g, expr); } - if (w.highlander && !w.min_length && !w.min_offset) { + if (expr.highlander && !expr.min_length && !expr.min_offset) { // In highlander mode: if we don't have constraints on our reports that // may prevent us accepting our first match (i.e. extended params) we // can prune the other out-edges of all vertices connected to accept. - pruneHighlanderAccepts(w, rm); + pruneHighlanderAccepts(g, rm); } - dumpDotWrapper(w, "02b_fairly_early", cc.grey); + dumpDotWrapper(g, expr, "02b_fairly_early", cc.grey); // If we're a vacuous pattern, we can handle this early. - if (splitOffVacuous(boundary, rm, w)) { + if (splitOffVacuous(boundary, rm, g, expr)) { DEBUG_PRINTF("split off vacuous\n"); } // We might be done at this point: if we've run out of vertices, we can // stop processing. - if (num_vertices(w) == N_SPECIALS) { + if (num_vertices(g) == N_SPECIALS) { DEBUG_PRINTF("all vertices claimed by vacuous handling\n"); return true; } // Now that vacuous edges have been removed, update the min width exclusive // of boundary reports. - minWidth = min(minWidth, findMinWidth(w)); + minWidth = min(minWidth, findMinWidth(g)); // Add the pattern to the small write builder. - smwr->add(w); + smwr->add(g, expr); if (!som) { - removeSiblingsOfStartDotStar(w); + removeSiblingsOfStartDotStar(g); } - dumpDotWrapper(w, "03_early", cc.grey); + dumpDotWrapper(g, expr, "03_early", cc.grey); // Perform a reduction pass to merge sibling character classes together. 
if (cc.grey.performGraphSimplification) { - removeRedundancy(w, som); - prunePathsRedundantWithSuccessorOfCyclics(w, som); + removeRedundancy(g, som); + prunePathsRedundantWithSuccessorOfCyclics(g, som); } - dumpDotWrapper(w, "04_reduced", cc.grey); + dumpDotWrapper(g, expr, "04_reduced", cc.grey); // If we've got some literals that span the graph from start to accept, we // can split them off into Rose from here. if (!som) { - if (splitOffLiterals(*this, w)) { + if (splitOffLiterals(*this, g)) { DEBUG_PRINTF("some vertices claimed by literals\n"); } } // We might be done at this point: if we've run out of vertices, we can // stop processing. - if (num_vertices(w) == N_SPECIALS) { + if (num_vertices(g) == N_SPECIALS) { DEBUG_PRINTF("all vertices claimed before calc components\n"); return true; } // Split the graph into a set of connected components. - deque> g_comp = calcComponents(w); + deque> g_comp = calcComponents(g); assert(!g_comp.empty()); if (!som) { @@ -443,14 +445,14 @@ bool NG::addGraph(NGWrapper &w) { recalcComponents(g_comp); } - if (processComponents(*this, w, g_comp, som)) { + if (processComponents(*this, expr, g_comp, som)) { return true; } // If we're in prefiltering mode, we can run the prefilter reductions and // have another shot at accepting the graph. 
- if (cc.grey.prefilterReductions && w.prefilter) { + if (cc.grey.prefilterReductions && expr.prefilter) { for (u32 i = 0; i < g_comp.size(); i++) { if (!g_comp[i]) { continue; @@ -459,7 +461,7 @@ bool NG::addGraph(NGWrapper &w) { prefilterReductions(*g_comp[i], cc); } - if (processComponents(*this, w, g_comp, som)) { + if (processComponents(*this, expr, g_comp, som)) { return true; } } @@ -469,7 +471,7 @@ bool NG::addGraph(NGWrapper &w) { if (g_comp[i]) { DEBUG_PRINTF("could not compile component %u with %zu vertices\n", i, num_vertices(*g_comp[i])); - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } } @@ -478,60 +480,60 @@ bool NG::addGraph(NGWrapper &w) { } /** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */ -bool NG::addHolder(NGHolder &w) { - DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(w)); - assert(allMatchStatesHaveReports(w)); - assert(hasCorrectlyNumberedVertices(w)); +bool NG::addHolder(NGHolder &g) { + DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(g)); + assert(allMatchStatesHaveReports(g)); + assert(hasCorrectlyNumberedVertices(g)); /* We don't update the global minWidth here as we care about the min width * of the whole pattern - not a just a prefix of it. */ bool prefilter = false; - //dumpDotComp(comp, w, *this, 20, "prefix_init"); + //dumpDotComp(comp, g, *this, 20, "prefix_init"); som_type som = SOM_NONE; /* the prefixes created by the SOM code do not themselves track som */ bool utf8 = false; // handling done earlier - reduceGraph(w, som, utf8, cc); + reduceGraph(g, som, utf8, cc); // There may be redundant regions that we can remove if (cc.grey.performGraphSimplification) { - removeRegionRedundancy(w, som); + removeRegionRedundancy(g, som); } // "Short Exhaustible Passthrough" patterns always become outfixes. 
- if (isSEP(w, rm, cc.grey)) { + if (isSEP(g, rm, cc.grey)) { DEBUG_PRINTF("graph is SEP\n"); - if (rose->addOutfix(w)) { + if (rose->addOutfix(g)) { return true; } } - if (splitOffAnchoredAcyclic(*rose, w, cc)) { + if (splitOffAnchoredAcyclic(*rose, g, cc)) { return true; } - if (handleSmallLiteralSets(*rose, w, cc) - || handleFixedWidth(*rose, w, cc.grey)) { + if (handleSmallLiteralSets(*rose, g, cc) + || handleFixedWidth(*rose, g, cc.grey)) { return true; } - if (handleDecoratedLiterals(*rose, w, cc)) { + if (handleDecoratedLiterals(*rose, g, cc)) { return true; } - if (doViolet(*rose, w, prefilter, false, rm, cc)) { + if (doViolet(*rose, g, prefilter, false, rm, cc)) { return true; } - if (splitOffPuffs(*rose, rm, w, prefilter, cc)) { + if (splitOffPuffs(*rose, rm, g, prefilter, cc)) { return true; } - if (doViolet(*rose, w, prefilter, true, rm, cc)) { + if (doViolet(*rose, g, prefilter, true, rm, cc)) { return true; } DEBUG_PRINTF("trying for outfix\n"); - if (rose->addOutfix(w)) { + if (rose->addOutfix(g)) { DEBUG_PRINTF("ok\n"); return true; } @@ -586,26 +588,4 @@ bool NG::addLiteral(const ue2_literal &literal, u32 expr_index, return true; } -NGWrapper::NGWrapper(unsigned int ei, bool highlander_in, bool utf8_in, - bool prefilter_in, som_type som_in, ReportID r, - u64a min_offset_in, u64a max_offset_in, u64a min_length_in, - u32 edit_distance_in) - : expressionIndex(ei), reportId(r), highlander(highlander_in), - utf8(utf8_in), prefilter(prefilter_in), som(som_in), - min_offset(min_offset_in), max_offset(max_offset_in), - min_length(min_length_in), edit_distance(edit_distance_in) { - // All special nodes/edges are added in NGHolder's constructor. - DEBUG_PRINTF("built %p: expr=%u report=%u%s%s%s%s " - "min_offset=%llu max_offset=%llu min_length=%llu " - "edit_distance=%u\n", - this, expressionIndex, reportId, - highlander ? " highlander" : "", - utf8 ? " utf8" : "", - prefilter ? " prefilter" : "", - (som != SOM_NONE) ? 
" som" : "", - min_offset, max_offset, min_length, edit_distance); -} - -NGWrapper::~NGWrapper() {} - } // namespace ue2 diff --git a/src/nfagraph/ng.h b/src/nfagraph/ng.h index d6e5d3c0..6693773e 100644 --- a/src/nfagraph/ng.h +++ b/src/nfagraph/ng.h @@ -27,7 +27,7 @@ */ /** \file - * \brief NG, NGHolder, NGWrapper declarations. + * \brief NG declaration. */ #ifndef NG_H @@ -58,31 +58,7 @@ namespace ue2 { struct CompileContext; struct ue2_literal; -class NGWrapper : public NGHolder { -public: - NGWrapper(unsigned int expressionIndex, bool highlander, bool utf8, - bool prefilter, const som_type som, ReportID rid, u64a min_offset, - u64a max_offset, u64a min_length, u32 edit_distance); - - ~NGWrapper() override; - - /** index of the expression represented by this graph, used - * - down the track in error handling - * - identifying parts of an expression in highlander mode - */ - const unsigned int expressionIndex; - - const ReportID reportId; /**< user-visible report id */ - const bool highlander; /**< user-specified single match only */ - const bool utf8; /**< UTF-8 mode */ - const bool prefilter; /**< prefiltering mode */ - const som_type som; /**< SOM type requested */ - u64a min_offset; /**< extparam min_offset value */ - u64a max_offset; /**< extparam max_offset value */ - u64a min_length; /**< extparam min_length value */ - u32 edit_distance; /**< extparam edit_distance value */ -}; - +class ExpressionInfo; class RoseBuild; class SmallWriteBuild; @@ -94,14 +70,14 @@ public: /** \brief Consumes a pattern, returns false or throws a CompileError * exception if the graph cannot be consumed. */ - bool addGraph(NGWrapper &w); + bool addGraph(ExpressionInfo &expr, NGHolder &h); /** \brief Consumes a graph, cut-down version of addGraph for use by SOM * processing. 
*/ bool addHolder(NGHolder &h); - /** \brief Adds a literal to Rose, used by literal shortcut passes (instead of - * using \ref addGraph) */ + /** \brief Adds a literal to Rose, used by literal shortcut passes (instead + * of using \ref addGraph) */ bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report, bool highlander, som_type som); @@ -128,7 +104,8 @@ public: * * Shared with the small write compiler. */ -void reduceGraph(NGHolder &g, som_type som, bool utf8, const CompileContext &cc); +void reduceGraph(NGHolder &g, som_type som, bool utf8, + const CompileContext &cc); } // namespace ue2 diff --git a/src/nfagraph/ng_asserts.cpp b/src/nfagraph/ng_asserts.cpp index c2f0d68f..8812afad 100644 --- a/src/nfagraph/ng_asserts.cpp +++ b/src/nfagraph/ng_asserts.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -47,6 +47,7 @@ #include "ng_prune.h" #include "ng_redundancy.h" #include "ng_util.h" +#include "compiler/compiler.h" #include "parser/position.h" // for POS flags #include "util/bitutils.h" // for findAndClearLSB_32 #include "util/boundary_reports.h" @@ -184,43 +185,45 @@ void findSplitters(const NGHolder &g, const vector &asserts, } static -void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) { +void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, + NFAVertex v, s32 adj) { // Don't try and set the report ID of a special vertex. assert(!is_special(v, g)); // If there's a report set already, we're replacing it. 
g[v].reports.clear(); - Report ir = rm.getBasicInternalReport(g, adj); + Report ir = rm.getBasicInternalReport(expr, adj); g[v].reports.insert(rm.getInternalId(ir)); DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj); } static -NFAVertex makeClone(ReportManager &rm, NGWrapper &g, NFAVertex v, - const CharReach &cr_mask) { +NFAVertex makeClone(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, + NFAVertex v, const CharReach &cr_mask) { NFAVertex clone = clone_vertex(g, v); g[clone].char_reach &= cr_mask; clone_out_edges(g, v, clone); clone_in_edges(g, v, clone); if (v == g.startDs) { - if (g.utf8) { + if (expr.utf8) { g[clone].char_reach &= ~UTF_START_CR; } DEBUG_PRINTF("marked as virt\n"); g[clone].assert_flags = POS_FLAG_VIRTUAL_START; - setReportId(rm, g, clone, 0); + setReportId(rm, g, expr, clone, 0); } return clone; } static -void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) { +void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, + NFAVertex v, bool ucp) { assert(v != g.start); assert(v != g.accept); assert(v != g.acceptEod); @@ -232,14 +235,14 @@ void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) { auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; }; // Split v into word/nonword vertices with only asserting out-edges. - NFAVertex w_out = makeClone(rm, g, v, cr_word); - NFAVertex nw_out = makeClone(rm, g, v, cr_nonword); + NFAVertex w_out = makeClone(rm, g, expr, v, cr_word); + NFAVertex nw_out = makeClone(rm, g, expr, v, cr_nonword); remove_out_edge_if(w_out, has_no_assert, g); remove_out_edge_if(nw_out, has_no_assert, g); // Split v into word/nonword vertices with only asserting in-edges. 
- NFAVertex w_in = makeClone(rm, g, v, cr_word); - NFAVertex nw_in = makeClone(rm, g, v, cr_nonword); + NFAVertex w_in = makeClone(rm, g, expr, v, cr_word); + NFAVertex nw_in = makeClone(rm, g, expr, v, cr_nonword); remove_in_edge_if(w_in, has_no_assert, g); remove_in_edge_if(nw_in, has_no_assert, g); @@ -250,7 +253,8 @@ void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) { } static -void resolveEdges(ReportManager &rm, NGWrapper &g, set *dead) { +void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, + set *dead) { for (const auto &e : edges_range(g)) { u32 flags = g[e].assert_flags; if (!flags) { @@ -363,7 +367,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set *dead) { } else if (v_w) { /* need to add a word byte */ NFAVertex vv = add_vertex(g); - setReportId(rm, g, vv, -1); + setReportId(rm, g, expr, vv, -1); g[vv].char_reach = CHARREACH_WORD; add_edge(vv, g.accept, g); g[e].assert_flags = 0; @@ -372,7 +376,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set *dead) { } else { /* need to add a non word byte or see eod */ NFAVertex vv = add_vertex(g); - setReportId(rm, g, vv, -1); + setReportId(rm, g, expr, vv, -1); g[vv].char_reach = CHARREACH_NONWORD; add_edge(vv, g.accept, g); g[e].assert_flags = 0; @@ -416,7 +420,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set *dead) { } else if (v_w) { /* need to add a word byte */ NFAVertex vv = add_vertex(g); - setReportId(rm, g, vv, -1); + setReportId(rm, g, expr, vv, -1); g[vv].char_reach = CHARREACH_WORD_UCP_PRE; add_edge(vv, g.accept, g); g[e].assert_flags = 0; @@ -425,7 +429,7 @@ void resolveEdges(ReportManager &rm, NGWrapper &g, set *dead) { } else { /* need to add a non word byte or see eod */ NFAVertex vv = add_vertex(g); - setReportId(rm, g, vv, -1); + setReportId(rm, g, expr, vv, -1); g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE; add_edge(vv, g.accept, g); g[e].assert_flags = 0; @@ -450,7 +454,8 @@ void resolveEdges(ReportManager &rm, 
NGWrapper &g, set *dead) { } } -void resolveAsserts(ReportManager &rm, NGWrapper &g) { +void resolveAsserts(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { vector asserts = getAsserts(g); if (asserts.empty()) { return; @@ -460,20 +465,20 @@ void resolveAsserts(ReportManager &rm, NGWrapper &g) { map to_split_ucp; /* by index, for determinism */ findSplitters(g, asserts, &to_split, &to_split_ucp); if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) { - throw CompileError(g.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } for (const auto &m : to_split) { assert(!contains(to_split_ucp, m.first)); - splitVertex(rm, g, m.second, false); + splitVertex(rm, g, expr, m.second, false); } for (const auto &m : to_split_ucp) { - splitVertex(rm, g, m.second, true); + splitVertex(rm, g, expr, m.second, true); } set dead; - resolveEdges(rm, g, &dead); + resolveEdges(rm, g, expr, &dead); remove_edges(dead, g); renumber_vertices(g); @@ -485,15 +490,16 @@ void resolveAsserts(ReportManager &rm, NGWrapper &g) { clearReports(g); } -void ensureCodePointStart(ReportManager &rm, NGWrapper &g) { +void ensureCodePointStart(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { /* In utf8 mode there is an implicit assertion that we start at codepoint * boundaries. Assert resolution handles the badness coming from asserts. * The only other source of trouble is startDs->accept connections. 
*/ NFAEdge orig = edge(g.startDs, g.accept, g); - if (g.utf8 && orig) { - DEBUG_PRINTF("rectifying %u\n", g.reportId); - Report ir = rm.getBasicInternalReport(g); + if (expr.utf8 && orig) { + DEBUG_PRINTF("rectifying %u\n", expr.report); + Report ir = rm.getBasicInternalReport(expr); ReportID rep = rm.getInternalId(ir); NFAVertex v_a = add_vertex(g); diff --git a/src/nfagraph/ng_asserts.h b/src/nfagraph/ng_asserts.h index 8183490a..2534f571 100644 --- a/src/nfagraph/ng_asserts.h +++ b/src/nfagraph/ng_asserts.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,12 +36,14 @@ namespace ue2 { struct BoundaryReports; -class NGWrapper; +class ExpressionInfo; +class NGHolder; class ReportManager; -void resolveAsserts(ReportManager &rm, NGWrapper &g); +void resolveAsserts(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr); -void ensureCodePointStart(ReportManager &rm, NGWrapper &g); +void ensureCodePointStart(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr); } // namespace ue2 diff --git a/src/nfagraph/ng_builder.cpp b/src/nfagraph/ng_builder.cpp index 385e114f..60f667f4 100644 --- a/src/nfagraph/ng_builder.cpp +++ b/src/nfagraph/ng_builder.cpp @@ -28,11 +28,13 @@ /** \file * \brief: NFA Graph Builder: used by Glushkov construction to construct an - * NGWrapper from a parsed expression. + * NGHolder from a parsed expression. */ + +#include "ng_builder.h" + #include "grey.h" #include "ng.h" -#include "ng_builder.h" #include "ng_util.h" #include "ue2common.h" #include "compiler/compiler.h" // for ParsedExpression @@ -79,7 +81,7 @@ public: void cloneRegion(Position first, Position last, unsigned posOffset) override; - unique_ptr getGraph() override; + BuiltExpression getGraph() override; private: /** fetch a vertex given its Position ID. 
*/ @@ -94,8 +96,11 @@ private: /** \brief Greybox: used for resource limits. */ const Grey &grey; - /** \brief Underlying NGWrapper graph. */ - unique_ptr graph; + /** \brief Underlying graph. */ + unique_ptr graph; + + /** \brief Underlying expression info. */ + ExpressionInfo expr; /** \brief mapping from position to vertex. Use \ref getVertex for access. * */ @@ -108,13 +113,9 @@ private: } // namespace NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in, - const ParsedExpression &expr) - : rm(rm_in), grey(grey_in), - graph(ue2::make_unique( - expr.index, expr.highlander, expr.utf8, expr.prefilter, expr.som, - expr.id, expr.min_offset, expr.max_offset, expr.min_length, - expr.edit_distance)), - vertIdx(N_SPECIALS) { + const ParsedExpression &parsed) + : rm(rm_in), grey(grey_in), graph(ue2::make_unique()), + expr(parsed.expr), vertIdx(N_SPECIALS) { // Reserve space for a reasonably-sized NFA id2vertex.reserve(64); @@ -151,7 +152,7 @@ void NFABuilderImpl::addVertex(Position pos) { (*graph)[v].index = pos; } -unique_ptr NFABuilderImpl::getGraph() { +BuiltExpression NFABuilderImpl::getGraph() { DEBUG_PRINTF("built graph has %zu vertices and %zu edges\n", num_vertices(*graph), num_edges(*graph)); @@ -162,13 +163,13 @@ unique_ptr NFABuilderImpl::getGraph() { throw CompileError("Pattern too large."); } - return move(graph); + return { expr, move(graph) }; } void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) { - Report ir = rm.getBasicInternalReport(*graph, offsetAdjust); + Report ir = rm.getBasicInternalReport(expr, offsetAdjust); DEBUG_PRINTF("setting report id on %u = (%u, %d, %u)\n", - pos, graph->reportId, offsetAdjust, ir.ekey); + pos, expr.report, offsetAdjust, ir.ekey); NFAVertex v = getVertex(pos); auto &reports = (*graph)[v].reports; diff --git a/src/nfagraph/ng_builder.h b/src/nfagraph/ng_builder.h index 5bd95ba9..df2e0dd8 100644 --- a/src/nfagraph/ng_builder.h +++ b/src/nfagraph/ng_builder.h @@ -1,5 +1,5 @@ /* - * 
Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -28,7 +28,7 @@ /** \file * \brief: NFA Graph Builder: used by Glushkov construction to construct an - * NGWrapper from a parsed expression. + * NGHolder from a parsed expression. */ #ifndef NG_BUILDER_H @@ -44,8 +44,8 @@ namespace ue2 { class CharReach; -class NGWrapper; class ReportManager; +struct BuiltExpression; struct CompileContext; class ParsedExpression; @@ -83,10 +83,10 @@ public: unsigned posOffset) = 0; /** - * \brief Returns the built NGWrapper graph. + * \brief Returns the built NGHolder graph and ExpressionInfo. * Note that this builder cannot be used after this call. */ - virtual std::unique_ptr getGraph() = 0; + virtual BuiltExpression getGraph() = 0; }; /** Construct a usable NFABuilder. */ diff --git a/src/nfagraph/ng_dump.cpp b/src/nfagraph/ng_dump.cpp index fc840f25..094d2401 100644 --- a/src/nfagraph/ng_dump.cpp +++ b/src/nfagraph/ng_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,24 +35,25 @@ #include "config.h" -#include "ng_dump.h" +#include "nfagraph/ng_dump.h" -#include "hwlm/hwlm_build.h" -#include "ng.h" -#include "ng_util.h" -#include "parser/position.h" +#include "hs_compile.h" /* for HS_MODE_* flags */ #include "ue2common.h" +#include "compiler/compiler.h" +#include "hwlm/hwlm_build.h" #include "nfa/accel.h" #include "nfa/nfa_internal.h" // for MO_INVALID_IDX -#include "smallwrite/smallwrite_dump.h" +#include "nfagraph/ng.h" +#include "nfagraph/ng_util.h" +#include "parser/position.h" #include "rose/rose_build.h" #include "rose/rose_internal.h" +#include 
"smallwrite/smallwrite_dump.h" #include "util/bitutils.h" #include "util/dump_charclass.h" #include "util/report.h" #include "util/report_manager.h" #include "util/ue2string.h" -#include "hs_compile.h" /* for HS_MODE_* flags */ #include #include @@ -287,13 +288,13 @@ void dumpGraphImpl(const char *name, const GraphT &g, // manual instantiation of templated dumpGraph above. template void dumpGraphImpl(const char *, const NGHolder &); -void dumpDotWrapperImpl(const NGWrapper &nw, const char *name, - const Grey &grey) { +void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr, + const char *name, const Grey &grey) { if (grey.dumpFlags & Grey::DUMP_INT_GRAPH) { stringstream ss; - ss << grey.dumpPath << "Expr_" << nw.expressionIndex << "_" << name << ".dot"; + ss << grey.dumpPath << "Expr_" << expr.index << "_" << name << ".dot"; DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str()); - dumpGraphImpl(ss.str().c_str(), nw); + dumpGraphImpl(ss.str().c_str(), g); } } diff --git a/src/nfagraph/ng_dump.h b/src/nfagraph/ng_dump.h index b20d9f1b..077f07ce 100644 --- a/src/nfagraph/ng_dump.h +++ b/src/nfagraph/ng_dump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,7 +48,7 @@ namespace ue2 { class NGHolder; class NG; -class NGWrapper; +class ExpressionInfo; class ReportManager; // Implementations for stubs below -- all have the suffix "Impl". 
@@ -61,7 +61,8 @@ void dumpGraphImpl(const char *name, const GraphT &g); template void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm); -void dumpDotWrapperImpl(const NGWrapper &w, const char *name, const Grey &grey); +void dumpDotWrapperImpl(const NGHolder &g, const ExpressionInfo &expr, + const char *name, const Grey &grey); void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, u32 comp, const Grey &grey); @@ -88,10 +89,10 @@ static inline void dumpGraph(UNUSED const char *name, UNUSED const GraphT &g) { // Stubs which call through to dump code if compiled in. UNUSED static inline -void dumpDotWrapper(UNUSED const NGWrapper &w, UNUSED const char *name, - UNUSED const Grey &grey) { +void dumpDotWrapper(UNUSED const NGHolder &g, UNUSED const ExpressionInfo &expr, + UNUSED const char *name, UNUSED const Grey &grey) { #ifdef DUMP_SUPPORT - dumpDotWrapperImpl(w, name, grey); + dumpDotWrapperImpl(g, expr, name, grey); #endif } diff --git a/src/nfagraph/ng_expr_info.cpp b/src/nfagraph/ng_expr_info.cpp index 7419609b..1f601c61 100644 --- a/src/nfagraph/ng_expr_info.cpp +++ b/src/nfagraph/ng_expr_info.cpp @@ -27,8 +27,8 @@ */ /** \file - * \brief Code for discovering properties of an NGWrapper used by - * hs_expression_info. + * \brief Code for discovering properties of an NFA graph used by + * hs_expression_info(). 
*/ #include "ng_expr_info.h" @@ -58,42 +58,42 @@ namespace ue2 { /* get rid of leading \b and multiline ^ vertices */ static -void removeLeadingVirtualVerticesFromRoot(NGWrapper &w, NFAVertex root) { +void removeLeadingVirtualVerticesFromRoot(NGHolder &g, NFAVertex root) { vector victims; - for (auto v : adjacent_vertices_range(root, w)) { - if (w[v].assert_flags & POS_FLAG_VIRTUAL_START) { + for (auto v : adjacent_vertices_range(root, g)) { + if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) { DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n"); victims.push_back(v); } } for (auto u : victims) { - for (auto v : adjacent_vertices_range(u, w)) { - add_edge_if_not_present(root, v, w); + for (auto v : adjacent_vertices_range(u, g)) { + add_edge_if_not_present(root, v, g); } } - remove_vertices(victims, w); + remove_vertices(victims, g); } static -void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v, +void checkVertex(const ReportManager &rm, const NGHolder &g, NFAVertex v, const vector &depths, DepthMinMax &info) { - if (is_any_accept(v, w)) { + if (is_any_accept(v, g)) { return; } - if (is_any_start(v, w)) { + if (is_any_start(v, g)) { info.min = 0; info.max = max(info.max, depth(0)); return; } - u32 idx = w[v].index; + u32 idx = g[v].index; assert(idx < depths.size()); const DepthMinMax &d = depths.at(idx); - for (ReportID report_id : w[v].reports) { + for (ReportID report_id : g[v].reports) { const Report &report = rm.getReport(report_id); assert(report.type == EXTERNAL_CALLBACK); @@ -118,7 +118,7 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v, rd.max = min(rd.max, max_offset); } - DEBUG_PRINTF("vertex %zu report %u: %s\n", w[v].index, report_id, + DEBUG_PRINTF("vertex %zu report %u: %s\n", g[v].index, report_id, rd.str().c_str()); info = unionDepthMinMax(info, rd); @@ -126,8 +126,8 @@ void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v, } static -bool hasOffsetAdjust(const ReportManager 
&rm, const NGWrapper &w) { - for (const auto &report_id : all_reports(w)) { +bool hasOffsetAdjust(const ReportManager &rm, const NGHolder &g) { + for (const auto &report_id : all_reports(g)) { if (rm.getReport(report_id).offsetAdjust) { return true; } @@ -135,28 +135,29 @@ bool hasOffsetAdjust(const ReportManager &rm, const NGWrapper &w) { return false; } -void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info) { +void fillExpressionInfo(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr, hs_expr_info *info) { assert(info); /* ensure utf8 starts at cp boundary */ - ensureCodePointStart(rm, w); - resolveAsserts(rm, w); - optimiseVirtualStarts(w); + ensureCodePointStart(rm, g, expr); + resolveAsserts(rm, g, expr); + optimiseVirtualStarts(g); - removeLeadingVirtualVerticesFromRoot(w, w.start); - removeLeadingVirtualVerticesFromRoot(w, w.startDs); + removeLeadingVirtualVerticesFromRoot(g, g.start); + removeLeadingVirtualVerticesFromRoot(g, g.startDs); vector depths; - calcDepthsFrom(w, w.start, depths); + calcDepthsFrom(g, g.start, depths); DepthMinMax d; - for (auto u : inv_adjacent_vertices_range(w.accept, w)) { - checkVertex(rm, w, u, depths, d); + for (auto u : inv_adjacent_vertices_range(g.accept, g)) { + checkVertex(rm, g, u, depths, d); } - for (auto u : inv_adjacent_vertices_range(w.acceptEod, w)) { - checkVertex(rm, w, u, depths, d); + for (auto u : inv_adjacent_vertices_range(g.acceptEod, g)) { + checkVertex(rm, g, u, depths, d); } if (d.max.is_finite()) { @@ -170,9 +171,9 @@ void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info) { info->min_width = UINT_MAX; } - info->unordered_matches = hasOffsetAdjust(rm, w); - info->matches_at_eod = can_match_at_eod(w); - info->matches_only_at_eod = can_only_match_at_eod(w); + info->unordered_matches = hasOffsetAdjust(rm, g); + info->matches_at_eod = can_match_at_eod(g); + info->matches_only_at_eod = can_only_match_at_eod(g); } } // namespace ue2 diff --git 
a/src/nfagraph/ng_expr_info.h b/src/nfagraph/ng_expr_info.h index dcc5a419..e518738c 100644 --- a/src/nfagraph/ng_expr_info.h +++ b/src/nfagraph/ng_expr_info.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,7 +27,7 @@ */ /** \file - * \brief Code for discovering properties of an NGWrapper used by + * \brief Code for discovering properties of an expression used by * hs_expression_info. */ @@ -36,14 +36,14 @@ struct hs_expr_info; -#include "ue2common.h" - namespace ue2 { -class NGWrapper; +class ExpressionInfo; +class NGHolder; class ReportManager; -void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info); +void fillExpressionInfo(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr, hs_expr_info *info); } // namespace ue2 diff --git a/src/nfagraph/ng_extparam.cpp b/src/nfagraph/ng_extparam.cpp index a504ac50..31a1f81b 100644 --- a/src/nfagraph/ng_extparam.cpp +++ b/src/nfagraph/ng_extparam.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,16 +38,19 @@ * match given these constraints, or transform the graph in order to make a * constraint implicit. 
*/ + +#include "ng_extparam.h" + #include "ng.h" #include "ng_depth.h" #include "ng_dump.h" -#include "ng_extparam.h" #include "ng_prune.h" #include "ng_reports.h" #include "ng_som_util.h" #include "ng_width.h" #include "ng_util.h" #include "ue2common.h" +#include "compiler/compiler.h" #include "parser/position.h" #include "util/compile_context.h" #include "util/compile_error.h" @@ -129,7 +132,8 @@ DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) { /** \brief Replace the graph's reports with new reports that specify bounds. */ static -void updateReportBounds(ReportManager &rm, NGWrapper &g, NFAVertex accept, +void updateReportBounds(ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr, NFAVertex accept, set &done) { for (auto v : inv_adjacent_vertices_range(accept, g)) { // Don't operate on g.accept itself. @@ -153,16 +157,16 @@ void updateReportBounds(ReportManager &rm, NGWrapper &g, NFAVertex accept, // Note that we need to cope with offset adjustment here. - ir.minOffset = g.min_offset - ir.offsetAdjust; - if (g.max_offset == MAX_OFFSET) { + ir.minOffset = expr.min_offset - ir.offsetAdjust; + if (expr.max_offset == MAX_OFFSET) { ir.maxOffset = MAX_OFFSET; } else { - ir.maxOffset = g.max_offset - ir.offsetAdjust; + ir.maxOffset = expr.max_offset - ir.offsetAdjust; } assert(ir.maxOffset >= ir.minOffset); - ir.minLength = g.min_length; - if (g.min_length && !g.som) { + ir.minLength = expr.min_length; + if (expr.min_length && !expr.som) { ir.quashSom = true; } @@ -196,22 +200,23 @@ bool hasVirtualStarts(const NGHolder &g) { * anchored and unanchored paths, but it's too tricky for the moment. 
*/ static -bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth, +bool anchorPatternWithBoundedRepeat(NGHolder &g, const ExpressionInfo &expr, + const depth &minWidth, const depth &maxWidth) { - assert(!g.som); - assert(g.max_offset != MAX_OFFSET); + assert(!expr.som); + assert(expr.max_offset != MAX_OFFSET); assert(minWidth <= maxWidth); assert(maxWidth.is_reachable()); DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n", - minWidth.str().c_str(), maxWidth.str().c_str(), g.min_offset, - g.max_offset); + minWidth.str().c_str(), maxWidth.str().c_str(), + expr.min_offset, expr.max_offset); - if (g.max_offset > MAX_MAXOFFSET_TO_ANCHOR) { + if (expr.max_offset > MAX_MAXOFFSET_TO_ANCHOR) { return false; } - if (g.max_offset < minWidth) { + if (expr.max_offset < minWidth) { assert(0); return false; } @@ -232,10 +237,10 @@ bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth, u32 min_bound, max_bound; if (maxWidth.is_infinite()) { min_bound = 0; - max_bound = g.max_offset - minWidth; + max_bound = expr.max_offset - minWidth; } else { - min_bound = g.min_offset > maxWidth ? g.min_offset - maxWidth : 0; - max_bound = g.max_offset - minWidth; + min_bound = expr.min_offset > maxWidth ? 
expr.min_offset - maxWidth : 0; + max_bound = expr.max_offset - minWidth; } DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound); @@ -315,7 +320,7 @@ NFAVertex findSingleCyclic(const NGHolder &g) { } static -bool hasOffsetAdjust(const ReportManager &rm, NGWrapper &g, +bool hasOffsetAdjust(const ReportManager &rm, NGHolder &g, int *adjust) { const auto &reports = all_reports(g); if (reports.empty()) { @@ -342,10 +347,11 @@ bool hasOffsetAdjust(const ReportManager &rm, NGWrapper &g, * /foo.*bar/{min_length=100} --> /foo.{94,}bar/ */ static -bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { - assert(g.min_length); +bool transformMinLengthToRepeat(const ReportManager &rm, NGHolder &g, + ExpressionInfo &expr) { + assert(expr.min_length); - if (g.min_length > MAX_MINLENGTH_TO_CONVERT) { + if (expr.min_length > MAX_MINLENGTH_TO_CONVERT) { return false; } @@ -437,10 +443,10 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { DEBUG_PRINTF("width=%u, vertex %zu is cyclic\n", width, g[cyclic].index); - if (width >= g.min_length) { + if (width >= expr.min_length) { DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n", - g.min_length, width); - g.min_length = 0; + expr.min_length, width); + expr.min_length = 0; return true; } @@ -468,7 +474,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { const CharReach &cr = g[cyclic].char_reach; - for (u32 i = 0; i < g.min_length - width - 1; ++i) { + for (u32 i = 0; i < expr.min_length - width - 1; ++i) { v = add_vertex(g); g[v].char_reach = cr; @@ -487,19 +493,19 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { renumber_edges(g); clearReports(g); - g.min_length = 0; + expr.min_length = 0; return true; } static -bool hasExtParams(const NGWrapper &g) { - if (g.min_length != 0) { +bool hasExtParams(const ExpressionInfo &expr) { + if (expr.min_length != 0) { return true; } - if (g.min_offset != 0) { + if (expr.min_offset != 0) 
{ return true; } - if (g.max_offset != MAX_OFFSET) { + if (expr.max_offset != MAX_OFFSET) { return true; } return false; @@ -535,7 +541,7 @@ const depth& minDistToAccept(const NFAVertexBidiDepth &d) { } static -bool isEdgePrunable(const NGWrapper &g, +bool isEdgePrunable(const NGHolder &g, const ExpressionInfo &expr, const vector &depths, const NFAEdge &e) { const NFAVertex u = source(e, g); @@ -564,29 +570,29 @@ bool isEdgePrunable(const NGWrapper &g, const NFAVertexBidiDepth &du = depths.at(u_idx); const NFAVertexBidiDepth &dv = depths.at(v_idx); - if (g.min_offset) { + if (expr.min_offset) { depth max_offset = maxDistFromStart(du) + maxDistToAccept(dv); - if (max_offset.is_finite() && max_offset < g.min_offset) { + if (max_offset.is_finite() && max_offset < expr.min_offset) { DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str()); return true; } } - if (g.max_offset != MAX_OFFSET) { + if (expr.max_offset != MAX_OFFSET) { depth min_offset = minDistFromStart(du) + minDistToAccept(dv); assert(min_offset.is_finite()); - if (min_offset > g.max_offset) { + if (min_offset > expr.max_offset) { DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str()); return true; } } - if (g.min_length && is_any_accept(v, g)) { + if (expr.min_length && is_any_accept(v, g)) { // Simple take on min_length. If we're an edge to accept and our max // dist from start is too small, we can be pruned. 
const depth &width = du.fromStart.max; - if (width.is_finite() && width < g.min_length) { + if (width.is_finite() && width < expr.min_length) { DEBUG_PRINTF("max width %s from start too small for min_length\n", width.str().c_str()); return true; @@ -597,14 +603,14 @@ bool isEdgePrunable(const NGWrapper &g, } static -void pruneExtUnreachable(NGWrapper &g) { +void pruneExtUnreachable(NGHolder &g, const ExpressionInfo &expr) { vector depths; calcDepths(g, depths); vector dead; for (const auto &e : edges_range(g)) { - if (isEdgePrunable(g, depths, e)) { + if (isEdgePrunable(g, expr, depths, e)) { DEBUG_PRINTF("pruning\n"); dead.push_back(e); } @@ -621,8 +627,8 @@ void pruneExtUnreachable(NGWrapper &g) { /** Remove vacuous edges in graphs where the min_offset or min_length * constraints dictate that they can never produce a match. */ static -void pruneVacuousEdges(NGWrapper &g) { - if (!g.min_length && !g.min_offset) { +void pruneVacuousEdges(NGHolder &g, const ExpressionInfo &expr) { + if (!expr.min_length && !expr.min_offset) { return; } @@ -634,14 +640,14 @@ void pruneVacuousEdges(NGWrapper &g) { // Special case: Crudely remove vacuous edges from start in graphs with a // min_offset. - if (g.min_offset && u == g.start && is_any_accept(v, g)) { + if (expr.min_offset && u == g.start && is_any_accept(v, g)) { DEBUG_PRINTF("vacuous edge in graph with min_offset!\n"); dead.push_back(e); continue; } // If a min_length is set, vacuous edges can be removed. 
- if (g.min_length && is_any_start(u, g) && is_any_accept(v, g)) { + if (expr.min_length && is_any_start(u, g) && is_any_accept(v, g)) { DEBUG_PRINTF("vacuous edge in graph with min_length!\n"); dead.push_back(e); continue; @@ -657,7 +663,8 @@ void pruneVacuousEdges(NGWrapper &g) { } static -void pruneUnmatchable(NGWrapper &g, const vector &depths, +void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr, + const vector &depths, const ReportManager &rm, NFAVertex accept) { vector dead; @@ -676,16 +683,16 @@ void pruneUnmatchable(NGWrapper &g, const vector &depths, d.min += adj.first; d.max += adj.second; - if (d.max.is_finite() && d.max < g.min_length) { + if (d.max.is_finite() && d.max < expr.min_length) { DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n", - d.max.str().c_str(), g.min_length); + d.max.str().c_str(), expr.min_length); dead.push_back(e); continue; } - if (g.max_offset != MAX_OFFSET && d.min > g.max_offset) { + if (expr.max_offset != MAX_OFFSET && d.min > expr.max_offset) { DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n", - d.min.str().c_str(), g.max_offset); + d.min.str().c_str(), expr.max_offset); dead.push_back(e); continue; } @@ -697,15 +704,16 @@ void pruneUnmatchable(NGWrapper &g, const vector &depths, /** Remove edges to accepts that can never produce a match long enough to * satisfy our min_length and max_offset constraints. 
*/ static -void pruneUnmatchable(NGWrapper &g, const ReportManager &rm) { - if (!g.min_length) { +void pruneUnmatchable(NGHolder &g, const ExpressionInfo &expr, + const ReportManager &rm) { + if (!expr.min_length) { return; } vector depths = getDistancesFromSOM(g); - pruneUnmatchable(g, depths, rm, g.accept); - pruneUnmatchable(g, depths, rm, g.acceptEod); + pruneUnmatchable(g, expr, depths, rm, g.accept); + pruneUnmatchable(g, expr, depths, rm, g.acceptEod); pruneUseless(g); } @@ -732,9 +740,9 @@ bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) { return false; } -void handleExtendedParams(ReportManager &rm, NGWrapper &g, +void handleExtendedParams(ReportManager &rm, NGHolder &g, ExpressionInfo &expr, UNUSED const CompileContext &cc) { - if (!hasExtParams(g)) { + if (!hasExtParams(expr)) { return; } @@ -751,50 +759,50 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g, DepthMinMax match_depths = findMatchLengths(rm, g); DEBUG_PRINTF("match depths %s\n", match_depths.str().c_str()); - if (is_anchored && maxWidth.is_finite() && g.min_offset > maxWidth) { + if (is_anchored && maxWidth.is_finite() && expr.min_offset > maxWidth) { ostringstream oss; oss << "Expression is anchored and cannot satisfy min_offset=" - << g.min_offset << " as it can only produce matches of length " + << expr.min_offset << " as it can only produce matches of length " << maxWidth << " bytes at most."; - throw CompileError(g.expressionIndex, oss.str()); + throw CompileError(expr.index, oss.str()); } - if (minWidth > g.max_offset) { + if (minWidth > expr.max_offset) { ostringstream oss; - oss << "Expression has max_offset=" << g.max_offset << " but requires " - << minWidth << " bytes to match."; - throw CompileError(g.expressionIndex, oss.str()); + oss << "Expression has max_offset=" << expr.max_offset + << " but requires " << minWidth << " bytes to match."; + throw CompileError(expr.index, oss.str()); } - if (maxWidth.is_finite() && match_depths.max < g.min_length) 
{ + if (maxWidth.is_finite() && match_depths.max < expr.min_length) { ostringstream oss; - oss << "Expression has min_length=" << g.min_length << " but can " + oss << "Expression has min_length=" << expr.min_length << " but can " "only produce matches of length " << match_depths.max << " bytes at most."; - throw CompileError(g.expressionIndex, oss.str()); + throw CompileError(expr.index, oss.str()); } - if (g.min_length && g.min_length <= match_depths.min) { + if (expr.min_length && expr.min_length <= match_depths.min) { DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n", - g.min_length); - g.min_length = 0; + expr.min_length); + expr.min_length = 0; } - if (!hasExtParams(g)) { + if (!hasExtParams(expr)) { return; } - pruneVacuousEdges(g); - pruneUnmatchable(g, rm); + pruneVacuousEdges(g, expr); + pruneUnmatchable(g, expr, rm); if (!has_offset_adj) { - pruneExtUnreachable(g); + pruneExtUnreachable(g, expr); } // We may have removed all the edges to accept, in which case this // expression cannot match. if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) { - throw CompileError(g.expressionIndex, "Extended parameter " + throw CompileError(expr.index, "Extended parameter " "constraints can not be satisfied for any match from " "this expression."); } @@ -812,27 +820,28 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g, // If the pattern is completely anchored and has a min_length set, this can // be converted to a min_offset. 
- if (g.min_length && (g.min_offset <= g.min_length) && is_anchored) { - DEBUG_PRINTF("converting min_length to min_offset=%llu for " - "anchored case\n", g.min_length); - g.min_offset = g.min_length; - g.min_length = 0; + if (expr.min_length && (expr.min_offset <= expr.min_length) && + is_anchored) { + DEBUG_PRINTF("converting min_length to min_offset=%llu for " + "anchored case\n", expr.min_length); + expr.min_offset = expr.min_length; + expr.min_length = 0; } - if (g.min_offset && g.min_offset <= minWidth && !has_offset_adj) { + if (expr.min_offset && expr.min_offset <= minWidth && !has_offset_adj) { DEBUG_PRINTF("min_offset=%llu constraint is unnecessary\n", - g.min_offset); - g.min_offset = 0; + expr.min_offset); + expr.min_offset = 0; } - if (!hasExtParams(g)) { + if (!hasExtParams(expr)) { return; } // If the pattern has a min_length and is of "ratchet" form with one // unbounded repeat, that repeat can become a bounded repeat. // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/ - if (g.min_length && transformMinLengthToRepeat(rm, g)) { + if (expr.min_length && transformMinLengthToRepeat(rm, g, expr)) { DEBUG_PRINTF("converted min_length to bounded repeat\n"); // recalc minWidth = findMinWidth(g); @@ -846,28 +855,28 @@ void handleExtendedParams(ReportManager &rm, NGWrapper &g, // Note that it is possible to handle graphs that have a combination of // anchored and unanchored paths, but it's too tricky for the moment. 
- if (g.max_offset != MAX_OFFSET && !g.som && !g.min_length && - !has_offset_adj && isUnanchored(g)) { - if (anchorPatternWithBoundedRepeat(g, minWidth, maxWidth)) { + if (expr.max_offset != MAX_OFFSET && !expr.som && !expr.min_length && + !has_offset_adj && isUnanchored(g)) { + if (anchorPatternWithBoundedRepeat(g, expr, minWidth, maxWidth)) { DEBUG_PRINTF("minWidth=%s, maxWidth=%s\n", minWidth.str().c_str(), maxWidth.str().c_str()); if (minWidth == maxWidth) { // For a fixed width pattern, we can retire the offsets as they // are implicit in the graph now. - g.min_offset = 0; - g.max_offset = MAX_OFFSET; + expr.min_offset = 0; + expr.max_offset = MAX_OFFSET; } } } //dumpGraph("final.dot", g); - if (!hasExtParams(g)) { + if (!hasExtParams(expr)) { return; } set done; - updateReportBounds(rm, g, g.accept, done); - updateReportBounds(rm, g, g.acceptEod, done); + updateReportBounds(rm, g, expr, g.accept, done); + updateReportBounds(rm, g, expr, g.acceptEod, done); } } // namespace ue2 diff --git a/src/nfagraph/ng_extparam.h b/src/nfagraph/ng_extparam.h index d5df1cf6..798acd3f 100644 --- a/src/nfagraph/ng_extparam.h +++ b/src/nfagraph/ng_extparam.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,10 +37,11 @@ namespace ue2 { struct CompileContext; -class NGWrapper; +class ExpressionInfo; +class NGHolder; class ReportManager; -void handleExtendedParams(ReportManager &rm, NGWrapper &g, +void handleExtendedParams(ReportManager &rm, NGHolder &g, ExpressionInfo &expr, const CompileContext &cc); } // namespace ue2 diff --git a/src/nfagraph/ng_literal_component.cpp b/src/nfagraph/ng_literal_component.cpp index e3cfe867..de05e490 100644 --- a/src/nfagraph/ng_literal_component.cpp +++ b/src/nfagraph/ng_literal_component.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,12 +30,15 @@ * \brief Literal Component Splitting. Identifies literals that span the * graph and moves them into Rose. */ + +#include "ng_literal_component.h" + #include "grey.h" #include "ng.h" -#include "ng_literal_component.h" #include "ng_prune.h" #include "ng_util.h" #include "ue2common.h" +#include "compiler/compiler.h" #include "rose/rose_build.h" #include "util/container.h" #include "util/graph.h" @@ -47,8 +50,8 @@ using namespace std; namespace ue2 { static -bool isLiteralChar(const NGWrapper &g, NFAVertex v, - bool &nocase, bool &casefixed) { +bool isLiteralChar(const NGHolder &g, NFAVertex v, bool &nocase, + bool &casefixed) { const CharReach &cr = g[v].char_reach; const size_t num = cr.count(); if (num > 2) { @@ -93,7 +96,7 @@ void addToString(string &s, const NGHolder &g, NFAVertex v) { } static -bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored, +bool splitOffLiteral(NG &ng, NGHolder &g, NFAVertex v, const bool anchored, set &dead) { DEBUG_PRINTF("examine vertex %zu\n", g[v].index); bool nocase = false, casefixed = false; @@ -185,7 +188,7 @@ bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored, } /** \brief Split off literals. True if any changes were made to the graph. 
*/ -bool splitOffLiterals(NG &ng, NGWrapper &g) { +bool splitOffLiterals(NG &ng, NGHolder &g) { if (!ng.cc.grey.allowLiteral) { return false; } diff --git a/src/nfagraph/ng_literal_component.h b/src/nfagraph/ng_literal_component.h index dc177c40..1f284ce3 100644 --- a/src/nfagraph/ng_literal_component.h +++ b/src/nfagraph/ng_literal_component.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,10 +37,10 @@ namespace ue2 { class NG; -class NGWrapper; +class NGHolder; /** \brief Split off literals. True if any changes were made to the graph. */ -bool splitOffLiterals(NG &ng, NGWrapper &graph); +bool splitOffLiterals(NG &ng, NGHolder &g); } // namespace ue2 diff --git a/src/nfagraph/ng_som.cpp b/src/nfagraph/ng_som.cpp index 8d3d75a3..5bf52915 100644 --- a/src/nfagraph/ng_som.cpp +++ b/src/nfagraph/ng_som.cpp @@ -29,6 +29,9 @@ /** \file * \brief SOM ("Start of Match") analysis. */ + +#include "ng_som.h" + #include "ng.h" #include "ng_dump.h" #include "ng_equivalence.h" @@ -40,7 +43,6 @@ #include "ng_redundancy.h" #include "ng_region.h" #include "ng_reports.h" -#include "ng_som.h" #include "ng_som_add_redundancy.h" #include "ng_som_util.h" #include "ng_split.h" @@ -49,6 +51,7 @@ #include "ng_width.h" #include "grey.h" #include "ue2common.h" +#include "compiler/compiler.h" #include "nfa/goughcompile.h" #include "nfa/nfa_internal.h" // for MO_INVALID_IDX #include "parser/position.h" @@ -1584,8 +1587,9 @@ void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p, * implement the full pattern. 
*/ static -void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, - vector &plan, const u32 first_som_slot) { +void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id, + NGHolder &g, vector &plan, + const u32 first_som_slot) { ReportManager &rm = ng.rm; SomSlotManager &ssm = ng.ssm; @@ -1598,14 +1602,14 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, // Root plan, which already has a SOM slot assigned (first_som_slot). dumpSomPlan(g, plan.front(), 0); - dumpSomSubComponent(*plan.front().prefix, "04_som", w.expressionIndex, - comp_id, 0, ng.cc.grey); + dumpSomSubComponent(*plan.front().prefix, "04_som", expr.index, comp_id, 0, + ng.cc.grey); assert(plan.front().prefix); if (plan.front().escapes.any() && !plan.front().is_reset) { /* setup escaper for first som location */ if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes, first_som_slot)) { - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } } @@ -1617,7 +1621,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, for (++it; it != plan.end(); ++it) { const u32 plan_num = it - plan.begin(); dumpSomPlan(g, *it, plan_num); - dumpSomSubComponent(*it->prefix, "04_som", w.expressionIndex, comp_id, + dumpSomSubComponent(*it->prefix, "04_som", expr.index, comp_id, plan_num, ng.cc.grey); assert(it->parent < plan_num); @@ -1628,7 +1632,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder &g, assert(!it->no_implement); if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) { - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in); updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out); @@ -1639,7 +1643,7 @@ void implementSomPlan(NG &ng, const NGWrapper &w, u32 comp_id, NGHolder 
&g, renumber_vertices(*plan.front().prefix); assert(plan.front().prefix->kind == NFA_OUTFIX); if (!ng.addHolder(*plan.front().prefix)) { - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } } } @@ -1852,7 +1856,7 @@ bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) { } static -u32 doSomRevNfaPrefix(NG &ng, const NGWrapper &w, NGHolder &g, +u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, NGHolder &g, const CompileContext &cc) { depth maxWidth = findMaxWidth(g); @@ -1861,7 +1865,7 @@ u32 doSomRevNfaPrefix(NG &ng, const NGWrapper &w, NGHolder &g, auto nfa = makeBareSomRevNfa(g, cc); if (!nfa) { - throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } if (ng.cc.streaming) { @@ -2055,8 +2059,8 @@ void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr &prefix, } static -sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, - som_type som, +sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, + u32 comp_id, som_type som, const ue2::unordered_map ®ions, const map &info, map::const_iterator lower_bound) { @@ -2077,7 +2081,7 @@ sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, // This is an optimisation: if we can't build a Haig from a portion of // the graph, then we won't be able to manage it as an outfix either // when we fall back. 
- throw CompileError(w.expressionIndex, "Pattern is too large."); + throw CompileError(expr.index, "Pattern is too large."); } while (1) { @@ -2152,7 +2156,7 @@ sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, goto next_try; } - implementSomPlan(ng, w, comp_id, g, plan, som_loc); + implementSomPlan(ng, expr, comp_id, g, plan, som_loc); Report ir = makeCallback(0U, 0); assert(!plan.empty()); @@ -2877,7 +2881,7 @@ unique_ptr makePrefixForChain(NGHolder &g, return prefix; } -sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, +sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id, som_type som) { assert(som); DEBUG_PRINTF("som hello\n"); @@ -3001,7 +3005,7 @@ sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, /* create prefix to set the som_loc */ updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET); if (prefix_by_rev) { - u32 rev_comp_id = doSomRevNfaPrefix(ng, w, *prefix, cc); + u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc); updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); } renumber_vertices(*prefix); @@ -3084,18 +3088,18 @@ sombe_rv doSom(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET); } if (prefix_by_rev && !plan.front().no_implement) { - u32 rev_comp_id = doSomRevNfaPrefix(ng, w, *prefix, cc); + u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc); updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id); } - implementSomPlan(ng, w, comp_id, g, plan, som_loc); + implementSomPlan(ng, expr, comp_id, g, plan, som_loc); DEBUG_PRINTF("success\n"); return SOMBE_HANDLED_INTERNAL; } -sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const NGWrapper &w, u32 comp_id, - som_type som) { +sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr, + u32 comp_id, som_type som) { assert(som); DEBUG_PRINTF("som+haig hello\n"); @@ -3132,7 +3136,7 @@ sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const 
NGWrapper &w, u32 comp_id, buildRegionMapping(g, regions, info, true); sombe_rv rv = - doHaigLitSom(ng, g, w, comp_id, som, regions, info, info.begin()); + doHaigLitSom(ng, g, expr, comp_id, som, regions, info, info.begin()); if (rv == SOMBE_FAIL) { clear_graph(g); cloneHolder(g, g_pristine); diff --git a/src/nfagraph/ng_som.h b/src/nfagraph/ng_som.h index 70710945..ecae4c67 100644 --- a/src/nfagraph/ng_som.h +++ b/src/nfagraph/ng_som.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,12 +34,14 @@ #define NG_SOM_H #include "som/som.h" +#include "ue2common.h" namespace ue2 { +class ExpressionInfo; class NG; class NGHolder; -class NGWrapper; +class ReportManager; struct Grey; enum sombe_rv { @@ -63,14 +65,14 @@ enum sombe_rv { * May throw a "Pattern too large" exception if prefixes of the * pattern are too large to compile. */ -sombe_rv doSom(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id, +sombe_rv doSom(NG &ng, NGHolder &h, const ExpressionInfo &expr, u32 comp_id, som_type som); /** Returns SOMBE_FAIL (and the original graph) if SOM cannot be established. * May also throw pattern too large if prefixes of the pattern are too large to * compile. 
*/ -sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id, - som_type som); +sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const ExpressionInfo &expr, + u32 comp_id, som_type som); void makeReportsSomPass(ReportManager &rm, NGHolder &g); diff --git a/src/nfagraph/ng_utf8.cpp b/src/nfagraph/ng_utf8.cpp index 383aa142..89500fe3 100644 --- a/src/nfagraph/ng_utf8.cpp +++ b/src/nfagraph/ng_utf8.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,6 +34,7 @@ #include "ng.h" #include "ng_prune.h" #include "ng_util.h" +#include "compiler/compiler.h" #include "util/graph_range.h" #include "util/unicode_def.h" @@ -45,14 +46,14 @@ using namespace std; namespace ue2 { static -void allowIllegal(NGWrapper &w, NFAVertex v, u8 pred_char) { - if (in_degree(v, w) != 1) { +void allowIllegal(NGHolder &g, NFAVertex v, u8 pred_char) { + if (in_degree(v, g) != 1) { DEBUG_PRINTF("unexpected pred\n"); assert(0); /* should be true due to the early stage of this analysis */ return; } - CharReach &cr = w[v].char_reach; + CharReach &cr = g[v].char_reach; if (pred_char == 0xe0) { assert(cr.isSubsetOf(CharReach(0xa0, 0xbf))); if (cr == CharReach(0xa0, 0xbf)) { @@ -79,8 +80,8 @@ void allowIllegal(NGWrapper &w, NFAVertex v, u8 pred_char) { * above \\x{10ffff} or they represent overlong encodings. As we require valid * UTF-8 input, we have no defined behaviour in these cases, as a result we can * accept them if it simplifies the graph. 
*/ -void relaxForbiddenUtf8(NGWrapper &w) { - if (!w.utf8) { +void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr) { + if (!expr.utf8) { return; } @@ -88,12 +89,12 @@ void relaxForbiddenUtf8(NGWrapper &w) { const CharReach f0(0xf0); const CharReach f4(0xf4); - for (auto v : vertices_range(w)) { - const CharReach &cr = w[v].char_reach; + for (auto v : vertices_range(g)) { + const CharReach &cr = g[v].char_reach; if (cr == e0 || cr == f0 || cr == f4) { u8 pred_char = cr.find_first(); - for (auto t : adjacent_vertices_range(v, w)) { - allowIllegal(w, t, pred_char); + for (auto t : adjacent_vertices_range(v, g)) { + allowIllegal(g, t, pred_char); } } } diff --git a/src/nfagraph/ng_utf8.h b/src/nfagraph/ng_utf8.h index e1b08e40..7c428833 100644 --- a/src/nfagraph/ng_utf8.h +++ b/src/nfagraph/ng_utf8.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,7 @@ namespace ue2 { -class NGWrapper; +class ExpressionInfo; class NGHolder; /** \brief Relax forbidden UTF-8 sequences. @@ -44,7 +44,7 @@ class NGHolder; * above \\x{10ffff} or they represent overlong encodings. As we require valid * UTF-8 input, we have no defined behaviour in these cases, as a result we can * accept them if it simplifies the graph. 
*/ -void relaxForbiddenUtf8(NGWrapper &w); +void relaxForbiddenUtf8(NGHolder &g, const ExpressionInfo &expr); /** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex * where possible, based on the assumption that we will always be matching diff --git a/src/nfagraph/ng_vacuous.cpp b/src/nfagraph/ng_vacuous.cpp index 53672a1b..d1123dff 100644 --- a/src/nfagraph/ng_vacuous.cpp +++ b/src/nfagraph/ng_vacuous.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,29 +34,31 @@ #include "grey.h" #include "ng.h" #include "ng_util.h" +#include "compiler/compiler.h" using namespace std; namespace ue2 { static -ReportID getInternalId(ReportManager &rm, const NGWrapper &graph) { - Report ir = rm.getBasicInternalReport(graph); +ReportID getInternalId(ReportManager &rm, const ExpressionInfo &expr) { + Report ir = rm.getBasicInternalReport(expr); // Apply any extended params. - if (graph.min_offset || graph.max_offset != MAX_OFFSET) { - ir.minOffset = graph.min_offset; - ir.maxOffset = graph.max_offset; + if (expr.min_offset || expr.max_offset != MAX_OFFSET) { + ir.minOffset = expr.min_offset; + ir.maxOffset = expr.max_offset; } - assert(!graph.min_length); // should be handled elsewhere. + assert(!expr.min_length); // should be handled elsewhere. 
return rm.getInternalId(ir); } static -void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGWrapper &g) { - const ReportID r = getInternalId(rm, g); +void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGHolder &g, + const ExpressionInfo &expr) { + const ReportID r = getInternalId(rm, expr); boundary.report_at_0_eod.insert(r); boundary.report_at_0.insert(r); @@ -81,8 +83,8 @@ void makeFirehose(BoundaryReports &boundary, ReportManager &rm, NGWrapper &g) { static void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, - NGWrapper &g) { - boundary.report_at_0.insert(getInternalId(rm, g)); + NGHolder &g, const ExpressionInfo &expr) { + boundary.report_at_0.insert(getInternalId(rm, expr)); remove_edge(g.start, g.accept, g); remove_edge(g.start, g.acceptEod, g); g[g.start].reports.clear(); @@ -90,8 +92,8 @@ void makeAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, static void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, - NGWrapper &g) { - boundary.report_at_eod.insert(getInternalId(rm, g)); + NGHolder &g, const ExpressionInfo &expr) { + boundary.report_at_eod.insert(getInternalId(rm, expr)); remove_edge(g.startDs, g.acceptEod, g); remove_edge(g.start, g.acceptEod, g); g[g.start].reports.clear(); @@ -100,18 +102,18 @@ void makeEndAnchoredAcceptor(BoundaryReports &boundary, ReportManager &rm, static void makeNothingAcceptor(BoundaryReports &boundary, ReportManager &rm, - NGWrapper &g) { - boundary.report_at_0_eod.insert(getInternalId(rm, g)); + NGHolder &g, const ExpressionInfo &expr) { + boundary.report_at_0_eod.insert(getInternalId(rm, expr)); remove_edge(g.start, g.acceptEod, g); g[g.start].reports.clear(); } bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, - NGWrapper &g) { + NGHolder &g, const ExpressionInfo &expr) { if (edge(g.startDs, g.accept, g).second) { // e.g. 
'.*'; match "between" every byte DEBUG_PRINTF("graph is firehose\n"); - makeFirehose(boundary, rm, g); + makeFirehose(boundary, rm, g, expr); return true; } @@ -119,19 +121,19 @@ bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, if (edge(g.start, g.accept, g).second) { DEBUG_PRINTF("creating anchored acceptor\n"); - makeAnchoredAcceptor(boundary, rm, g); + makeAnchoredAcceptor(boundary, rm, g, expr); work_done = true; } if (edge(g.startDs, g.acceptEod, g).second) { DEBUG_PRINTF("creating end-anchored acceptor\n"); - makeEndAnchoredAcceptor(boundary, rm, g); + makeEndAnchoredAcceptor(boundary, rm, g, expr); work_done = true; } if (edge(g.start, g.acceptEod, g).second) { DEBUG_PRINTF("creating nothing acceptor\n"); - makeNothingAcceptor(boundary, rm, g); + makeNothingAcceptor(boundary, rm, g, expr); work_done = true; } diff --git a/src/nfagraph/ng_vacuous.h b/src/nfagraph/ng_vacuous.h index ebbc9d17..c33cb312 100644 --- a/src/nfagraph/ng_vacuous.h +++ b/src/nfagraph/ng_vacuous.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,12 +36,13 @@ namespace ue2 { struct BoundaryReports; -class NGWrapper; +class ExpressionInfo; +class NGHolder; class ReportManager; // Returns true if a "vacuous" reporter was created. bool splitOffVacuous(BoundaryReports &boundary, ReportManager &rm, - NGWrapper &graph); + NGHolder &g, const ExpressionInfo &expr); } // namespace ue2 diff --git a/src/parser/shortcut_literal.cpp b/src/parser/shortcut_literal.cpp index a7aa5d06..4539836a 100644 --- a/src/parser/shortcut_literal.cpp +++ b/src/parser/shortcut_literal.cpp @@ -159,13 +159,15 @@ public: ConstructLiteralVisitor::~ConstructLiteralVisitor() {} /** \brief True if the literal expression \a expr could be added to Rose. 
*/ -bool shortcutLiteral(NG &ng, const ParsedExpression &expr) { - assert(expr.component); +bool shortcutLiteral(NG &ng, const ParsedExpression &pe) { + assert(pe.component); if (!ng.cc.grey.allowLiteral) { return false; } + const auto &expr = pe.expr; + // XXX: don't shortcut literals with extended params (yet) if (expr.min_offset || expr.max_offset != MAX_OFFSET || expr.min_length || expr.edit_distance) { @@ -175,8 +177,8 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &expr) { ConstructLiteralVisitor vis; try { - assert(expr.component); - expr.component->accept(vis); + assert(pe.component); + pe.component->accept(vis); assert(vis.repeat_stack.empty()); } catch (const ConstructLiteralVisitor::NotLiteral&) { DEBUG_PRINTF("not a literal\n"); @@ -196,7 +198,8 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &expr) { } DEBUG_PRINTF("constructed literal %s\n", dumpString(lit).c_str()); - return ng.addLiteral(lit, expr.index, expr.id, expr.highlander, expr.som); + return ng.addLiteral(lit, expr.index, expr.report, expr.highlander, + expr.som); } } // namespace ue2 diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index 108bca8a..7d340d79 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,6 +30,7 @@ #include "grey.h" #include "ue2common.h" +#include "compiler/compiler.h" #include "nfa/dfa_min.h" #include "nfa/mcclellancompile.h" #include "nfa/mcclellancompile_util.h" @@ -74,7 +75,7 @@ public: // Construct a runtime implementation. 
aligned_unique_ptr build(u32 roseQuality) override; - void add(const NGWrapper &w) override; + void add(const NGHolder &g, const ExpressionInfo &expr) override; void add(const ue2_literal &literal, ReportID r) override; set all_reports() const override; @@ -171,26 +172,26 @@ bool pruneOverlong(NGHolder &g, const depth &max_depth, return modified; } -void SmallWriteBuildImpl::add(const NGWrapper &w) { +void SmallWriteBuildImpl::add(const NGHolder &g, const ExpressionInfo &expr) { // If the graph is poisoned (i.e. we can't build a SmallWrite version), // we don't even try. if (poisoned) { return; } - if (w.som || w.min_length || isVacuous(w)) { /* cannot support in smwr */ - poisoned = true; + if (expr.som || expr.min_length || isVacuous(g)) { + poisoned = true; /* cannot support in smwr */ return; } - DEBUG_PRINTF("w=%p\n", &w); + DEBUG_PRINTF("g=%p\n", &g); // make a copy of the graph so that we can modify it for our purposes - unique_ptr h = cloneHolder(w); + unique_ptr h = cloneHolder(g); pruneOverlong(*h, depth(cc.grey.smallWriteLargestBuffer), rm); - reduceGraph(*h, SOM_NONE, w.utf8, cc); + reduceGraph(*h, SOM_NONE, expr.utf8, cc); if (can_never_match(*h)) { DEBUG_PRINTF("graph can never match in small block\n"); diff --git a/src/smallwrite/smallwrite_build.h b/src/smallwrite/smallwrite_build.h index 84c6df3a..3d7f3cb6 100644 --- a/src/smallwrite/smallwrite_build.h +++ b/src/smallwrite/smallwrite_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,8 +48,9 @@ namespace ue2 { struct CompileContext; struct ue2_literal; -class NGWrapper; -class ReportManager; +class ExpressionInfo; +class NGHolder; +class ReportManager; // Abstract interface intended for callers from elsewhere in the tree, real // underlying implementation is 
SmallWriteBuildImpl in smwr_build_impl.h. @@ -61,16 +62,16 @@ public: // Construct a runtime implementation. virtual ue2::aligned_unique_ptr build(u32 roseQuality) = 0; - virtual void add(const NGWrapper &w) = 0; + virtual void add(const NGHolder &g, const ExpressionInfo &expr) = 0; virtual void add(const ue2_literal &literal, ReportID r) = 0; virtual std::set all_reports() const = 0; }; // Construct a usable SmallWrite builder. -std::unique_ptr makeSmallWriteBuilder(size_t num_patterns, - const ReportManager &rm, - const CompileContext &cc); +std::unique_ptr +makeSmallWriteBuilder(size_t num_patterns, const ReportManager &rm, + const CompileContext &cc); size_t smwrSize(const SmallWriteEngine *t); diff --git a/src/util/report_manager.cpp b/src/util/report_manager.cpp index 8377ea03..9c72da07 100644 --- a/src/util/report_manager.cpp +++ b/src/util/report_manager.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,9 +29,12 @@ /** \file * \brief ReportManager: tracks Report structures, exhaustion and dedupe keys. */ -#include "grey.h" + #include "report_manager.h" + +#include "grey.h" #include "ue2common.h" +#include "compiler/compiler.h" #include "nfagraph/ng.h" #include "rose/rose_build.h" #include "util/compile_error.h" @@ -201,20 +204,21 @@ void ReportManager::registerExtReport(ReportID id, } } -Report ReportManager::getBasicInternalReport(const NGWrapper &g, s32 adj) { +Report ReportManager::getBasicInternalReport(const ExpressionInfo &expr, + s32 adj) { /* validate that we are not violating highlander constraints, this will * throw a CompileError if so. 
*/ - registerExtReport(g.reportId, - external_report_info(g.highlander, g.expressionIndex)); + registerExtReport(expr.report, + external_report_info(expr.highlander, expr.index)); /* create the internal report */ u32 ekey = INVALID_EKEY; - if (g.highlander) { + if (expr.highlander) { /* all patterns with the same report id share an ekey */ - ekey = getExhaustibleKey(g.reportId); + ekey = getExhaustibleKey(expr.report); } - return makeECallback(g.reportId, adj, ekey); + return makeECallback(expr.report, adj, ekey); } void ReportManager::setProgramOffset(ReportID id, u32 programOffset) { diff --git a/src/util/report_manager.h b/src/util/report_manager.h index 0eed2711..4b62e4b5 100644 --- a/src/util/report_manager.h +++ b/src/util/report_manager.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -47,7 +47,7 @@ namespace ue2 { struct Grey; class RoseBuild; -class NGWrapper; +class ExpressionInfo; struct external_report_info { external_report_info(bool h, u32 fpi) @@ -92,13 +92,13 @@ public: const std::vector &reports() const { return reportIds; } /** - * Get a simple internal report corresponding to the wrapper. An ekey will - * be setup as required. + * Get a simple internal report corresponding to the expression. An ekey + * will be setup if required. * * Note: this function may throw a CompileError if constraints on external * match id are violated (mixed highlander status for example). 
*/ - Report getBasicInternalReport(const NGWrapper &g, s32 adj = 0); + Report getBasicInternalReport(const ExpressionInfo &expr, s32 adj = 0); /** \brief Register an external report and validate that we are not * violating highlander constraints (which will cause an exception to be diff --git a/unit/internal/lbr.cpp b/unit/internal/lbr.cpp index e40bda02..60bf8940 100644 --- a/unit/internal/lbr.cpp +++ b/unit/internal/lbr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -96,7 +96,8 @@ protected: const CompileContext cc(true, false, target, grey); ReportManager rm(cc.grey); ParsedExpression parsed(0, pattern.c_str(), flags, 0); - unique_ptr g = buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + const auto &g = built_expr.g; ASSERT_TRUE(g != nullptr); clearReports(*g); diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 804fcb1f..333c35f3 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -73,7 +73,8 @@ protected: CompileContext cc(false, false, target, Grey()); ReportManager rm(cc.grey); ParsedExpression parsed(0, expr.c_str(), flags, 0); - unique_ptr g = buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + const auto &g = built_expr.g; ASSERT_TRUE(g != nullptr); clearReports(*g); @@ -306,7 +307,8 @@ protected: CompileContext cc(false, false, get_current_target(), Grey()); ReportManager rm(cc.grey); ParsedExpression parsed(0, expr.c_str(), flags, 0); - unique_ptr g = buildWrapper(rm, cc, 
parsed); + auto built_expr = buildGraph(rm, cc, parsed); + const auto &g = built_expr.g; ASSERT_TRUE(g != nullptr); clearReports(*g); @@ -365,7 +367,8 @@ protected: CompileContext cc(true, false, get_current_target(), Grey()); ParsedExpression parsed(0, expr.c_str(), flags, 0); ReportManager rm(cc.grey); - unique_ptr g = buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + const auto &g = built_expr.g; ASSERT_TRUE(g != nullptr); clearReports(*g); diff --git a/unit/internal/nfagraph_common.h b/unit/internal/nfagraph_common.h index d3aafc99..ca5554c4 100644 --- a/unit/internal/nfagraph_common.h +++ b/unit/internal/nfagraph_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,18 +40,19 @@ namespace ue2 { // Helper function: construct a graph from an expression, flags and context. inline -std::unique_ptr constructGraphWithCC(const std::string &expr, - CompileContext &cc, - unsigned flags) { +std::unique_ptr constructGraphWithCC(const std::string &expr, + CompileContext &cc, + unsigned flags) { ReportManager rm(cc.grey); ParsedExpression parsed(0, expr.c_str(), flags, 0); - return buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + return std::move(built_expr.g); } // Helper function: construct a graph from an expression and its flags. 
inline -std::unique_ptr constructGraph(const std::string &expr, - unsigned flags) { +std::unique_ptr constructGraph(const std::string &expr, + unsigned flags) { CompileContext cc(false, false, get_current_target(), Grey()); return constructGraphWithCC(expr, cc, flags); } diff --git a/unit/internal/nfagraph_equivalence.cpp b/unit/internal/nfagraph_equivalence.cpp index 8fda9223..73aec1d7 100644 --- a/unit/internal/nfagraph_equivalence.cpp +++ b/unit/internal/nfagraph_equivalence.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -54,7 +54,7 @@ TEST(NFAGraph, RemoveEquivalence1) { // The graph should be merged into: a(b|c) CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("(ab|ac)", cc, 0)); + auto graph(constructGraphWithCC("(ab|ac)", cc, 0)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; g.kind = NFA_SUFFIX; @@ -115,7 +115,7 @@ TEST(NFAGraph, RemoveEquivalence2) { // The graph should be merged into: (b|c)a CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("(ba|ca)", cc, 0)); + auto graph(constructGraphWithCC("(ba|ca)", cc, 0)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; g.kind = NFA_SUFFIX; @@ -176,8 +176,7 @@ TEST(NFAGraph, RemoveEquivalence3) { // The graph should be merged into: a(..)+(X|Y) CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("a(..)+X|a(..)+Y", cc, - HS_FLAG_DOTALL)); + auto graph(constructGraphWithCC("a(..)+X|a(..)+Y", cc, HS_FLAG_DOTALL)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; g.kind = NFA_SUFFIX; @@ -266,8 +265,7 @@ TEST(NFAGraph, RemoveEquivalence4) { // The graph should be merged into: (X|Y)(..)+a CompileContext cc(false, false, 
get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("X(..)+a|Y(..)+a", cc, - HS_FLAG_DOTALL)); + auto graph(constructGraphWithCC("X(..)+a|Y(..)+a", cc, HS_FLAG_DOTALL)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; g.kind = NFA_SUFFIX; @@ -363,8 +361,7 @@ TEST(NFAGraph, RemoveEquivalence5) { // The graph should be merged into: [^\x00]*[\x00] CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("[^\\x00][^\\x00]*[\\x00]", - cc, 0)); + auto graph(constructGraphWithCC("[^\\x00][^\\x00]*[\\x00]", cc, 0)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; g.kind = NFA_PREFIX; @@ -420,7 +417,7 @@ TEST(NFAGraph, RemoveEquivalence5) { TEST(NFAGraph, RemoveEquivalence6) { // Build a small graph with two redundant vertices: ^(.*|.*)a // The graph should be merged into: a - unique_ptr graph(constructGraph("^(.*|.*)a", HS_FLAG_DOTALL)); + auto graph(constructGraph("^(.*|.*)a", HS_FLAG_DOTALL)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; @@ -458,7 +455,7 @@ TEST(NFAGraph, RemoveEquivalence6) { TEST(NFAGraph, RemoveEquivalence7) { // Build a small graph with no redundant vertices: ^.+a // Make sure we don't merge anything - unique_ptr graph(constructGraph("^.+a", HS_FLAG_DOTALL)); + auto graph(constructGraph("^.+a", HS_FLAG_DOTALL)); ASSERT_TRUE(graph != nullptr); NGHolder &g = *graph; diff --git a/unit/internal/nfagraph_find_matches.cpp b/unit/internal/nfagraph_find_matches.cpp index 92c514d8..cd0cd796 100644 --- a/unit/internal/nfagraph_find_matches.cpp +++ b/unit/internal/nfagraph_find_matches.cpp @@ -208,7 +208,8 @@ TEST_P(MatchesTest, Check) { CompileContext cc(false, false, get_current_target(), Grey()); ReportManager rm(cc.grey); ParsedExpression parsed(0, t.pattern.c_str(), t.flags, 0); - auto g = buildWrapper(rm, cc, parsed); + auto built_expr = buildGraph(rm, cc, parsed); + const auto &g = built_expr.g; bool utf8 = (t.flags & HS_FLAG_UTF8) > 0; set> matches; diff --git 
a/unit/internal/nfagraph_redundancy.cpp b/unit/internal/nfagraph_redundancy.cpp index be9527fd..c77045e0 100644 --- a/unit/internal/nfagraph_redundancy.cpp +++ b/unit/internal/nfagraph_redundancy.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -53,7 +53,7 @@ TEST(NFAGraph, RemoveRedundancy1) { // The character reachability should be merged into: [ab]c CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("(a|b)c", cc, 0)); + auto graph(constructGraphWithCC("(a|b)c", cc, 0)); ASSERT_TRUE(graph.get() != nullptr); NGHolder &g = *graph; @@ -95,8 +95,7 @@ TEST(NFAGraph, RemoveRedundancy2) { // Build a small graph with a redundant vertex: a.*b?c // The dot-star should swallow the 'b?', leaving a.*c CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("a.*b?c", cc, - HS_FLAG_DOTALL)); + auto graph(constructGraphWithCC("a.*b?c", cc, HS_FLAG_DOTALL)); ASSERT_TRUE(graph.get() != nullptr); NGHolder &g = *graph; @@ -152,8 +151,7 @@ TEST(NFAGraph, RemoveRedundancy2) { TEST(NFAGraph, RemoveRedundancy3) { CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("foobar.*(a|b)?teakettle", - cc, 0)); + auto graph(constructGraphWithCC("foobar.*(a|b)?teakettle", cc, 0)); ASSERT_TRUE(graph.get() != nullptr); unsigned countBefore = num_vertices(*graph); @@ -166,7 +164,7 @@ TEST(NFAGraph, RemoveRedundancy3) { TEST(NFAGraph, RemoveRedundancy4) { CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("foo([A-Z]|a|b|q)", cc, 0)); + auto graph(constructGraphWithCC("foo([A-Z]|a|b|q)", cc, 0)); ASSERT_TRUE(graph.get() != nullptr); unsigned countBefore = num_vertices(*graph); @@ -178,8 
+176,7 @@ TEST(NFAGraph, RemoveRedundancy4) { TEST(NFAGraph, RemoveRedundancy5) { CompileContext cc(false, false, get_current_target(), Grey()); - unique_ptr graph(constructGraphWithCC("[0-9]?badgerbrush", - cc, 0)); + auto graph(constructGraphWithCC("[0-9]?badgerbrush", cc, 0)); ASSERT_TRUE(graph.get() != nullptr); unsigned countBefore = num_vertices(*graph); diff --git a/unit/internal/nfagraph_width.cpp b/unit/internal/nfagraph_width.cpp index 03508ea8..5cfb4c87 100644 --- a/unit/internal/nfagraph_width.cpp +++ b/unit/internal/nfagraph_width.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -79,10 +79,10 @@ INSTANTIATE_TEST_CASE_P(NFAWidth, NFAWidthTest, ValuesIn(widthTests)); TEST_P(NFAWidthTest, Check) { const WidthTest &t = GetParam(); SCOPED_TRACE(testing::Message() << "Pattern: " << t.pattern); - unique_ptr w(constructGraph(t.pattern, 0)); + auto g = constructGraph(t.pattern, 0); - ASSERT_EQ(t.minWidth, findMinWidth(*w)); - ASSERT_EQ(t.maxWidth, findMaxWidth(*w)); + ASSERT_EQ(t.minWidth, findMinWidth(*g)); + ASSERT_EQ(t.maxWidth, findMaxWidth(*g)); } // for google test diff --git a/util/ng_corpus_generator.cpp b/util/ng_corpus_generator.cpp index 9d75a7ad..19ab7edf 100644 --- a/util/ng_corpus_generator.cpp +++ b/util/ng_corpus_generator.cpp @@ -35,6 +35,7 @@ #include "ng_corpus_generator.h" #include "ng_corpus_editor.h" +#include "compiler/compiler.h" #include "nfagraph/ng.h" #include "nfagraph/ng_util.h" #include "ue2common.h" @@ -219,8 +220,9 @@ namespace { /** \brief Concrete implementation */ class CorpusGeneratorImpl : public CorpusGenerator { public: - CorpusGeneratorImpl(const NGWrapper &graph_in, CorpusProperties &props); - ~CorpusGeneratorImpl() {} + CorpusGeneratorImpl(const NGHolder &graph_in, const ExpressionInfo &expr_in, + 
CorpusProperties &props); + ~CorpusGeneratorImpl() = default; void generateCorpus(vector &data); @@ -237,6 +239,9 @@ private: * bytes in length. */ void addRandom(const min_max &mm, string *out); + /** \brief Info about this expression. */ + const ExpressionInfo &expr; + /** \brief The NFA graph we operate over. */ const NGHolder &graph; @@ -245,12 +250,13 @@ private: CorpusProperties &cProps; }; -CorpusGeneratorImpl::CorpusGeneratorImpl(const NGWrapper &graph_in, +CorpusGeneratorImpl::CorpusGeneratorImpl(const NGHolder &graph_in, + const ExpressionInfo &expr_in, CorpusProperties &props) - : graph(graph_in), cProps(props) { + : expr(expr_in), graph(graph_in), cProps(props) { // if this pattern is to be matched approximately - if (graph_in.edit_distance && !props.editDistance) { - props.editDistance = props.rand(0, graph_in.edit_distance + 1); + if (expr.edit_distance && !props.editDistance) { + props.editDistance = props.rand(0, expr.edit_distance + 1); } } @@ -392,8 +398,9 @@ hit_limit: /** \brief Concrete implementation for UTF-8 */ class CorpusGeneratorUtf8 : public CorpusGenerator { public: - CorpusGeneratorUtf8(const NGWrapper &graph_in, CorpusProperties &props); - ~CorpusGeneratorUtf8() {} + CorpusGeneratorUtf8(const NGHolder &graph_in, const ExpressionInfo &expr_in, + CorpusProperties &props); + ~CorpusGeneratorUtf8() = default; void generateCorpus(vector &data); @@ -410,19 +417,23 @@ private: * length. */ void addRandom(const min_max &mm, vector *out); + /** \brief Info about this expression. */ + const ExpressionInfo &expr; + /** \brief The NFA graph we operate over. 
*/ - const NGWrapper &graph; + const NGHolder &graph; /** \brief Reference to our corpus generator properties object (stores some * state) */ CorpusProperties &cProps; }; -CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGWrapper &graph_in, +CorpusGeneratorUtf8::CorpusGeneratorUtf8(const NGHolder &graph_in, + const ExpressionInfo &expr_in, CorpusProperties &props) - : graph(graph_in), cProps(props) { + : expr(expr_in), graph(graph_in), cProps(props) { // we do not support Utf8 for approximate matching - if (graph.edit_distance) { + if (expr.edit_distance) { throw CorpusGenerationFailure("UTF-8 for edited patterns is not " "supported."); } @@ -681,11 +692,12 @@ CorpusGenerator::~CorpusGenerator() { } // External entry point -unique_ptr makeCorpusGenerator(const NGWrapper &graph, +unique_ptr makeCorpusGenerator(const NGHolder &graph, + const ExpressionInfo &expr, CorpusProperties &props) { - if (graph.utf8) { - return ue2::make_unique(graph, props); + if (expr.utf8) { + return ue2::make_unique(graph, expr, props); } else { - return ue2::make_unique(graph, props); + return ue2::make_unique(graph, expr, props); } } diff --git a/util/ng_corpus_generator.h b/util/ng_corpus_generator.h index a02721bd..f230a10d 100644 --- a/util/ng_corpus_generator.h +++ b/util/ng_corpus_generator.h @@ -41,7 +41,8 @@ namespace ue2 { -class NGWrapper; +class ExpressionInfo; +class NGHolder; } // namespace ue2 @@ -68,6 +69,7 @@ public: /** \brief Build a concrete impl conforming to the \ref CorpusGenerator * interface. */ std::unique_ptr -makeCorpusGenerator(const ue2::NGWrapper &graph, CorpusProperties &props); +makeCorpusGenerator(const ue2::NGHolder &g, const ue2::ExpressionInfo &expr, + CorpusProperties &props); #endif