mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-11-18 18:20:35 +03:00
Initial commit of Hyperscan
This commit is contained in:
599
src/nfagraph/ng.cpp
Normal file
599
src/nfagraph/ng.cpp
Normal file
@@ -0,0 +1,599 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief NG, NGHolder, NGWrapper and graph handling.
|
||||
*/
|
||||
#include "grey.h"
|
||||
#include "ng.h"
|
||||
#include "ng_anchored_acyclic.h"
|
||||
#include "ng_anchored_dots.h"
|
||||
#include "ng_asserts.h"
|
||||
#include "ng_calc_components.h"
|
||||
#include "ng_cyclic_redundancy.h"
|
||||
#include "ng_dump.h"
|
||||
#include "ng_edge_redundancy.h"
|
||||
#include "ng_equivalence.h"
|
||||
#include "ng_extparam.h"
|
||||
#include "ng_fixed_width.h"
|
||||
#include "ng_haig.h"
|
||||
#include "ng_literal_component.h"
|
||||
#include "ng_literal_decorated.h"
|
||||
#include "ng_misc_opt.h"
|
||||
#include "ng_puff.h"
|
||||
#include "ng_prefilter.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_redundancy.h"
|
||||
#include "ng_region.h"
|
||||
#include "ng_region_redundancy.h"
|
||||
#include "ng_reports.h"
|
||||
#include "ng_rose.h"
|
||||
#include "ng_sep.h"
|
||||
#include "ng_small_literal_set.h"
|
||||
#include "ng_som.h"
|
||||
#include "ng_vacuous.h"
|
||||
#include "ng_utf8.h"
|
||||
#include "ng_util.h"
|
||||
#include "ng_width.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/goughcompile.h"
|
||||
#include "smallwrite/smallwrite_build.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/container.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/ue2string.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Build the graph-compiler front end.
 *
 * Stores its own copy of the compile context, initialises the report manager
 * and SOM slot manager from the supplied settings, and creates the Rose and
 * small-write builders that patterns are later handed to.
 *
 * \param in_cc Compile context; its grey box supplies the SOM reverse NFA
 *              length limit and the report manager configuration.
 * \param in_somPrecision Initial SOM precision passed to the slot manager.
 */
NG::NG(const CompileContext &in_cc, unsigned in_somPrecision)
    : maxSomRevHistoryAvailable(in_cc.grey.somMaxRevNfaLength),
      minWidth(depth::infinity()), // no pattern seen yet
      rm(in_cc.grey),
      ssm(in_somPrecision),
      cc(in_cc),
      // The builders are handed the members initialised above.
      rose(makeRoseBuilder(rm, ssm, cc, boundary)),
      smwr(makeSmallWriteBuilder(rm, cc)) {}
|
||||
|
||||
/** \brief Destructor; nothing to do beyond destroying the members. Defined
 * out of line, in the translation unit that includes the builder headers. */
NG::~NG() = default;
|
||||
|
||||
/** \brief SOM handling code, called by \ref addComponent.
|
||||
*
|
||||
* \return true if the component was handled completely by something (e.g. a
|
||||
* Haig outfix), false if SOM could be established but implementation via an
|
||||
* engine will be required.
|
||||
*
|
||||
* \throw CompileError if SOM cannot be supported for the component.
|
||||
*/
|
||||
static
|
||||
bool addComponentSom(NG &ng, NGHolder &g, const NGWrapper &w,
|
||||
const som_type som, const u32 comp_id) {
|
||||
DEBUG_PRINTF("doing som\n");
|
||||
dumpComponent(g, "03_presom", w.expressionIndex, comp_id, ng.cc.grey);
|
||||
assert(hasCorrectlyNumberedVertices(g));
|
||||
|
||||
// First, we try the "SOM chain" support in ng_som.cpp.
|
||||
|
||||
sombe_rv rv = doSom(ng, g, w, comp_id, som);
|
||||
if (rv == SOMBE_HANDLED_INTERNAL) {
|
||||
return false;
|
||||
} else if (rv == SOMBE_HANDLED_ALL) {
|
||||
return true;
|
||||
}
|
||||
assert(rv == SOMBE_FAIL);
|
||||
|
||||
/* Next, Sombe style approaches */
|
||||
rv = doSomWithHaig(ng, g, w, comp_id, som);
|
||||
if (rv == SOMBE_HANDLED_INTERNAL) {
|
||||
return false;
|
||||
} else if (rv == SOMBE_HANDLED_ALL) {
|
||||
return true;
|
||||
}
|
||||
assert(rv == SOMBE_FAIL);
|
||||
|
||||
// If the previous approach could not support this pattern, we try treating
|
||||
// it monolithically, as a Haig outfix.
|
||||
|
||||
vector<vector<CharReach> > triggers; /* empty for outfix */
|
||||
|
||||
assert(g.kind == NFA_OUTFIX);
|
||||
dumpComponent(g, "haig", w.expressionIndex, comp_id, ng.cc.grey);
|
||||
auto haig = attemptToBuildHaig(g, som, ng.ssm.somPrecision(), triggers,
|
||||
ng.cc.grey);
|
||||
if (haig) {
|
||||
DEBUG_PRINTF("built haig outfix\n");
|
||||
ng.rose->addOutfix(g, *haig);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Our various strategies for supporting SOM for this pattern have failed.
|
||||
* Provide a generic pattern not supported/too large return value as it is
|
||||
* unclear what the meaning of a specific SOM error would be */
|
||||
throw CompileError(w.expressionIndex, "Pattern is too large.");
|
||||
|
||||
assert(0); // unreachable
|
||||
return false;
|
||||
}
|
||||
|
||||
/** \brief Run the graph reduction passes over \a g.
 *
 * Does nothing unless graph simplification is enabled in the grey box.
 * Several passes are skipped when \a som is set, and UTF-8 dot restoration
 * only runs when \a utf8 is set.
 */
void reduceGraph(NGHolder &g, som_type som, bool utf8,
                 const CompileContext &cc) {
    if (!cc.grey.performGraphSimplification) {
        return;
    }

    if (!som) {
        mergeCyclicDotStars(g);
    }

    // Repeat the redundancy passes until a pass leaves the graph untouched
    // or the (small) pass limit is reached.
    const unsigned MAX_PASSES = 3;
    for (unsigned pass = 1; pass <= MAX_PASSES; ++pass) {
        DEBUG_PRINTF("reduce pass %u/%u\n", pass, MAX_PASSES);

        // Each pass always runs all three reductions.
        bool changed = removeEdgeRedundancy(g, som, cc);
        changed |= reduceGraphEquivalences(g, cc);
        changed |= removeRedundancy(g, som);

        if (!changed) {
            DEBUG_PRINTF("graph unchanged after pass %u, stopping\n", pass);
            break;
        }
    }

    if (utf8) {
        utf8DotRestoration(g, som);
    }

    /* Minor non-redundancy improvements; if they changed anything there may
     * be some more edges to remove. */
    if (improveGraph(g, som)) {
        removeEdgeRedundancy(g, som, cc);
    }

    removeCyclicPathRedundancy(g);
    removeCyclicDominated(g, som);

    if (!som) {
        mergeCyclicDotStars(g);
        removeSiblingsOfStartDotStar(g);
    }
}
|
||||
|
||||
static
|
||||
bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som,
|
||||
const u32 comp_id) {
|
||||
const CompileContext &cc = ng.cc;
|
||||
|
||||
DEBUG_PRINTF("expr=%u, comp=%u: %zu vertices, %zu edges\n",
|
||||
w.expressionIndex, comp_id, num_vertices(g), num_edges(g));
|
||||
|
||||
dumpComponent(g, "01_begin", w.expressionIndex, comp_id, ng.cc.grey);
|
||||
|
||||
reduceGraph(g, som, w.utf8, cc);
|
||||
|
||||
dumpComponent(g, "02_reduced", w.expressionIndex, comp_id, ng.cc.grey);
|
||||
|
||||
// There may be redundant regions that we can remove
|
||||
if (cc.grey.performGraphSimplification) {
|
||||
removeRegionRedundancy(g, som);
|
||||
}
|
||||
|
||||
// "Short Exhaustible Passthrough" patterns always become outfixes.
|
||||
if (!som && isSEP(g, ng.rm, cc.grey)) {
|
||||
DEBUG_PRINTF("graph is SEP\n");
|
||||
if (ng.rose->addOutfix(g)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Start Of Match handling.
|
||||
if (som) {
|
||||
if (addComponentSom(ng, g, w, som, comp_id)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (splitOffAnchoredAcyclic(*ng.rose, g, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (handleSmallLiteralSets(*ng.rose, g, cc)
|
||||
|| handleFixedWidth(*ng.rose, g, cc.grey)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (handleDecoratedLiterals(*ng.rose, g, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (splitOffRose(*ng.rose, g, w.prefilter, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (splitOffPuffs(*ng.rose, ng.rm, g, w.prefilter, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (handleSmallLiteralSets(*ng.rose, g, cc)
|
||||
|| handleFixedWidth(*ng.rose, g, cc.grey)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (handleDecoratedLiterals(*ng.rose, g, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (splitOffRose(*ng.rose, g, w.prefilter, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// A final pass at cyclic redundancy and Rose
|
||||
// TODO: investigate - coverage results suggest that this never succeeds?
|
||||
if (cc.grey.performGraphSimplification) {
|
||||
if (removeCyclicPathRedundancy(g) ||
|
||||
removeCyclicDominated(g, som)) {
|
||||
if (handleFixedWidth(*ng.rose, g, cc.grey)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (finalChanceRose(*ng.rose, g, w.prefilter, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("testing for outfix\n");
|
||||
assert(allMatchStatesHaveReports(g));
|
||||
if (ng.rose->addOutfix(g)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true if all components have been added.
|
||||
static
|
||||
bool processComponents(NG &ng, NGWrapper &w,
|
||||
deque<unique_ptr<NGHolder>> &g_comp,
|
||||
const som_type som) {
|
||||
const u32 num_components = g_comp.size();
|
||||
|
||||
u32 failed = 0;
|
||||
for (u32 i = 0; i < num_components; i++) {
|
||||
if (!g_comp[i]) {
|
||||
continue;
|
||||
}
|
||||
if (addComponent(ng, *g_comp[i], w, som, i)) {
|
||||
g_comp[i].reset();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (som) { /* bail immediately */
|
||||
return false;
|
||||
}
|
||||
failed++;
|
||||
}
|
||||
|
||||
if (!failed) {
|
||||
DEBUG_PRINTF("all components claimed\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("%u components still remain\n", failed);
|
||||
return false;
|
||||
}
|
||||
|
||||
/** \brief Consume a pattern graph.
 *
 * Runs per-expression preparation (report cleanup, assert resolution,
 * pruning, extended parameter handling), peels off vacuous and literal
 * parts, then splits what is left into connected components and hands each
 * one to the component handlers.
 *
 * \return true once the whole graph has been consumed. The trailing
 * "return false" is only there to satisfy the compiler; it is guarded by
 * assert(0).
 *
 * \throw CompileError if SOM is requested for a vacuous pattern, if the
 * pattern can never match, or if a component could not be compiled.
 */
bool NG::addGraph(NGWrapper &w) {
    // Remove reports that aren't on vertices connected to accept.
    clearReports(w);

    som_type som = w.som;
    if (som && isVacuous(w)) {
        throw CompileError(w.expressionIndex, "Start of match is not "
                           "currently supported for patterns which match an "
                           "empty buffer.");
    }

    dumpDotWrapper(w, "01_initial", cc.grey);
    assert(allMatchStatesHaveReports(w));

    /* ensure utf8 starts at cp boundary */
    ensureCodePointStart(rm, w);
    resolveAsserts(rm, w);

    dumpDotWrapper(w, "02_post_assert_resolve", cc.grey);
    assert(allMatchStatesHaveReports(w));

    pruneUseless(w);
    pruneEmptyVertices(w);

    if (can_never_match(w)) {
        throw CompileError(w.expressionIndex, "Pattern can never match.");
    }

    optimiseVirtualStarts(w); /* good for som */

    handleExtendedParams(rm, w, cc);
    if (w.min_length) {
        // A minimum length constraint is currently satisfied by using SOM.
        som = SOM_LEFT;
        ssm.somPrecision(8);
    }

    if (som) {
        rose->setSom();
    }

    // First, graph work that can be done on an individual expression basis.

    if (w.utf8) {
        relaxForbiddenUtf8(w);
    }

    if (w.highlander && !w.min_length && !w.min_offset) {
        // In highlander mode: if there are no constraints on our reports that
        // may prevent us accepting our first match (i.e. extended params), we
        // can prune the other out-edges of all vertices connected to accept.
        pruneHighlanderAccepts(w, rm);
    }

    dumpDotWrapper(w, "02b_fairly_early", cc.grey);

    // A vacuous pattern can be handled early.
    if (splitOffVacuous(boundary, rm, w)) {
        DEBUG_PRINTF("split off vacuous\n");
    }

    // If only the special vertices are left, there is nothing more to do.
    if (num_vertices(w) == N_SPECIALS) {
        DEBUG_PRINTF("all vertices claimed by vacuous handling\n");
        return true;
    }

    // Now that vacuous edges have been removed, update the min width
    // exclusive of boundary reports.
    minWidth = min(minWidth, findMinWidth(w));

    // Add the pattern to the small write builder.
    smwr->add(w);

    if (!som) {
        removeSiblingsOfStartDotStar(w);
    }

    dumpDotWrapper(w, "03_early", cc.grey);

    // Literals that span the graph from start to accept can be split off
    // into Rose from here.
    if (!som && splitOffLiterals(*this, w)) {
        DEBUG_PRINTF("some vertices claimed by literals\n");
    }

    // Again, stop if only the special vertices remain.
    if (num_vertices(w) == N_SPECIALS) {
        DEBUG_PRINTF("all vertices claimed before calc components\n");
        return true;
    }

    // Split the graph into a set of connected components.
    deque<unique_ptr<NGHolder>> g_comp = calcComponents(w);
    assert(!g_comp.empty());

    if (!som) {
        for (auto &comp : g_comp) {
            assert(comp);
            reformLeadingDots(*comp);
        }

        recalcComponents(g_comp);
    }

    if (processComponents(*this, w, g_comp, som)) {
        return true;
    }

    // In prefiltering mode, run the prefilter reductions over whatever is
    // left and have another shot at accepting the graph.
    if (cc.grey.prefilterReductions && w.prefilter) {
        for (auto &comp : g_comp) {
            if (comp) {
                prefilterReductions(*comp, cc);
            }
        }

        if (processComponents(*this, w, g_comp, som)) {
            return true;
        }
    }

    // We must have components that could not be compiled.
    for (u32 i = 0; i < g_comp.size(); i++) {
        if (!g_comp[i]) {
            continue;
        }
        DEBUG_PRINTF("could not compile component %u with %zu vertices\n",
                     i, num_vertices(*g_comp[i]));
        throw CompileError(w.expressionIndex, "Pattern is too large.");
    }

    assert(0); // should have thrown.
    return false;
}
|
||||
|
||||
/** \brief Used from SOM mode to add an arbitrary NGHolder as an engine. */
|
||||
bool NG::addHolder(NGHolder &w) {
|
||||
DEBUG_PRINTF("adding holder of %zu states\n", num_vertices(w));
|
||||
assert(allMatchStatesHaveReports(w));
|
||||
assert(hasCorrectlyNumberedVertices(w));
|
||||
|
||||
/* We don't update the global minWidth here as we care about the min width
|
||||
* of the whole pattern - not a just a prefix of it. */
|
||||
|
||||
bool prefilter = false;
|
||||
//dumpDotComp(comp, w, *this, 20, "prefix_init");
|
||||
|
||||
som_type som = SOM_NONE; /* the prefixes created by the SOM code do not
|
||||
themselves track som */
|
||||
bool utf8 = false; // handling done earlier
|
||||
reduceGraph(w, som, utf8, cc);
|
||||
|
||||
// There may be redundant regions that we can remove
|
||||
if (cc.grey.performGraphSimplification) {
|
||||
removeRegionRedundancy(w, som);
|
||||
}
|
||||
|
||||
// "Short Exhaustible Passthrough" patterns always become outfixes.
|
||||
if (isSEP(w, rm, cc.grey)) {
|
||||
DEBUG_PRINTF("graph is SEP\n");
|
||||
if (rose->addOutfix(w)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (splitOffAnchoredAcyclic(*rose, w, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (handleSmallLiteralSets(*rose, w, cc)
|
||||
|| handleFixedWidth(*rose, w, cc.grey)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (handleDecoratedLiterals(*rose, w, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (splitOffRose(*rose, w, prefilter, cc)) {
|
||||
return true;
|
||||
}
|
||||
if (splitOffPuffs(*rose, rm, w, prefilter, cc)) {
|
||||
return true;
|
||||
}
|
||||
if (splitOffRose(*rose, w, prefilter, cc)) {
|
||||
return true;
|
||||
}
|
||||
if (finalChanceRose(*rose, w, prefilter, cc)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("trying for outfix\n");
|
||||
if (rose->addOutfix(w)) {
|
||||
DEBUG_PRINTF("ok\n");
|
||||
return true;
|
||||
}
|
||||
DEBUG_PRINTF("trying for outfix - failed\n");
|
||||
DEBUG_PRINTF("nobody would take us\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool NG::addLiteral(const ue2_literal &literal, u32 expr_index,
|
||||
u32 external_report, bool highlander, som_type som) {
|
||||
assert(!literal.empty());
|
||||
|
||||
if (!cc.grey.shortcutLiterals) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// We can't natively handle arbitrary literals with mixed case sensitivity
|
||||
// in Rose -- they require mechanisms like benefits masks, which have
|
||||
// length limits etc. Better to let those go through full graph processing.
|
||||
if (mixed_sensitivity(literal)) {
|
||||
DEBUG_PRINTF("mixed sensitivity\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Register external report and validate highlander constraints.
|
||||
rm.registerExtReport(external_report,
|
||||
external_report_info(highlander, expr_index));
|
||||
|
||||
ReportID id;
|
||||
if (som) {
|
||||
assert(!highlander); // not allowed, checked earlier.
|
||||
Report r = makeSomRelativeCallback(external_report, 0, literal.length());
|
||||
id = rm.getInternalId(r);
|
||||
rose->setSom();
|
||||
} else {
|
||||
u32 ekey = highlander ? rm.getExhaustibleKey(external_report)
|
||||
: INVALID_EKEY;
|
||||
Report r = makeECallback(external_report, 0, ekey);
|
||||
id = rm.getInternalId(r);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("success: graph is literal '%s', report ID %u\n",
|
||||
dumpString(literal).c_str(), id);
|
||||
|
||||
rose->add(false, false, literal, {id});
|
||||
|
||||
minWidth = min(minWidth, depth(literal.length()));
|
||||
|
||||
smwr->add(literal, id); /* inform small write handler about this literal */
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** \brief Construct a wrapper holding the per-expression settings for a
 * pattern graph.
 *
 * Only records the supplied values; all special nodes/edges are added in
 * NGHolder's constructor.
 *
 * \param ei Index of the expression this graph represents.
 * \param highlander_in User-specified single match only.
 * \param utf8_in UTF-8 mode.
 * \param prefilter_in Prefiltering mode.
 * \param som_in SOM type requested.
 * \param r User-visible report id.
 * \param min_offset_in Extended parameter min_offset.
 * \param max_offset_in Extended parameter max_offset.
 * \param min_length_in Extended parameter min_length.
 */
NGWrapper::NGWrapper(unsigned int ei, bool highlander_in, bool utf8_in,
                     bool prefilter_in, som_type som_in, ReportID r,
                     u64a min_offset_in, u64a max_offset_in, u64a min_length_in)
    : expressionIndex(ei), reportId(r), highlander(highlander_in),
      utf8(utf8_in), prefilter(prefilter_in), som(som_in),
      min_offset(min_offset_in), max_offset(max_offset_in),
      min_length(min_length_in) {
    // "%p" requires a pointer to void, so cast "this" explicitly rather than
    // passing an NGWrapper pointer through the varargs.
    DEBUG_PRINTF("built %p: expr=%u report=%u%s%s%s%s "
                 "min_offset=%llu max_offset=%llu min_length=%llu\n",
                 static_cast<const void *>(this), expressionIndex, reportId,
                 highlander ? " highlander" : "",
                 utf8 ? " utf8" : "",
                 prefilter ? " prefilter" : "",
                 (som != SOM_NONE) ? " som" : "",
                 min_offset, max_offset, min_length);
}
|
||||
|
||||
/** \brief Destructor; nothing to release beyond the members and base. */
NGWrapper::~NGWrapper() = default;
|
||||
|
||||
} // namespace ue2
|
||||
133
src/nfagraph/ng.h
Normal file
133
src/nfagraph/ng.h
Normal file
@@ -0,0 +1,133 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief NG, NGHolder, NGWrapper declarations.
|
||||
*/
|
||||
|
||||
#ifndef NG_H
|
||||
#define NG_H
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ue2common.h"
|
||||
#include "parser/position.h"
|
||||
#include "som/slot_manager.h"
|
||||
#include "som/som.h"
|
||||
#include "util/boundary_reports.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <deque>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct CompileContext;
|
||||
struct ue2_literal;
|
||||
|
||||
class NGWrapper : public NGHolder {
|
||||
public:
|
||||
NGWrapper(unsigned int expressionIndex, bool highlander, bool utf8,
|
||||
bool prefilter, const som_type som, ReportID rid, u64a min_offset,
|
||||
u64a max_offset, u64a min_length);
|
||||
|
||||
~NGWrapper();
|
||||
|
||||
/** index of the expression represented by this graph, used
|
||||
* - down the track in error handling
|
||||
* - identifying parts of an expression in highlander mode
|
||||
*/
|
||||
const unsigned int expressionIndex;
|
||||
|
||||
const ReportID reportId; /**< user-visible report id */
|
||||
const bool highlander; /**< user-specified single match only */
|
||||
const bool utf8; /**< UTF-8 mode */
|
||||
const bool prefilter; /**< prefiltering mode */
|
||||
const som_type som; /**< SOM type requested */
|
||||
u64a min_offset; /**< extparam min_offset value */
|
||||
u64a max_offset; /**< extparam max_offset value */
|
||||
u64a min_length; /**< extparam min_length value */
|
||||
};
|
||||
|
||||
class RoseBuild;
|
||||
class SmallWriteBuild;
|
||||
|
||||
class NG : boost::noncopyable {
|
||||
public:
|
||||
NG(const CompileContext &in_cc, unsigned in_somPrecision);
|
||||
~NG();
|
||||
|
||||
/** \brief Consumes a pattern, returns false or throws a CompileError
|
||||
* exception if the graph cannot be consumed. */
|
||||
bool addGraph(NGWrapper &w);
|
||||
|
||||
/** \brief Consumes a graph, cut-down version of addGraph for use by SOM
|
||||
* processing. */
|
||||
bool addHolder(NGHolder &h);
|
||||
|
||||
/** \brief Adds a literal to Rose, used by literal shortcut passes (instead of
|
||||
* using \ref addGraph) */
|
||||
bool addLiteral(const ue2_literal &lit, u32 expr_index, u32 external_report,
|
||||
bool highlander, som_type som);
|
||||
|
||||
/** \brief Maximum history in bytes available for use by SOM reverse NFAs,
|
||||
* a hack for pattern support (see UE-1903). This is always set to the max
|
||||
* "lookbehind" length. */
|
||||
const u32 maxSomRevHistoryAvailable;
|
||||
|
||||
/** \brief The length of the shortest corpus which can match a pattern
|
||||
* contained in the NG (excluding the boundary reports used by vacuous
|
||||
* patterns, which give an effective minWidth of zero). */
|
||||
depth minWidth;
|
||||
|
||||
ReportManager rm;
|
||||
SomSlotManager ssm;
|
||||
BoundaryReports boundary;
|
||||
const CompileContext cc;
|
||||
|
||||
const std::unique_ptr<RoseBuild> rose; //!< Rose builder.
|
||||
const std::unique_ptr<SmallWriteBuild> smwr; //!< SmallWrite builder.
|
||||
};
|
||||
|
||||
/** \brief Run graph reduction passes.
|
||||
*
|
||||
* Shared with the small write compiler.
|
||||
*/
|
||||
void reduceGraph(NGHolder &g, som_type som, bool utf8, const CompileContext &cc);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
67
src/nfagraph/ng_anchored_acyclic.cpp
Normal file
67
src/nfagraph/ng_anchored_acyclic.cpp
Normal file
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Anchored acyclic graph -> DFA analysis.
|
||||
*/
|
||||
#include "ng_anchored_acyclic.h"
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ng_reports.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "util/compile_context.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Try to hand the whole graph \a h to Rose as an anchored acyclic
 * graph.
 *
 * \return true if Rose accepted it; false if the feature is disabled in the
 * grey box, the graph is not anchored, the graph is not acyclic, or Rose
 * declined it.
 */
bool splitOffAnchoredAcyclic(RoseBuild &rose, const NGHolder &h,
                             const CompileContext &cc) {
    if (!cc.grey.allowAnchoredAcyclic) {
        return false;
    }

    if (!isAnchored(h)) {
        DEBUG_PRINTF("fail, not anchored\n");
        return false;
    }

    if (!isAcyclic(h)) {
        DEBUG_PRINTF("fail, not acyclic\n");
        return false;
    }

    if (!rose.addAnchoredAcyclic(h)) {
        DEBUG_PRINTF("failed to add anchored nfa\n");
        return false;
    }

    return true;
}
|
||||
|
||||
} // namespace ue2
|
||||
49
src/nfagraph/ng_anchored_acyclic.h
Normal file
49
src/nfagraph/ng_anchored_acyclic.h
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Anchored acyclic graph -> DFA analysis.
|
||||
*/
|
||||
|
||||
#ifndef NG_ANCHORED_ACYCLIC_H
|
||||
#define NG_ANCHORED_ACYCLIC_H
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
class RoseBuild;
|
||||
struct CompileContext;
|
||||
|
||||
/** \brief Attempt to consume the entire pattern in graph \a h as an anchored
|
||||
* acyclic DFA. Returns true if successful. */
|
||||
bool splitOffAnchoredAcyclic(RoseBuild &rose, const NGHolder &h,
|
||||
const CompileContext &cc);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_ANCHORED_ACYCLIC_H
|
||||
654
src/nfagraph/ng_anchored_dots.cpp
Normal file
654
src/nfagraph/ng_anchored_dots.cpp
Normal file
@@ -0,0 +1,654 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Analysis pass to reform leading dots.
|
||||
*
|
||||
* We have found that many regexes found in the wild use an anchored dot-repeat
|
||||
* to represent an unanchored pattern, particularly if they have been used with
|
||||
* a regex engine that assumes that a pattern is anchored. This pass reforms
|
||||
* patterns that begin with sequences of dots into a more standard form.
|
||||
*
|
||||
* In addition, both anchored and unanchored patterns with dot repeats as
|
||||
* prefixes will have these prefixes reformed into a canonical form, which some
|
||||
* later analyses depend upon.
|
||||
*/
|
||||
#include "ng_anchored_dots.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/container.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <queue>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
bool findStarts(const NGHolder &g, set<NFAVertex> &anchored,
|
||||
set<NFAVertex> &unanchored) {
|
||||
// Populate unanchored map
|
||||
for (auto v : adjacent_vertices_range(g.startDs, g)) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
unanchored.insert(v);
|
||||
}
|
||||
|
||||
// Populate anchored map
|
||||
for (auto v : adjacent_vertices_range(g.start, g)) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
anchored.insert(v);
|
||||
}
|
||||
|
||||
if (unanchored == anchored) {
|
||||
anchored.clear();
|
||||
} else if (!unanchored.empty() && !anchored.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return !anchored.empty() || !unanchored.empty();
|
||||
}
|
||||
|
||||
namespace {
|
||||
class DotInfo {
|
||||
public:
|
||||
DotInfo(NFAVertex v, bool se, u32 idx)
|
||||
: vertex(v), hasSelfLoop(se), index(idx) {}
|
||||
|
||||
bool operator<(const DotInfo &other) const {
|
||||
if (hasSelfLoop != other.hasSelfLoop)
|
||||
return hasSelfLoop < other.hasSelfLoop;
|
||||
// tie break with vertex id: lowest ID wins
|
||||
return index > other.index;
|
||||
}
|
||||
|
||||
NFAVertex vertex;
|
||||
bool hasSelfLoop;
|
||||
u32 index;
|
||||
};
|
||||
}
|
||||
|
||||
// Returns nullptr if all vertices in the given set are not dots.
|
||||
// We can only pick one dot vertex, so we go for a dot-star if it exists,
|
||||
// otherwise the dot without a self-edge with the lowest ID.
|
||||
static
|
||||
NFAVertex findReformable(const NGHolder &g, const set<NFAVertex> &starts,
|
||||
set<NFAVertex> &otherV) {
|
||||
priority_queue<DotInfo> dotq;
|
||||
for (auto v : starts) {
|
||||
if (is_dot(v, g)) {
|
||||
u32 idx = g[v].index;
|
||||
dotq.push(DotInfo(v, hasSelfLoop(v, g), idx));
|
||||
}
|
||||
}
|
||||
|
||||
if (dotq.empty()) {
|
||||
return NFAGraph::null_vertex();
|
||||
}
|
||||
|
||||
const DotInfo &dot = dotq.top();
|
||||
otherV = starts;
|
||||
otherV.erase(dot.vertex);
|
||||
DEBUG_PRINTF("selected dot vertex %u (%s)\n", dot.index,
|
||||
dot.hasSelfLoop ? "has self-edge" : "no self-edge");
|
||||
DEBUG_PRINTF("%zu other vertices\n", otherV.size());
|
||||
return dot.vertex;
|
||||
}
|
||||
|
||||
// Returns true if the given vertex is only preceded by start. If start is
|
||||
// graph.startDs (i.e. unanchored), the given vertex can also be connected to
|
||||
// graph.start. If selfLoopIsAcceptable is set, self-loops are ignored.
|
||||
static
|
||||
bool isStartNode(NFAVertex v, NFAVertex start, const NGHolder &g,
|
||||
bool selfLoopIsAcceptable) {
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (selfLoopIsAcceptable && u == v) {
|
||||
continue;
|
||||
} else if (u == start) {
|
||||
continue;
|
||||
} else if (start == g.startDs && u == g.start) {
|
||||
continue;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Note: this will only remove the anchored first dot in the chain -- any other
|
||||
// removable nodes will be handled by the unanchored case below.
|
||||
static
|
||||
void reformAnchoredRepeatsComponent(NGHolder &g,
|
||||
set<NFAVertex> &compAnchoredStarts,
|
||||
set<NFAVertex> &compUnanchoredStarts,
|
||||
set<NFAVertex> &dead, depth *startBegin,
|
||||
depth *startEnd) {
|
||||
// anchored cases can not have any unanchored starts
|
||||
if (!compUnanchoredStarts.empty()) {
|
||||
DEBUG_PRINTF("we have unanchored starts, skipping\n");
|
||||
return;
|
||||
}
|
||||
|
||||
NFAVertex dotV = NFAGraph::null_vertex();
|
||||
set<NFAVertex> otherV;
|
||||
dotV = findReformable(g, compAnchoredStarts, otherV);
|
||||
if (dotV == NFAGraph::null_vertex()) {
|
||||
DEBUG_PRINTF("no candidate reformable dot found.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
NFAEdge loopEdge;
|
||||
bool selfLoop = false;
|
||||
bool bustOut = false;
|
||||
|
||||
for (const auto &e : out_edges_range(dotV, g)) {
|
||||
NFAVertex t = target(e, g);
|
||||
if (t == dotV) {
|
||||
selfLoop = true;
|
||||
loopEdge = e;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (is_special(t, g)) {
|
||||
bustOut = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!otherV.empty() && otherV.find(t) == otherV.end()) {
|
||||
bustOut = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (bustOut) {
|
||||
DEBUG_PRINTF("busting out\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!isStartNode(dotV, g.start, g, true)) {
|
||||
DEBUG_PRINTF("fleeing: vertex %u has other preds\n", g[dotV].index);
|
||||
return;
|
||||
}
|
||||
|
||||
/* get bounds */
|
||||
depth min;
|
||||
depth max = 1;
|
||||
|
||||
if (selfLoop) {
|
||||
// A self-loop indicates that this is a '.+' or '.*'
|
||||
max = depth::infinity();
|
||||
}
|
||||
|
||||
if (!otherV.empty()) {
|
||||
/* We require that the successors of the dot node are are the same
|
||||
* as the start vertex. TODO: remember why.
|
||||
*/
|
||||
if (selfLoop) {
|
||||
if (otherV.size() != out_degree(dotV, g) - 1) {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
if (otherV.size() != out_degree(dotV, g)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
min = 0;
|
||||
} else {
|
||||
min = 1;
|
||||
}
|
||||
|
||||
*startBegin = min;
|
||||
*startEnd = max;
|
||||
|
||||
for (auto t : adjacent_vertices_range(dotV, g)) {
|
||||
if (t != dotV) {
|
||||
add_edge_if_not_present(g.startDs, t, g);
|
||||
add_edge_if_not_present(g.start, t, g);
|
||||
compUnanchoredStarts.insert(t);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto v : otherV) {
|
||||
remove_edge(g.start, v, g);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("removing vertex %u\n", g[dotV].index);
|
||||
clear_vertex(dotV, g);
|
||||
dead.insert(dotV);
|
||||
compAnchoredStarts.erase(dotV);
|
||||
}
|
||||
|
||||
static
|
||||
void reformUnanchoredRepeatsComponent(NGHolder &g,
|
||||
set<NFAVertex> &compAnchoredStarts,
|
||||
set<NFAVertex> &compUnanchoredStarts,
|
||||
set<NFAVertex> &dead,
|
||||
depth *startBegin, depth *startEnd) {
|
||||
// unanchored cases can not have any anchored starts
|
||||
if (!compAnchoredStarts.empty()) {
|
||||
DEBUG_PRINTF("we have anchored starts, skipping\n");
|
||||
return;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
NFAVertex dotV = NFAGraph::null_vertex();
|
||||
set<NFAVertex> otherV;
|
||||
dotV = findReformable(g, compUnanchoredStarts, otherV);
|
||||
if (dotV == NFAGraph::null_vertex()) {
|
||||
DEBUG_PRINTF("no candidate reformable dot found.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
NFAEdge loopEdge;
|
||||
bool selfLoop = false;
|
||||
bool bustOut = false;
|
||||
|
||||
for (const auto &e : out_edges_range(dotV, g)) {
|
||||
NFAVertex t = target(e, g);
|
||||
|
||||
if (t == dotV) {
|
||||
selfLoop = true;
|
||||
loopEdge = e;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (is_special(t, g)) {
|
||||
bustOut = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!otherV.empty() && otherV.find(t) == otherV.end()) {
|
||||
bustOut = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (bustOut) {
|
||||
DEBUG_PRINTF("busting out\n");
|
||||
if (!selfLoop) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (auto v : otherV) {
|
||||
if (!edge(dotV, v, g).second) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// A self-loop indicates that this is a '.+' or '.*'
|
||||
DEBUG_PRINTF("self-loop detected on %u\n", g[dotV].index);
|
||||
*startEnd = depth::infinity();
|
||||
remove_edge(dotV, dotV, g);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!isStartNode(dotV, g.startDs, g, true)) {
|
||||
DEBUG_PRINTF("fleeing: vertex %u has other preds\n", g[dotV].index);
|
||||
return;
|
||||
}
|
||||
|
||||
/* get bounds */
|
||||
depth min = 1;
|
||||
depth max = 1;
|
||||
|
||||
if (selfLoop) {
|
||||
// A self-loop indicates that this is a '.+' or '.*'
|
||||
DEBUG_PRINTF("self-loop detected\n");
|
||||
max = depth::infinity();
|
||||
}
|
||||
|
||||
if (!otherV.empty()) {
|
||||
if (!selfLoop && otherV.size() != out_degree(dotV, g)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (selfLoop && otherV.size() != out_degree(dotV, g) - 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (min > depth(1)) {
|
||||
/* this is not a case we can handle */
|
||||
DEBUG_PRINTF("min greater than one, skipping\n");
|
||||
return;
|
||||
}
|
||||
min = 0;
|
||||
}
|
||||
|
||||
*startBegin += min;
|
||||
*startEnd += max;
|
||||
|
||||
for (auto v : otherV) {
|
||||
remove_edge(g.start, v, g);
|
||||
remove_edge(g.startDs, v, g);
|
||||
}
|
||||
|
||||
compUnanchoredStarts.clear();
|
||||
for (auto t : adjacent_vertices_range(dotV, g)) {
|
||||
if (t != dotV) {
|
||||
DEBUG_PRINTF("connecting sds -> %u\n", g[t].index);
|
||||
add_edge(g.startDs, t, g);
|
||||
add_edge(g.start, t, g);
|
||||
compUnanchoredStarts.insert(t);
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("removing vertex %u\n", g[dotV].index);
|
||||
dead.insert(dotV);
|
||||
clear_vertex(dotV, g);
|
||||
compUnanchoredStarts.erase(dotV);
|
||||
}
|
||||
}
|
||||
|
||||
// for t to be another optional dot, it must have only in-edges from v and from
|
||||
// starts
|
||||
static
|
||||
bool isOptionalDot(NFAVertex t, NFAVertex v, const NGHolder &g) {
|
||||
if (!is_dot(t, g)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool found_v = false, found_start = false;
|
||||
|
||||
for (auto u : inv_adjacent_vertices_range(t, g)) {
|
||||
if (u == v) {
|
||||
found_v = true;
|
||||
} else if (u == g.start || u == g.startDs) {
|
||||
found_start = true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return found_v && found_start;
|
||||
}
|
||||
|
||||
static
|
||||
bool gatherParticipants(const NGHolder &g,
|
||||
NFAVertex start, NFAVertex initialDot,
|
||||
set<NFAVertex> &dots, set<NFAVertex> &succ) {
|
||||
// Walk the graph downwards from the initial dot; each dot will have:
|
||||
// 1) a single optional dot successor, or
|
||||
// 2) N successors (our terminating case)
|
||||
dots.insert(initialDot);
|
||||
NFAVertex v = initialDot;
|
||||
|
||||
while (out_degree(v, g) == 1) {
|
||||
NFAVertex t = *(adjacent_vertices(v, g).first);
|
||||
// for t to be another optional dot, it must have only in-edges from v
|
||||
// and from starts
|
||||
if (isOptionalDot(t, v, g)) {
|
||||
// another dot; bail if we've seen it once already
|
||||
if (dots.find(t) != dots.end()) {
|
||||
DEBUG_PRINTF("cycle detected at vertex %u\n", g[t].index);
|
||||
return false;
|
||||
}
|
||||
dots.insert(t);
|
||||
v = t;
|
||||
continue;
|
||||
}
|
||||
// otherwise, we found a terminating dot state
|
||||
break;
|
||||
}
|
||||
|
||||
// Our terminating states are the successors of v.
|
||||
// All of these MUST have an edge from start as well.
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
succ.insert(w);
|
||||
if (!edge(start, w, g).second) {
|
||||
DEBUG_PRINTF("failing, vertex %u does not have edge from start\n",
|
||||
g[w].index);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* All the non chained v connected to start must be in succ as well
|
||||
* TODO: remember why (and document). */
|
||||
for (auto u : adjacent_vertices_range(start, g)) {
|
||||
if (is_special(u, g)) {
|
||||
continue;
|
||||
}
|
||||
if (!contains(dots, u) && !contains(succ, u)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return !succ.empty();
|
||||
}
|
||||
|
||||
static
|
||||
void collapseVariableDotRepeat(NGHolder &g, NFAVertex start,
|
||||
set<NFAVertex> &dead, UNUSED depth *startBegin,
|
||||
depth *startEnd) {
|
||||
// Handle optional dot repeat prefixes, e.g.
|
||||
// /^.{0,30}foo/s, /^.{0,5}foo/s, unanchored equivs
|
||||
// Note that this code assumes that fixed repeats ('^.{5,20}') have been
|
||||
// pruned already, down (in this case) to '^.{0,15}'.
|
||||
|
||||
// The first of our optional dots must be connected to start. The jump edge
|
||||
// past it will be verified in gatherParticipants(). If start is
|
||||
// graph.start, it should not be connected to startDs.
|
||||
NFAVertex initialDot = NFAGraph::null_vertex();
|
||||
for (auto v : adjacent_vertices_range(start, g)) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
if (is_dot(v, g) && isStartNode(v, start, g, false)) {
|
||||
if (initialDot) {
|
||||
return;
|
||||
}
|
||||
initialDot = v;
|
||||
DEBUG_PRINTF("initial dot vertex is %u\n", g[v].index);
|
||||
}
|
||||
}
|
||||
|
||||
if (!initialDot) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Collect all the other optional dot vertices and the successor vertices
|
||||
// by walking down the graph from initialDot
|
||||
set<NFAVertex> dots, succ;
|
||||
if (!gatherParticipants(g, start, initialDot, dots, succ)) {
|
||||
DEBUG_PRINTF("gatherParticipants failed\n");
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("optional dot repeat with %zu participants, "
|
||||
"terminating in %zu non-dot nodes\n",
|
||||
dots.size(), succ.size());
|
||||
|
||||
// Remove all the participants and set the start offset
|
||||
dead.insert(dots.begin(), dots.end());
|
||||
|
||||
DEBUG_PRINTF("current offsets: %s-%s\n", startBegin->str().c_str(),
|
||||
startEnd->str().c_str());
|
||||
|
||||
if (start == g.start && startEnd->is_infinite()) {
|
||||
*startEnd = dots.size();
|
||||
} else if (startEnd->is_finite()) {
|
||||
*startEnd += dots.size();
|
||||
}
|
||||
assert(startEnd->is_reachable());
|
||||
|
||||
// For determinism, copy and sort our successor vertices.
|
||||
deque<NFAVertex> s(succ.begin(), succ.end());
|
||||
sort(s.begin(), s.end(), make_index_ordering(g));
|
||||
|
||||
// Connect our successor vertices to both start and startDs.
|
||||
for (auto v : s) {
|
||||
add_edge_if_not_present(g.start, v, g);
|
||||
add_edge_if_not_present(g.startDs, v, g);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void deleteVertices(set<NFAVertex> &dead, NGHolder &g) {
|
||||
if (!dead.empty()) {
|
||||
DEBUG_PRINTF("pruning %zu vertices\n", dead.size());
|
||||
remove_vertices(dead, g);
|
||||
}
|
||||
dead.clear();
|
||||
}
|
||||
|
||||
static
|
||||
void reformAnchoredRepeats(NGHolder &g, depth *startBegin, depth *startEnd) {
|
||||
DEBUG_PRINTF("component\n");
|
||||
set<NFAVertex> anchored, unanchored, dead;
|
||||
if (!findStarts(g, anchored, unanchored)) {
|
||||
DEBUG_PRINTF("no starts\n");
|
||||
return;
|
||||
}
|
||||
|
||||
reformAnchoredRepeatsComponent(g, anchored, unanchored, dead, startBegin,
|
||||
startEnd);
|
||||
deleteVertices(dead, g);
|
||||
|
||||
reformUnanchoredRepeatsComponent(g, anchored, unanchored, dead, startBegin,
|
||||
startEnd);
|
||||
deleteVertices(dead, g);
|
||||
}
|
||||
|
||||
static
|
||||
void collapseVariableRepeats(NGHolder &g, depth *startBegin, depth *startEnd) {
|
||||
DEBUG_PRINTF("collapseVariableRepeats\n");
|
||||
set<NFAVertex> dead;
|
||||
|
||||
collapseVariableDotRepeat(g, g.start, dead, startBegin, startEnd);
|
||||
deleteVertices(dead, g);
|
||||
|
||||
collapseVariableDotRepeat(g, g.startDs, dead, startBegin, startEnd);
|
||||
deleteVertices(dead, g);
|
||||
}
|
||||
|
||||
static
|
||||
void addDotsBetween(NGHolder &g, NFAVertex lhs, vector<NFAVertex> &rhs,
|
||||
depth min_repeat, depth max_repeat) {
|
||||
const bool unbounded = max_repeat.is_infinite();
|
||||
if (unbounded) {
|
||||
max_repeat = min_repeat;
|
||||
}
|
||||
|
||||
assert(max_repeat.is_finite());
|
||||
|
||||
NFAVertex u = lhs;
|
||||
|
||||
if (!min_repeat && unbounded) {
|
||||
NFAVertex v = add_vertex(g);
|
||||
add_edge(u, v, g);
|
||||
g[v].char_reach.setall();
|
||||
|
||||
for (auto w : rhs) {
|
||||
add_edge(lhs, w, g);
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < min_repeat; i++) {
|
||||
NFAVertex v = add_vertex(g);
|
||||
add_edge(u, v, g);
|
||||
g[v].char_reach.setall();
|
||||
u = v;
|
||||
}
|
||||
|
||||
NFAVertex split = u;
|
||||
/* lhs now split point for optional */
|
||||
for (u32 i = min_repeat; i < max_repeat; i++) {
|
||||
NFAVertex v = add_vertex(g);
|
||||
add_edge(u, v, g);
|
||||
if (u != split) {
|
||||
add_edge(split, v, g);
|
||||
}
|
||||
g[v].char_reach.setall();
|
||||
u = v;
|
||||
}
|
||||
|
||||
if (unbounded) {
|
||||
add_edge(u, u, g);
|
||||
}
|
||||
|
||||
for (auto w : rhs) {
|
||||
add_edge(u, w, g);
|
||||
if (split != u) {
|
||||
add_edge(split, w, g);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void restoreLeadingDots(NGHolder &g, const depth &startBegin,
|
||||
const depth &startEnd) {
|
||||
if (startBegin == depth(0) && startEnd.is_infinite()) {
|
||||
return;
|
||||
}
|
||||
DEBUG_PRINTF("ungobble (%s, %s)\n", startBegin.str().c_str(),
|
||||
startEnd.str().c_str());
|
||||
|
||||
for (UNUSED auto v : adjacent_vertices_range(g.start, g)) {
|
||||
assert(edge(g.startDs, v, g).second);
|
||||
}
|
||||
clear_out_edges(g.start, g);
|
||||
add_edge(g.start, g.startDs, g);
|
||||
|
||||
const bool unbounded = startEnd.is_infinite();
|
||||
|
||||
NFAVertex root = unbounded ? g.startDs : g.start;
|
||||
|
||||
vector<NFAVertex> rhs;
|
||||
insert(&rhs, rhs.end(), adjacent_vertices(g.startDs, g));
|
||||
rhs.erase(remove(rhs.begin(), rhs.end(), g.startDs), rhs.end());
|
||||
for (auto v : rhs) {
|
||||
remove_edge(g.startDs, v, g);
|
||||
}
|
||||
|
||||
addDotsBetween(g, root, rhs, startBegin, startEnd);
|
||||
g.renumberVertices();
|
||||
g.renumberEdges();
|
||||
}
|
||||
|
||||
// Entry point.
|
||||
void reformLeadingDots(NGHolder &g) {
|
||||
depth startBegin(0);
|
||||
depth startEnd = depth::infinity();
|
||||
|
||||
reformAnchoredRepeats(g, &startBegin, &startEnd);
|
||||
collapseVariableRepeats(g, &startBegin, &startEnd);
|
||||
restoreLeadingDots(g, startBegin, startEnd);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
45
src/nfagraph/ng_anchored_dots.h
Normal file
45
src/nfagraph/ng_anchored_dots.h
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Analysis pass to reform leading dots.
|
||||
*/
|
||||
|
||||
#ifndef NG_ANCHORED_BOUNDED_REPEATS_H
|
||||
#define NG_ANCHORED_BOUNDED_REPEATS_H
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
|
||||
/** \brief Reform leading dots in \p g. Should not be used if SoM is required. */
|
||||
void reformLeadingDots(NGHolder &g);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
559
src/nfagraph/ng_asserts.cpp
Normal file
559
src/nfagraph/ng_asserts.cpp
Normal file
@@ -0,0 +1,559 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Resolve special assert vertices.
|
||||
*
|
||||
* The assert resolution algorithm proceeds by iterating over those edges with
|
||||
* assertion flags, considering source and target vertices of each edge. If a
|
||||
* vertex has a superset of the reachability demanded by the assertion on the
|
||||
* edge, it is split into alternatives providing the word and non-word paths
|
||||
* through that vertex.
|
||||
*
|
||||
* A great deal of the complexity in the resolveAsserts pass is devoted to
|
||||
* handling these assertions when the UCP flag is specified (meaning \\w and \\W
|
||||
* are implemented with Unicode properties, rather than their ASCII
|
||||
* interpretation) and the prefiltering flag is also used. Complete,
|
||||
* non-prefiltering UCP support is not available yet.
|
||||
*/
|
||||
#include "ng_asserts.h"
|
||||
|
||||
#include "ng.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_redundancy.h"
|
||||
#include "ng_util.h"
|
||||
#include "parser/position.h" // for POS flags
|
||||
#include "util/bitutils.h" // for findAndClearLSB_32
|
||||
#include "util/boundary_reports.h"
|
||||
#include "util/container.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/unicode_def.h"
|
||||
|
||||
#include <queue>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Hard limit on the maximum number of vertices we'll clone before we
|
||||
* throw up our hands and report 'Pattern too large.' */
|
||||
static const size_t MAX_CLONED_VERTICES = 2048;
|
||||
|
||||
/** \brief The definition of \\w, since we use it everywhere in here. */
|
||||
static const CharReach CHARREACH_WORD(CharReach('a', 'z') |
|
||||
CharReach('A', 'Z') | CharReach('0', '9') | CharReach('_'));
|
||||
|
||||
/** \brief \\W is the inverse of \\w */
|
||||
static const CharReach CHARREACH_NONWORD(~CHARREACH_WORD);
|
||||
|
||||
/** \brief Prefiltering definition of \\w for UCP mode.
|
||||
*
|
||||
* Includes all high bytes as to capture all non-ASCII, however depending on
|
||||
* direction only continuers or starters are strictly required - as the input
|
||||
* is well-formed, this laxness will not cost us. */
|
||||
static const CharReach CHARREACH_WORD_UCP_PRE(CHARREACH_WORD
|
||||
| CharReach(128, 255));
|
||||
|
||||
/** \brief Prefiltering definition of \\W for UCP Mode.
|
||||
*
|
||||
* (non-word already includes high bytes) */
|
||||
static const CharReach CHARREACH_NONWORD_UCP_PRE(CHARREACH_NONWORD);
|
||||
|
||||
/** \brief Find all the edges with assertion flags. */
|
||||
static
|
||||
vector<NFAEdge> getAsserts(const NGHolder &g) {
|
||||
vector<NFAEdge> out;
|
||||
for (const auto &e : edges_range(g)) {
|
||||
if (g[e].assert_flags) {
|
||||
out.push_back(e);
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
static
|
||||
void addToSplit(const NGHolder &g, NFAVertex v, map<u32, NFAVertex> *to_split) {
|
||||
DEBUG_PRINTF("%u needs splitting\n", g[v].index);
|
||||
to_split->emplace(g[v].index, v);
|
||||
}
|
||||
|
||||
/** \brief Find vertices that need to be split due to an assertion edge.
|
||||
*
|
||||
* A vertex needs to be split if has an edge to/from it with an assert with a
|
||||
* restriction on the relevant end. */
|
||||
static
|
||||
void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts,
|
||||
map<u32, NFAVertex> *to_split,
|
||||
map<u32, NFAVertex> *to_split_ucp) {
|
||||
for (const auto &e : asserts) {
|
||||
NFAVertex u = source(e, g);
|
||||
NFAVertex v = target(e, g);
|
||||
u32 flags = g[e].assert_flags;
|
||||
assert(flags);
|
||||
|
||||
const CharReach &u_cr = g[u].char_reach;
|
||||
const CharReach &v_cr = g[v].char_reach;
|
||||
|
||||
bool ucp_assert = flags & UCP_ASSERT_FLAGS;
|
||||
bool normal_assert = flags & NON_UCP_ASSERT_FLAGS;
|
||||
/* In reality, an expression can only be entirely ucp or not ucp */
|
||||
assert(ucp_assert != normal_assert);
|
||||
|
||||
if (normal_assert) {
|
||||
/* assume any flag results in us have to split if the vertex is not
|
||||
* a subset of word or completely disjoint from it. We could be more
|
||||
* nuanced if flags is a disjunction of multiple assertions. */
|
||||
if (!u_cr.isSubsetOf(CHARREACH_WORD)
|
||||
&& !u_cr.isSubsetOf(CHARREACH_NONWORD)
|
||||
&& u != g.start) { /* start is always considered a nonword */
|
||||
addToSplit(g, u, to_split);
|
||||
}
|
||||
|
||||
if (!v_cr.isSubsetOf(CHARREACH_WORD)
|
||||
&& !v_cr.isSubsetOf(CHARREACH_NONWORD)
|
||||
&& v != g.accept /* accept require special handling, done on a
|
||||
* per edge basis in resolve asserts
|
||||
*/
|
||||
&& v != g.acceptEod) { /* eod is always considered a nonword */
|
||||
addToSplit(g, v, to_split);
|
||||
}
|
||||
}
|
||||
|
||||
if (ucp_assert) {
|
||||
/* note: the ucp prefilter crs overlap - requires a bit more care */
|
||||
if (u == g.start) { /* start never needs to be split,
|
||||
* treat nonword */
|
||||
} else if (flags & POS_FLAG_ASSERT_WORD_TO_ANY_UCP) {
|
||||
if (!u_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE)
|
||||
&& !u_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) {
|
||||
addToSplit(g, u, to_split_ucp);
|
||||
}
|
||||
} else {
|
||||
assert(flags & POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP);
|
||||
if (!u_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE)
|
||||
&& !u_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) {
|
||||
addToSplit(g, u, to_split_ucp);
|
||||
}
|
||||
}
|
||||
|
||||
if (v == g.acceptEod /* eod is always considered a nonword */
|
||||
|| v == g.accept) { /* accept require special handling, done on
|
||||
* a per edge basis in resolve asserts */
|
||||
} else if (flags & POS_FLAG_ASSERT_ANY_TO_WORD_UCP) {
|
||||
if (!v_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE)
|
||||
&& !v_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) {
|
||||
addToSplit(g, v, to_split_ucp);
|
||||
}
|
||||
} else {
|
||||
assert(flags & POS_FLAG_ASSERT_ANY_TO_NONWORD_UCP);
|
||||
if (!v_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE)
|
||||
&& !v_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) {
|
||||
addToSplit(g, v, to_split_ucp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) {
|
||||
// Don't try and set the report ID of a special vertex.
|
||||
assert(!is_special(v, g));
|
||||
|
||||
// If there's a report set already, we're replacing it.
|
||||
g[v].reports.clear();
|
||||
|
||||
Report ir = rm.getBasicInternalReport(g, adj);
|
||||
|
||||
g[v].reports.insert(rm.getInternalId(ir));
|
||||
DEBUG_PRINTF("set report id for vertex %u, adj %d\n", g[v].index, adj);
|
||||
}
|
||||
|
||||
static
|
||||
NFAVertex makeClone(ReportManager &rm, NGWrapper &g, NFAVertex v,
|
||||
const CharReach &cr_mask) {
|
||||
NFAVertex clone = clone_vertex(g, v);
|
||||
g[clone].char_reach &= cr_mask;
|
||||
clone_out_edges(g, v, clone);
|
||||
clone_in_edges(g, v, clone);
|
||||
|
||||
if (v == g.startDs) {
|
||||
if (g.utf8) {
|
||||
g[clone].char_reach &= ~UTF_START_CR;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("marked as virt\n");
|
||||
g[clone].assert_flags = POS_FLAG_VIRTUAL_START;
|
||||
|
||||
setReportId(rm, g, clone, 0);
|
||||
}
|
||||
|
||||
return clone;
|
||||
}
|
||||
|
||||
static
|
||||
void splitVertex(ReportManager &rm, NGWrapper &g, NFAVertex v, bool ucp) {
|
||||
assert(v != g.start);
|
||||
assert(v != g.accept);
|
||||
assert(v != g.acceptEod);
|
||||
DEBUG_PRINTF("partitioning vertex %u ucp:%d\n", g[v].index, (int)ucp);
|
||||
|
||||
CharReach cr_word = ucp ? CHARREACH_WORD_UCP_PRE : CHARREACH_WORD;
|
||||
CharReach cr_nonword = ucp ? CHARREACH_NONWORD_UCP_PRE : CHARREACH_NONWORD;
|
||||
|
||||
auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; };
|
||||
|
||||
// Split v into word/nonword vertices with only asserting out-edges.
|
||||
NFAVertex w_out = makeClone(rm, g, v, cr_word);
|
||||
NFAVertex nw_out = makeClone(rm, g, v, cr_nonword);
|
||||
remove_out_edge_if(w_out, has_no_assert, g);
|
||||
remove_out_edge_if(nw_out, has_no_assert, g);
|
||||
|
||||
// Split v into word/nonword vertices with only asserting in-edges.
|
||||
NFAVertex w_in = makeClone(rm, g, v, cr_word);
|
||||
NFAVertex nw_in = makeClone(rm, g, v, cr_nonword);
|
||||
remove_in_edge_if(w_in, has_no_assert, g);
|
||||
remove_in_edge_if(nw_in, has_no_assert, g);
|
||||
|
||||
// Prune edges with asserts from original v.
|
||||
auto has_assert = [&g](const NFAEdge &e) { return g[e].assert_flags; };
|
||||
remove_in_edge_if(v, has_assert, g);
|
||||
remove_out_edge_if(v, has_assert, g);
|
||||
}
|
||||
|
||||
static
|
||||
void resolveEdges(ReportManager &rm, NGWrapper &g, set<NFAEdge> *dead) {
|
||||
for (const auto &e : edges_range(g)) {
|
||||
u32 flags = g[e].assert_flags;
|
||||
if (!flags) {
|
||||
continue;
|
||||
}
|
||||
|
||||
NFAVertex u = source(e, g);
|
||||
NFAVertex v = target(e, g);
|
||||
|
||||
assert(u != g.startDs);
|
||||
|
||||
const CharReach &u_cr = g[u].char_reach;
|
||||
const CharReach &v_cr = g[v].char_reach;
|
||||
|
||||
bool impassable = true;
|
||||
bool ucp = flags & UCP_ASSERT_FLAGS;
|
||||
DEBUG_PRINTF("resolving edge %u->%u (flags=0x%x, ucp=%d)\n", g[u].index,
|
||||
g[v].index, flags, (int)ucp);
|
||||
while (flags && impassable) {
|
||||
u32 flag = 1U << findAndClearLSB_32(&flags);
|
||||
switch (flag) {
|
||||
case POS_FLAG_ASSERT_NONWORD_TO_NONWORD:
|
||||
case POS_FLAG_ASSERT_NONWORD_TO_WORD:
|
||||
if ((u_cr & CHARREACH_NONWORD).none() && u != g.start) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
case POS_FLAG_ASSERT_WORD_TO_NONWORD:
|
||||
case POS_FLAG_ASSERT_WORD_TO_WORD:
|
||||
if ((u_cr & CHARREACH_WORD).none() || u == g.start) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP:
|
||||
case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP:
|
||||
if ((u_cr & ~CHARREACH_NONWORD_UCP_PRE).any() && u != g.start) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP:
|
||||
case POS_FLAG_ASSERT_WORD_TO_WORD_UCP:
|
||||
if ((u_cr & ~CHARREACH_WORD_UCP_PRE).any() || u == g.start) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
if (v == g.accept) {
|
||||
/* accept special will need to be treated specially later */
|
||||
impassable = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (flag) {
|
||||
case POS_FLAG_ASSERT_NONWORD_TO_NONWORD:
|
||||
case POS_FLAG_ASSERT_WORD_TO_NONWORD:
|
||||
if ((v_cr & CHARREACH_NONWORD).none() && v != g.acceptEod) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
case POS_FLAG_ASSERT_WORD_TO_WORD:
|
||||
case POS_FLAG_ASSERT_NONWORD_TO_WORD:
|
||||
if ((v_cr & CHARREACH_WORD).none() || v == g.acceptEod) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP:
|
||||
case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP:
|
||||
if ((v_cr & ~CHARREACH_NONWORD_UCP_PRE).any()
|
||||
&& v != g.acceptEod) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
case POS_FLAG_ASSERT_WORD_TO_WORD_UCP:
|
||||
case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP:
|
||||
if ((v_cr & ~CHARREACH_WORD_UCP_PRE).any()
|
||||
|| v == g.acceptEod) {
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
impassable = false;
|
||||
}
|
||||
|
||||
if (impassable) {
|
||||
dead->insert(e);
|
||||
} else if (v == g.accept && !ucp) {
|
||||
bool u_w = (u_cr & CHARREACH_NONWORD).none() && u != g.start;
|
||||
UNUSED bool u_nw = (u_cr & CHARREACH_WORD).none() || u == g.start;
|
||||
assert(u_w != u_nw);
|
||||
bool v_w = false;
|
||||
bool v_nw = false;
|
||||
|
||||
flags = g[e].assert_flags;
|
||||
if (u_w) {
|
||||
v_w = flags & POS_FLAG_ASSERT_WORD_TO_WORD;
|
||||
v_nw = flags & POS_FLAG_ASSERT_WORD_TO_NONWORD;
|
||||
} else {
|
||||
v_w = flags & POS_FLAG_ASSERT_NONWORD_TO_WORD;
|
||||
v_nw = flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD;
|
||||
}
|
||||
assert(v_w || v_nw);
|
||||
if (v_w && v_nw) {
|
||||
/* edge is effectively unconditional */
|
||||
g[e].assert_flags = 0;
|
||||
} else if (v_w) {
|
||||
/* need to add a word byte */
|
||||
NFAVertex vv = add_vertex(g);
|
||||
setReportId(rm, g, vv, -1);
|
||||
g[vv].char_reach = CHARREACH_WORD;
|
||||
add_edge(vv, g.accept, g);
|
||||
g[e].assert_flags = 0;
|
||||
add_edge(u, vv, g[e], g);
|
||||
dead->insert(e);
|
||||
} else {
|
||||
/* need to add a non word byte or see eod */
|
||||
NFAVertex vv = add_vertex(g);
|
||||
setReportId(rm, g, vv, -1);
|
||||
g[vv].char_reach = CHARREACH_NONWORD;
|
||||
add_edge(vv, g.accept, g);
|
||||
g[e].assert_flags = 0;
|
||||
add_edge(u, vv, g[e], g);
|
||||
if (!edge(u, g.acceptEod, g).second) {
|
||||
add_edge(u, g.acceptEod, g[e], g);
|
||||
} else {
|
||||
/* there may already be a different edge from start to eod
|
||||
* if so we need to make it unconditional and alive
|
||||
*/
|
||||
NFAEdge start_eod = edge(u, g.acceptEod, g).first;
|
||||
|
||||
g[start_eod].assert_flags = 0;
|
||||
dead->erase(start_eod);
|
||||
|
||||
}
|
||||
dead->insert(e);
|
||||
}
|
||||
} else if (v == g.accept && ucp) {
|
||||
DEBUG_PRINTF("resolving ucp assert to accept\n");
|
||||
assert(u_cr.any());
|
||||
bool u_w = (u_cr & CHARREACH_WORD_UCP_PRE).any()
|
||||
&& u != g.start;
|
||||
bool u_nw = (u_cr & CHARREACH_NONWORD_UCP_PRE).any()
|
||||
|| u == g.start;
|
||||
assert(u_w || u_nw);
|
||||
|
||||
bool v_w = false;
|
||||
bool v_nw = false;
|
||||
|
||||
flags = g[e].assert_flags;
|
||||
if (u_w) {
|
||||
v_w |= flags & POS_FLAG_ASSERT_WORD_TO_WORD_UCP;
|
||||
v_nw |= flags & POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP;
|
||||
}
|
||||
if (u_nw) {
|
||||
v_w |= flags & POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP;
|
||||
v_nw |= flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP;
|
||||
}
|
||||
assert(v_w || v_nw);
|
||||
if (v_w && v_nw) {
|
||||
/* edge is effectively unconditional */
|
||||
g[e].assert_flags = 0;
|
||||
} else if (v_w) {
|
||||
/* need to add a word byte */
|
||||
NFAVertex vv = add_vertex(g);
|
||||
setReportId(rm, g, vv, -1);
|
||||
g[vv].char_reach = CHARREACH_WORD_UCP_PRE;
|
||||
add_edge(vv, g.accept, g);
|
||||
g[e].assert_flags = 0;
|
||||
add_edge(u, vv, g[e], g);
|
||||
dead->insert(e);
|
||||
} else {
|
||||
/* need to add a non word byte or see eod */
|
||||
NFAVertex vv = add_vertex(g);
|
||||
setReportId(rm, g, vv, -1);
|
||||
g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE;
|
||||
add_edge(vv, g.accept, g);
|
||||
g[e].assert_flags = 0;
|
||||
add_edge(u, vv, g[e], g);
|
||||
if (!edge(u, g.acceptEod, g).second) {
|
||||
add_edge(u, g.acceptEod, g[e], g);
|
||||
} else {
|
||||
/* there may already be a different edge from start to eod
|
||||
* if so we need to make it unconditional and alive
|
||||
*/
|
||||
NFAEdge start_eod = edge(u, g.acceptEod, g).first;
|
||||
|
||||
g[start_eod].assert_flags = 0;
|
||||
dead->erase(start_eod);
|
||||
|
||||
}
|
||||
dead->insert(e);
|
||||
}
|
||||
} else {
|
||||
/* we can remove the asserts as we have partitioned the vertices
|
||||
* into w/nw around the assert edges
|
||||
*/
|
||||
g[e].assert_flags = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Remove all assert flags from the edges of \p g by rewriting the
 * graph so that the assertions are enforced structurally.
 *
 * Does nothing if getAsserts() finds no asserting edges. Otherwise the
 * vertices selected by findSplitters() are split (plain first, then UCP),
 * the asserting edges are resolved, dead edges are removed, and the graph is
 * pruned and renumbered. Reports are cleared at the end.
 *
 * \throws CompileError ("Pattern is too large.") if the number of vertices
 *         to split exceeds MAX_CLONED_VERTICES.
 */
void resolveAsserts(ReportManager &rm, NGWrapper &g) {
    vector<NFAEdge> assert_edges = getAsserts(g);
    if (assert_edges.empty()) {
        return; // nothing to resolve
    }

    /* both maps are keyed by vertex index, for determinism */
    map<u32, NFAVertex> plain_splits;
    map<u32, NFAVertex> ucp_splits;
    findSplitters(g, assert_edges, &plain_splits, &ucp_splits);

    const size_t clone_count = plain_splits.size() + ucp_splits.size();
    if (clone_count > MAX_CLONED_VERTICES) {
        throw CompileError(g.expressionIndex, "Pattern is too large.");
    }

    for (const auto &entry : plain_splits) {
        // a vertex is split either as plain or as UCP, never both
        assert(!contains(ucp_splits, entry.first));
        splitVertex(rm, g, entry.second, false);
    }
    for (const auto &entry : ucp_splits) {
        splitVertex(rm, g, entry.second, true);
    }

    set<NFAEdge> doomed;
    resolveEdges(rm, g, &doomed);
    remove_edges(doomed, g);

    // Tidy up: drop anything left unreachable or empty by the rewrite.
    g.renumberVertices();
    pruneUseless(g);
    pruneEmptyVertices(g);

    g.renumberVertices();
    g.renumberEdges();
    clearReports(g);
}
|
||||
|
||||
/**
 * \brief In UTF-8 mode, replace a direct startDs->accept edge with a small
 * sub-graph that only reaches accept after one complete code point.
 *
 * In utf8 mode there is an implicit assertion that we start at codepoint
 * boundaries. Assert resolution handles the badness coming from asserts; the
 * only other source of trouble is a startDs->accept connection, which is
 * handled here. If \p g is not in UTF-8 mode, or has no such edge, the graph
 * is left untouched.
 *
 * The replacement consists of virtual-start vertices for an ASCII byte and
 * for 2-, 3- and 4-byte lead bytes followed by the matching number of
 * continuation bytes. The ASCII vertex and the final continuation vertex
 * carry the report and inherit the properties of the removed edge on their
 * edges to accept.
 */
void ensureCodePointStart(ReportManager &rm, NGWrapper &g) {
    const auto found = edge(g.startDs, g.accept, g);
    if (!g.utf8 || !found.second) {
        return;
    }
    const NFAEdge orig = found.first;

    DEBUG_PRINTF("rectifying %u\n", g.reportId);
    Report ir = rm.getBasicInternalReport(g);
    ReportID rep = rm.getInternalId(ir);

    // Creates one virtual-start vertex accepting the given character class.
    const auto new_virtual_start = [&g](const CharReach &cr) {
        NFAVertex nv = add_vertex(g);
        g[nv].assert_flags = POS_FLAG_VIRTUAL_START;
        g[nv].char_reach = cr;
        return nv;
    };

    // Single-byte (ASCII) code point: goes straight to accept.
    NFAVertex v_a = new_virtual_start(UTF_ASCII_CR);
    add_edge(v_a, g.accept, g[orig], g);

    // Lead bytes of 2-, 3- and 4-byte sequences.
    NFAVertex v_2 =
        new_virtual_start(CharReach(UTF_TWO_BYTE_MIN, UTF_TWO_BYTE_MAX));
    NFAVertex v_3 =
        new_virtual_start(CharReach(UTF_THREE_BYTE_MIN, UTF_THREE_BYTE_MAX));
    NFAVertex v_4 =
        new_virtual_start(CharReach(UTF_FOUR_BYTE_MIN, UTF_FOUR_BYTE_MAX));

    // Final continuation byte of any multi-byte sequence.
    NFAVertex v_c = new_virtual_start(UTF_CONT_CR);
    add_edge(v_c, g.accept, g[orig], g);

    add_edge(v_2, v_c, g);

    // Extra continuation byte needed by 3- and 4-byte sequences.
    NFAVertex v_3c = new_virtual_start(UTF_CONT_CR);
    add_edge(v_3c, v_c, g);
    add_edge(v_3, v_3c, g);

    // Extra continuation byte needed only by 4-byte sequences.
    NFAVertex v_4c = new_virtual_start(UTF_CONT_CR);
    add_edge(v_4c, v_3c, g);
    add_edge(v_4, v_4c, g);

    g[v_a].reports.insert(rep);
    g[v_c].reports.insert(rep);

    // Hook every leading-byte vertex up to both start vertices.
    for (NFAVertex lead : {v_a, v_2, v_3, v_4}) {
        add_edge(g.start, lead, g);
        add_edge(g.startDs, lead, g);
    }

    remove_edge(orig, g);
    g.renumberEdges();
}
|
||||
|
||||
} // namespace ue2
|
||||
48
src/nfagraph/ng_asserts.h
Normal file
48
src/nfagraph/ng_asserts.h
Normal file
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Resolve special assert vertices.
|
||||
*/
|
||||
|
||||
#ifndef NG_ASSERTS_H
|
||||
#define NG_ASSERTS_H
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct BoundaryReports;
|
||||
class NGWrapper;
|
||||
class ReportManager;
|
||||
|
||||
void resolveAsserts(ReportManager &rm, NGWrapper &g);
|
||||
|
||||
void ensureCodePointStart(ReportManager &rm, NGWrapper &g);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_ASSERTS_H
|
||||
278
src/nfagraph/ng_builder.cpp
Normal file
278
src/nfagraph/ng_builder.cpp
Normal file
@@ -0,0 +1,278 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief: NFA Graph Builder: used by Glushkov construction to construct an
|
||||
* NGWrapper from a parsed expression.
|
||||
*/
|
||||
#include "grey.h"
|
||||
#include "ng.h"
|
||||
#include "ng_builder.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "compiler/compiler.h" // for ParsedExpression
|
||||
#include "util/compile_error.h"
|
||||
#include "util/make_unique.h"
|
||||
|
||||
#include <cassert>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
|
||||
/** Concrete implementation of NFABuilder interface. */
|
||||
class NFABuilderImpl : public NFABuilder {
|
||||
public:
|
||||
NFABuilderImpl(ReportManager &rm, const Grey &grey,
|
||||
const ParsedExpression &expr);
|
||||
|
||||
~NFABuilderImpl() override;
|
||||
|
||||
Position makePositions(size_t nPositions) override;
|
||||
Position getStart() const override;
|
||||
Position getStartDotStar() const override;
|
||||
Position getAccept() const override;
|
||||
Position getAcceptEOD() const override;
|
||||
|
||||
bool isSpecialState(Position p) const override;
|
||||
|
||||
void setNodeReportID(Position position, int offsetAdjust) override;
|
||||
void addCharReach(Position position, const CharReach &cr) override;
|
||||
void setAssertFlag(Position position, u32 flag) override;
|
||||
u32 getAssertFlag(Position position) override;
|
||||
|
||||
void addVertex(Position p) override;
|
||||
|
||||
void addEdge(Position start, Position end) override;
|
||||
|
||||
bool hasEdge(Position start, Position end) const override;
|
||||
|
||||
u32 numVertices() const override { return vertIdx; }
|
||||
|
||||
void cloneRegion(Position first, Position last,
|
||||
unsigned posOffset) override;
|
||||
|
||||
unique_ptr<NGWrapper> getGraph() override;
|
||||
|
||||
private:
|
||||
/** fetch a vertex given its Position ID. */
|
||||
NFAVertex getVertex(Position pos) const;
|
||||
|
||||
/** \brief Internal convenience function to add an edge (u, v). */
|
||||
pair<NFAEdge, bool> addEdge(NFAVertex u, NFAVertex v);
|
||||
|
||||
/** \brief We use the ReportManager to hand out new internal reports. */
|
||||
ReportManager &rm;
|
||||
|
||||
/** \brief Greybox: used for resource limits. */
|
||||
const Grey &grey;
|
||||
|
||||
/** \brief Underlying NGWrapper graph. */
|
||||
unique_ptr<NGWrapper> graph;
|
||||
|
||||
/** \brief mapping from position to vertex. Use \ref getVertex for access.
|
||||
* */
|
||||
vector<NFAVertex> id2vertex;
|
||||
|
||||
/** \brief Index of next vertex. */
|
||||
u32 vertIdx;
|
||||
}; // class NFABuilderImpl
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
 * \brief Create a builder with an empty NGWrapper configured from \p expr.
 *
 * The position table is seeded with the graph's four special vertices, and
 * numbering of ordinary positions starts at N_SPECIALS.
 */
NFABuilderImpl::NFABuilderImpl(ReportManager &rm_in, const Grey &grey_in,
                               const ParsedExpression &expr)
    : rm(rm_in),
      grey(grey_in),
      graph(ue2::make_unique<NGWrapper>(
          expr.index, expr.highlander, expr.utf8, expr.prefilter, expr.som,
          expr.id, expr.min_offset, expr.max_offset, expr.min_length)),
      vertIdx(N_SPECIALS) {
    // Pre-allocate room for a reasonably-sized NFA, then make the slots for
    // the special vertices addressable.
    id2vertex.reserve(64);
    id2vertex.resize(N_SPECIALS);

    // Register the specials under their well-known positions.
    id2vertex[NODE_ACCEPT_EOD] = graph->acceptEod;
    id2vertex[NODE_ACCEPT] = graph->accept;
    id2vertex[NODE_START_DOTSTAR] = graph->startDs;
    id2vertex[NODE_START] = graph->start;
}
|
||||
|
||||
/** \brief Nothing to release by hand: every member cleans up after itself. */
NFABuilderImpl::~NFABuilderImpl() = default;
|
||||
|
||||
/**
 * \brief Fetch the vertex registered for Position \p pos.
 *
 * \p pos must refer to a position that has already been created with
 * addVertex() (or to one of the special positions). In debug builds this is
 * checked, along with the vertex's stored index agreeing with \p pos.
 */
NFAVertex NFABuilderImpl::getVertex(Position pos) const {
    // Strict bound: pos == size() would index one past the end of the table.
    assert(pos < id2vertex.size());
    const NFAVertex v = id2vertex[pos];
    assert(v != NFAGraph::null_vertex());
    assert(graph->g[v].index == pos);
    return v;
}
|
||||
|
||||
/**
 * \brief Create a new graph vertex and register it as Position \p pos.
 *
 * The position table grows as needed and the vertex's index is set to
 * \p pos.
 *
 * \throws CompileError ("Pattern too large.") if \p pos exceeds the
 *         configured vertex limit.
 */
void NFABuilderImpl::addVertex(Position pos) {
    // Enforce resource limit before touching the graph.
    if (pos > grey.limitGraphVertices) {
        throw CompileError("Pattern too large.");
    }

    NFAVertex fresh = add_vertex(*graph);

    // Make sure slot 'pos' exists in the table.
    if (pos >= id2vertex.size()) {
        id2vertex.resize(pos + 1);
    }

    graph->g[fresh].index = pos;
    id2vertex[pos] = fresh;
}
|
||||
|
||||
/**
 * \brief Release the built graph to the caller.
 *
 * After this call the builder no longer owns a graph.
 *
 * \throws CompileError ("Pattern too large.") if the graph exceeds the
 *         configured edge or vertex limit.
 */
unique_ptr<NGWrapper> NFABuilderImpl::getGraph() {
    DEBUG_PRINTF("built graph has %zu vertices and %zu edges\n",
                 num_vertices(*graph), num_edges(*graph));

    // Final resource-limit check on the finished graph.
    const bool too_many_edges = num_edges(*graph) > grey.limitGraphEdges;
    if (too_many_edges || num_vertices(*graph) > grey.limitGraphVertices) {
        throw CompileError("Pattern too large.");
    }

    return std::move(graph);
}
|
||||
|
||||
/**
 * \brief Make the vertex at \p pos report this expression's basic internal
 * report, built with the given \p offsetAdjust.
 *
 * Any reports previously on the vertex are discarded.
 */
void NFABuilderImpl::setNodeReportID(Position pos, int offsetAdjust) {
    Report ir = rm.getBasicInternalReport(*graph, offsetAdjust);
    DEBUG_PRINTF("setting report id on %u = (%u, %d, %u)\n",
                 pos, graph->reportId, offsetAdjust, ir.ekey);

    auto &vertex_reports = (*graph)[getVertex(pos)].reports;
    vertex_reports.clear();
    vertex_reports.insert(rm.getInternalId(ir));
}
|
||||
|
||||
/** \brief Widen the character reach of the vertex at \p pos by \p cr (the
 * existing reach is kept and \p cr is or-ed in). */
void NFABuilderImpl::addCharReach(Position pos, const CharReach &cr) {
    auto &props = graph->g[getVertex(pos)];
    props.char_reach |= cr;
}
|
||||
|
||||
/** \brief Or \p flag into the assert flags of the vertex at \p pos; flags
 * already set are left in place. */
void NFABuilderImpl::setAssertFlag(Position pos, u32 flag) {
    auto &props = graph->g[getVertex(pos)];
    props.assert_flags |= flag;
}
|
||||
|
||||
/** \brief Return the assert flags currently set on the vertex at \p pos. */
u32 NFABuilderImpl::getAssertFlag(Position pos) {
    return graph->g[getVertex(pos)].assert_flags;
}
|
||||
|
||||
/**
 * \brief Add the edge (u, v) to the graph and return the add_edge() result.
 *
 * The edge must not exist yet; this is only checked in debug builds.
 */
pair<NFAEdge, bool> NFABuilderImpl::addEdge(NFAVertex u, NFAVertex v) {
    // precondition: no duplicate edges
    assert(!edge(u, v, graph->g).second);

    pair<NFAEdge, bool> inserted = add_edge(u, v, *graph);
    assert(inserted.second);
    return inserted;
}
|
||||
|
||||
/**
 * \brief Add an edge between two existing positions.
 *
 * Requests for start->startDs or startDs->startDs are ignored, since those
 * standard edges are already present in the graph. Any other edge must not
 * exist yet (checked in debug builds only).
 */
void NFABuilderImpl::addEdge(Position startPos, Position endPos) {
    DEBUG_PRINTF("%u -> %u\n", startPos, endPos);
    assert(startPos < vertIdx);
    assert(endPos < vertIdx);

    const NFAVertex from = getVertex(startPos);
    const NFAVertex to = getVertex(endPos);

    const bool from_a_start = (from == graph->start || from == graph->startDs);
    if (from_a_start && to == graph->startDs) {
        /* standard special -> special edges already exist */
        assert(edge(from, to, graph->g).second);
        return;
    }

    assert(!edge(from, to, graph->g).second);
    addEdge(from, to);
}
|
||||
|
||||
/** \brief True if the graph has an edge from \p startPos to \p endPos. */
bool NFABuilderImpl::hasEdge(Position startPos, Position endPos) const {
    const auto lookup =
        edge(getVertex(startPos), getVertex(endPos), graph->g);
    return lookup.second;
}
|
||||
|
||||
/** \brief Position of the start vertex. */
Position NFABuilderImpl::getStart() const { return NODE_START; }
|
||||
|
||||
/** \brief Position of the startDs vertex. */
Position NFABuilderImpl::getStartDotStar() const { return NODE_START_DOTSTAR; }
|
||||
|
||||
/** \brief Position of the accept vertex. */
Position NFABuilderImpl::getAccept() const { return NODE_ACCEPT; }
|
||||
|
||||
/** \brief Position of the acceptEod vertex. */
Position NFABuilderImpl::getAcceptEOD() const { return NODE_ACCEPT_EOD; }
|
||||
|
||||
/** \brief True if \p p is one of the four special positions (start,
 * start-dotstar, accept, accept-EOD). */
bool NFABuilderImpl::isSpecialState(Position p) const {
    if (p == NODE_START || p == NODE_START_DOTSTAR) {
        return true;
    }
    return p == NODE_ACCEPT || p == NODE_ACCEPT_EOD;
}
|
||||
|
||||
/**
 * \brief Create \p nPositions new consecutive positions.
 *
 * \return the first of the new positions; the rest follow it contiguously.
 *         If \p nPositions is zero nothing is created and the index of the
 *         next free position is returned.
 */
Position NFABuilderImpl::makePositions(size_t nPositions) {
    const Position base = vertIdx;

    size_t remaining = nPositions;
    while (remaining > 0) {
        // The counter is advanced before the vertex is created, so it stays
        // advanced even if addVertex() throws.
        const Position next = vertIdx++;
        addVertex(next);
        --remaining;
    }

    DEBUG_PRINTF("built %zu positions from base %u\n", nPositions, base);
    return base;
}
|
||||
|
||||
/**
 * \brief Copy the vertex properties of positions [first, last] onto the
 * positions \p posOffset further along.
 *
 * Both source and destination positions must already exist. Only vertex
 * properties are copied (no edges); each destination keeps its own index.
 */
void NFABuilderImpl::cloneRegion(Position first, Position last, unsigned posOffset) {
    NFAGraph &ng = graph->g;
    assert(posOffset > 0);

    DEBUG_PRINTF("cloning nodes in [%u, %u], offset %u\n", first, last,
                 posOffset);

    // Walk the region and duplicate each vertex's properties.
    for (Position src = first; src <= last; ++src) {
        const Position dst = src + posOffset;
        NFAVertex from = getVertex(src);
        assert(dst < vertIdx);
        NFAVertex to = getVertex(dst);

        ng[to] = ng[from]; // all properties
        ng[to].index = dst; // ...except the index, which is restored
    }
}
|
||||
|
||||
/** \brief Factory: build the concrete NFABuilder for \p expr, using the grey
 * box settings from \p cc. */
unique_ptr<NFABuilder> makeNFABuilder(ReportManager &rm, const CompileContext &cc,
                                      const ParsedExpression &expr) {
    unique_ptr<NFABuilder> builder =
        ue2::make_unique<NFABuilderImpl>(rm, cc.grey, expr);
    return builder;
}
|
||||
|
||||
/** \brief Out-of-line definition of the interface's virtual destructor. */
NFABuilder::~NFABuilder() = default;
|
||||
|
||||
} // namespace ue2
|
||||
99
src/nfagraph/ng_builder.h
Normal file
99
src/nfagraph/ng_builder.h
Normal file
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief: NFA Graph Builder: used by Glushkov construction to construct an
|
||||
* NGWrapper from a parsed expression.
|
||||
*/
|
||||
|
||||
#ifndef NG_BUILDER_H
|
||||
#define NG_BUILDER_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
#include "parser/position.h"
|
||||
|
||||
#include <memory>
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class CharReach;
|
||||
class NGWrapper;
|
||||
class ReportManager;
|
||||
struct CompileContext;
|
||||
|
||||
class ParsedExpression;
|
||||
|
||||
/** \brief Abstract builder interface. Use \ref makeNFABuilder to construct
|
||||
* one. Used by GlushkovBuildState. */
|
||||
class NFABuilder : boost::noncopyable {
|
||||
public:
|
||||
virtual ~NFABuilder();
|
||||
|
||||
virtual Position makePositions(size_t nPositions) = 0;
|
||||
virtual Position getStart() const = 0;
|
||||
virtual Position getStartDotStar() const = 0;
|
||||
virtual Position getAccept() const = 0;
|
||||
virtual Position getAcceptEOD() const = 0;
|
||||
|
||||
virtual bool isSpecialState(Position p) const = 0;
|
||||
|
||||
virtual void setNodeReportID(Position position, int offsetAdjust) = 0;
|
||||
virtual void addCharReach(Position position, const CharReach &cr) = 0;
|
||||
|
||||
/* or-in vertex assertions */
|
||||
virtual void setAssertFlag(Position position, u32 flag) = 0;
|
||||
virtual u32 getAssertFlag(Position position) = 0;
|
||||
|
||||
virtual void addVertex(Position p) = 0;
|
||||
|
||||
virtual void addEdge(Position start, Position end) = 0;
|
||||
|
||||
virtual bool hasEdge(Position start, Position end) const = 0;
|
||||
|
||||
virtual u32 numVertices() const = 0;
|
||||
|
||||
virtual void cloneRegion(Position first, Position last,
|
||||
unsigned posOffset) = 0;
|
||||
|
||||
/**
|
||||
* \brief Returns the built NGWrapper graph.
|
||||
* Note that this builder cannot be used after this call.
|
||||
*/
|
||||
virtual std::unique_ptr<NGWrapper> getGraph() = 0;
|
||||
};
|
||||
|
||||
/** Construct a usable NFABuilder. */
|
||||
std::unique_ptr<NFABuilder> makeNFABuilder(ReportManager &rm,
|
||||
const CompileContext &cc,
|
||||
const ParsedExpression &expr);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
422
src/nfagraph/ng_calc_components.cpp
Normal file
422
src/nfagraph/ng_calc_components.cpp
Normal file
@@ -0,0 +1,422 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Splits an NFA graph into its connected components.
|
||||
*
|
||||
* This pass takes a NGHolder and splits its graph into a set of connected
|
||||
* components, returning them as individual NGHolder graphs. For example, the
|
||||
* graph for the regex /foo.*bar|[a-z]{7,13}|hatstand|teakettle$/ will be split
|
||||
* into four NGHolders, representing these four components:
|
||||
*
|
||||
* - /foo.*bar/
|
||||
* - /[a-z]{7,13}/
|
||||
* - /hatstand/
|
||||
* - /teakettle$/
|
||||
*
|
||||
* The pass operates by creating an undirected graph from the input graph, and
|
||||
* then using the BGL's connected_components algorithm to do the work, cloning
|
||||
* the identified components into their own graphs. A "shell" of vertices
|
||||
* is identified and removed first from the head and tail of the graph, in
|
||||
* order to handle cases where there is a common head/tail region.
|
||||
*
|
||||
* Trivial cases, such as an alternation of single vertices like /a|b|c|d|e|f/,
|
||||
* are not split, as later optimisations will handle these cases efficiently.
|
||||
*/
|
||||
#include "ng_calc_components.h"
|
||||
|
||||
#include "ng_depth.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_undirected.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/make_unique.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/graph/connected_components.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/* Depth bounds for the head and tail "shell" regions described in the file
 * comment above.
 * NOTE(review): their uses lie outside this view; presumably they are the
 * max_dist arguments given to findHeadShell() / findTailShell() -- confirm. */
static constexpr u32 MAX_HEAD_SHELL_DEPTH = 3;
|
||||
static constexpr u32 MAX_TAIL_SHELL_DEPTH = 3;
|
||||
|
||||
/**
|
||||
* \brief Returns true if the whole graph is just an alternation of character
|
||||
* classes.
|
||||
*/
|
||||
bool isAlternationOfClasses(const NGHolder &g) {
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
// Vertex must have in edges from starts only.
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (!is_any_start(u, g)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Vertex must have out edges to accepts only.
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (!is_any_accept(w, g)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("alternation of single states, treating as one comp\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Compute initial max distance to v from start (i.e. ignoring its own
|
||||
* self-loop).
|
||||
*/
|
||||
static
|
||||
depth max_dist_from_start(const NGHolder &g,
|
||||
const vector<NFAVertexBidiDepth> &depths,
|
||||
NFAVertex v) {
|
||||
depth max_depth(0);
|
||||
for (const auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (u == v) {
|
||||
continue;
|
||||
}
|
||||
const auto &d = depths.at(g[u].index);
|
||||
if (d.fromStart.max.is_reachable()) {
|
||||
max_depth = max(max_depth, d.fromStart.max);
|
||||
}
|
||||
if (d.fromStartDotStar.max.is_reachable()) {
|
||||
max_depth = max(max_depth, d.fromStartDotStar.max);
|
||||
}
|
||||
}
|
||||
return max_depth + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Compute initial max depth from v from accept (i.e. ignoring its own
|
||||
* self-loop).
|
||||
*/
|
||||
static
|
||||
depth max_dist_to_accept(const NGHolder &g,
|
||||
const vector<NFAVertexBidiDepth> &depths,
|
||||
NFAVertex v) {
|
||||
depth max_depth(0);
|
||||
for (const auto w : adjacent_vertices_range(v, g)) {
|
||||
if (w == v) {
|
||||
continue;
|
||||
}
|
||||
const auto &d = depths.at(g[w].index);
|
||||
if (d.toAccept.max.is_reachable()) {
|
||||
max_depth = max(max_depth, d.toAccept.max);
|
||||
}
|
||||
if (d.toAcceptEod.max.is_reachable()) {
|
||||
max_depth = max(max_depth, d.toAcceptEod.max);
|
||||
}
|
||||
}
|
||||
return max_depth + 1;
|
||||
}
|
||||
|
||||
static
|
||||
flat_set<NFAVertex> findHeadShell(const NGHolder &g,
|
||||
const vector<NFAVertexBidiDepth> &depths,
|
||||
const depth &max_dist) {
|
||||
flat_set<NFAVertex> shell;
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
if (max_dist_from_start(g, depths, v) <= max_dist) {
|
||||
shell.insert(v);
|
||||
}
|
||||
}
|
||||
|
||||
for (UNUSED auto v : shell) {
|
||||
DEBUG_PRINTF("shell: %u\n", g[v].index);
|
||||
}
|
||||
|
||||
return shell;
|
||||
}
|
||||
|
||||
static
|
||||
flat_set<NFAVertex> findTailShell(const NGHolder &g,
|
||||
const vector<NFAVertexBidiDepth> &depths,
|
||||
const depth &max_dist) {
|
||||
flat_set<NFAVertex> shell;
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
if (max_dist_to_accept(g, depths, v) <= max_dist) {
|
||||
shell.insert(v);
|
||||
}
|
||||
}
|
||||
|
||||
for (UNUSED auto v : shell) {
|
||||
DEBUG_PRINTF("shell: %u\n", g[v].index);
|
||||
}
|
||||
|
||||
return shell;
|
||||
}
|
||||
|
||||
static
|
||||
vector<NFAEdge> findShellEdges(const NGHolder &g,
|
||||
const flat_set<NFAVertex> &head_shell,
|
||||
const flat_set<NFAVertex> &tail_shell) {
|
||||
vector<NFAEdge> shell_edges;
|
||||
|
||||
for (const auto &e : edges_range(g)) {
|
||||
auto u = source(e, g);
|
||||
auto v = target(e, g);
|
||||
|
||||
if (v == g.startDs && is_any_start(u, g)) {
|
||||
continue;
|
||||
}
|
||||
if (u == g.accept && v == g.acceptEod) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((is_special(u, g) || contains(head_shell, u)) &&
|
||||
(is_special(v, g) || contains(tail_shell, v))) {
|
||||
DEBUG_PRINTF("edge (%u,%u) is a shell edge\n", g[u].index, g[v].index);
|
||||
shell_edges.push_back(e);
|
||||
}
|
||||
}
|
||||
|
||||
return shell_edges;
|
||||
}
|
||||
|
||||
static
|
||||
void removeVertices(const flat_set<NFAVertex> &verts, NFAUndirectedGraph &ug,
|
||||
ue2::unordered_map<NFAVertex, NFAUndirectedVertex> &old2new,
|
||||
ue2::unordered_map<NFAVertex, NFAUndirectedVertex> &new2old) {
|
||||
for (auto v : verts) {
|
||||
assert(contains(old2new, v));
|
||||
auto uv = old2new.at(v);
|
||||
clear_vertex(uv, ug);
|
||||
remove_vertex(uv, ug);
|
||||
old2new.erase(v);
|
||||
new2old.erase(uv);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void renumberVertices(NFAUndirectedGraph &ug) {
|
||||
u32 vertexIndex = 0;
|
||||
for (auto uv : vertices_range(ug)) {
|
||||
put(boost::vertex_index, ug, uv, vertexIndex++);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Common code called by calc- and recalc- below. Splits the given holder into
|
||||
* one or more connected components, adding them to the comps deque.
|
||||
*/
|
||||
static
|
||||
void splitIntoComponents(const NGHolder &g, deque<unique_ptr<NGHolder>> &comps,
|
||||
const depth &max_head_depth,
|
||||
const depth &max_tail_depth, bool *shell_comp) {
|
||||
DEBUG_PRINTF("graph has %zu vertices\n", num_vertices(g));
|
||||
|
||||
assert(shell_comp);
|
||||
*shell_comp = false;
|
||||
|
||||
// Compute "shell" head and tail subgraphs.
|
||||
vector<NFAVertexBidiDepth> depths;
|
||||
calcDepths(g, depths);
|
||||
auto head_shell = findHeadShell(g, depths, max_head_depth);
|
||||
auto tail_shell = findTailShell(g, depths, max_tail_depth);
|
||||
for (auto v : head_shell) {
|
||||
tail_shell.erase(v);
|
||||
}
|
||||
|
||||
if (head_shell.size() + tail_shell.size() + N_SPECIALS >= num_vertices(g)) {
|
||||
DEBUG_PRINTF("all in shell component\n");
|
||||
comps.push_back(cloneHolder(g));
|
||||
*shell_comp = true;
|
||||
return;
|
||||
}
|
||||
|
||||
vector<NFAEdge> shell_edges = findShellEdges(g, head_shell, tail_shell);
|
||||
|
||||
DEBUG_PRINTF("%zu vertices in head, %zu in tail, %zu shell edges\n",
|
||||
head_shell.size(), tail_shell.size(), shell_edges.size());
|
||||
|
||||
NFAUndirectedGraph ug;
|
||||
ue2::unordered_map<NFAVertex, NFAUndirectedVertex> old2new;
|
||||
ue2::unordered_map<u32, NFAVertex> newIdx2old;
|
||||
|
||||
createUnGraph(g.g, true, true, ug, old2new, newIdx2old);
|
||||
|
||||
// Construct reverse mapping.
|
||||
ue2::unordered_map<NFAVertex, NFAUndirectedVertex> new2old;
|
||||
for (const auto &m : old2new) {
|
||||
new2old.emplace(m.second, m.first);
|
||||
}
|
||||
|
||||
// Remove shells from undirected graph and renumber so we have dense
|
||||
// vertex indices.
|
||||
removeVertices(head_shell, ug, old2new, new2old);
|
||||
removeVertices(tail_shell, ug, old2new, new2old);
|
||||
renumberVertices(ug);
|
||||
|
||||
map<NFAUndirectedVertex, u32> split_components;
|
||||
const u32 num = connected_components(
|
||||
ug, boost::make_assoc_property_map(split_components));
|
||||
|
||||
assert(num > 0);
|
||||
if (num == 1 && shell_edges.empty()) {
|
||||
DEBUG_PRINTF("single component\n");
|
||||
comps.push_back(cloneHolder(g));
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("broke graph into %u components\n", num);
|
||||
|
||||
vector<deque<NFAVertex>> verts(num);
|
||||
|
||||
// Collect vertex lists per component.
|
||||
for (const auto &m : split_components) {
|
||||
NFAVertex uv = m.first;
|
||||
u32 c = m.second;
|
||||
assert(contains(new2old, uv));
|
||||
NFAVertex v = new2old.at(uv);
|
||||
verts[c].push_back(v);
|
||||
DEBUG_PRINTF("vertex %u is in comp %u\n", g[v].index, c);
|
||||
}
|
||||
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> v_map; // temp map for fillHolder
|
||||
for (auto &vv : verts) {
|
||||
// Shells are in every component.
|
||||
vv.insert(vv.end(), begin(head_shell), end(head_shell));
|
||||
vv.insert(vv.end(), begin(tail_shell), end(tail_shell));
|
||||
|
||||
// Sort by vertex index for determinism.
|
||||
sort(begin(vv), end(vv), VertexIndexOrdering<NGHolder>(g));
|
||||
|
||||
auto gc = ue2::make_unique<NGHolder>();
|
||||
v_map.clear();
|
||||
fillHolder(gc.get(), g, vv, &v_map);
|
||||
|
||||
// Remove shell edges, which will get their own component.
|
||||
for (const auto &e : shell_edges) {
|
||||
auto cu = v_map.at(source(e, g));
|
||||
auto cv = v_map.at(target(e, g));
|
||||
assert(edge(cu, cv, *gc).second);
|
||||
remove_edge(cu, cv, *gc);
|
||||
}
|
||||
|
||||
pruneUseless(*gc);
|
||||
DEBUG_PRINTF("component %zu has %zu vertices\n", comps.size(),
|
||||
num_vertices(*gc));
|
||||
comps.push_back(move(gc));
|
||||
}
|
||||
|
||||
// Another component to handle the direct shell-to-shell edges.
|
||||
if (!shell_edges.empty()) {
|
||||
deque<NFAVertex> vv;
|
||||
vv.insert(vv.end(), begin(head_shell), end(head_shell));
|
||||
vv.insert(vv.end(), begin(tail_shell), end(tail_shell));
|
||||
|
||||
// Sort by vertex index for determinism.
|
||||
sort(begin(vv), end(vv), VertexIndexOrdering<NGHolder>(g));
|
||||
|
||||
auto gc = ue2::make_unique<NGHolder>();
|
||||
v_map.clear();
|
||||
fillHolder(gc.get(), g, vv, &v_map);
|
||||
|
||||
pruneUseless(*gc);
|
||||
DEBUG_PRINTF("shell edge component %zu has %zu vertices\n",
|
||||
comps.size(), num_vertices(*gc));
|
||||
comps.push_back(move(gc));
|
||||
*shell_comp = true;
|
||||
}
|
||||
|
||||
// We should never produce empty component graphs.
|
||||
assert(all_of(begin(comps), end(comps),
|
||||
[](const unique_ptr<NGHolder> &g_comp) {
|
||||
return num_vertices(*g_comp) > N_SPECIALS;
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * \brief Split g into component graphs.
 *
 * A graph accepted by isAlternationOfClasses() is returned as a single cloned
 * component. Otherwise the graph is split using the default head/tail shell
 * depths; if that leaves a shell component at the back of the list, that
 * component is split once more with both shell depths set to zero.
 */
deque<unique_ptr<NGHolder>> calcComponents(const NGHolder &g) {
    deque<unique_ptr<NGHolder>> comps;

    // For trivial cases, we needn't bother running the full
    // connected_components algorithm.
    if (isAlternationOfClasses(g)) {
        comps.push_back(cloneHolder(g));
        return comps;
    }

    bool have_shell_comp = false;
    splitIntoComponents(g, comps, MAX_HEAD_SHELL_DEPTH, MAX_TAIL_SHELL_DEPTH,
                        &have_shell_comp);

    if (have_shell_comp) {
        DEBUG_PRINTF("re-running on shell comp\n");
        assert(!comps.empty());
        // The shell component is always the last one pushed.
        auto shell = std::move(comps.back());
        comps.pop_back();
        splitIntoComponents(*shell, comps, 0, 0, &have_shell_comp);
    }

    DEBUG_PRINTF("finished; split into %zu components\n", comps.size());
    return comps;
}
|
||||
|
||||
/**
 * \brief Re-split every graph in comps and replace comps with the result.
 *
 * Null entries are skipped. A graph accepted by isAlternationOfClasses() is
 * moved across unchanged; any other graph is replaced by the output of
 * calcComponents() on it.
 */
void recalcComponents(deque<unique_ptr<NGHolder>> &comps) {
    deque<unique_ptr<NGHolder>> rebuilt;

    for (auto &holder : comps) {
        if (!holder) {
            continue; // graph has been consumed already.
        }

        if (isAlternationOfClasses(*holder)) {
            rebuilt.push_back(std::move(holder));
        } else {
            auto pieces = calcComponents(*holder);
            for (auto &piece : pieces) {
                rebuilt.push_back(std::move(piece));
            }
        }
    }

    // Replace comps with our recalculated list.
    comps.swap(rebuilt);
}
|
||||
|
||||
} // namespace ue2
|
||||
51
src/nfagraph/ng_calc_components.h
Normal file
51
src/nfagraph/ng_calc_components.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Splits an NFA graph into its connected components.
|
||||
*/
|
||||
|
||||
#ifndef NG_CALC_COMPONENTS_H
|
||||
#define NG_CALC_COMPONENTS_H
|
||||
|
||||
#include <deque>
|
||||
#include <memory>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
|
||||
bool isAlternationOfClasses(const NGHolder &g);
|
||||
|
||||
std::deque<std::unique_ptr<NGHolder>> calcComponents(const NGHolder &g);
|
||||
|
||||
void recalcComponents(std::deque<std::unique_ptr<NGHolder>> &comps);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
264
src/nfagraph/ng_cyclic_redundancy.cpp
Normal file
264
src/nfagraph/ng_cyclic_redundancy.cpp
Normal file
@@ -0,0 +1,264 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Cyclic Path Redundancy pass. Removes redundant vertices on paths
|
||||
* leading to a cyclic repeat.
|
||||
*
|
||||
* This is a graph reduction pass intended to remove vertices that are
|
||||
* redundant because they lead solely to a cyclic vertex with a superset of
|
||||
* their character reachability. For example, in this pattern:
|
||||
*
|
||||
* /(abc|def|abcghi).*0123/s
|
||||
*
|
||||
* The vertices for 'ghi' can be removed due to the presence of the dot-star
|
||||
* repeat.
|
||||
*
|
||||
* Algorithm:
|
||||
*
|
||||
* for each cyclic vertex V:
|
||||
* for each proper predecessor U of V:
|
||||
* let S be the set of successors of U that are successors of V
|
||||
* (including V itself)
|
||||
* for each successor W of U not in S:
|
||||
* perform a DFS forward from W, stopping exploration when a vertex
|
||||
* in S is encountered;
|
||||
* if a vertex with reach not in reach(V) or an accept is encountered:
|
||||
* fail and continue to the next W.
|
||||
* else:
|
||||
* remove (U, W)
|
||||
*
|
||||
* NOTE: the following code is templated not just for fun, but so that we can
|
||||
* run this analysis both forward and in reverse over the graph.
|
||||
*/
|
||||
#include "ng_cyclic_redundancy.h"
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_util.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <boost/graph/depth_first_search.hpp>
|
||||
#include <boost/graph/reverse_graph.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::reverse_graph;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
|
||||
// Terminator function for depth first traversal, tells us not to explore
|
||||
// beyond vertices in set S.
|
||||
template<class Vertex, class Graph>
|
||||
class VertexInSet {
|
||||
public:
|
||||
explicit VertexInSet(const flat_set<Vertex> &s) : verts(s) {}
|
||||
bool operator()(const Vertex &v, const Graph&) const {
|
||||
return contains(verts, v);
|
||||
}
|
||||
|
||||
private:
|
||||
const flat_set<Vertex> &verts;
|
||||
};
|
||||
|
||||
struct SearchFailed {};
|
||||
|
||||
// Visitor for depth first traversal, throws an error if we encounter a vertex
|
||||
// with bad reach or a report.
|
||||
class SearchVisitor : public boost::default_dfs_visitor {
|
||||
public:
|
||||
explicit SearchVisitor(const CharReach &r) : cr(r) {}
|
||||
|
||||
template<class Vertex, class Graph>
|
||||
void discover_vertex(const Vertex &v, const Graph &g) const {
|
||||
DEBUG_PRINTF("vertex %u\n", g[v].index);
|
||||
if (is_special(v, g)) {
|
||||
DEBUG_PRINTF("start or accept\n");
|
||||
throw SearchFailed();
|
||||
}
|
||||
|
||||
if (g[v].assert_flags) {
|
||||
DEBUG_PRINTF("assert flags\n");
|
||||
throw SearchFailed();
|
||||
}
|
||||
|
||||
const CharReach &vcr = g[v].char_reach;
|
||||
if (vcr != (vcr & cr)) {
|
||||
DEBUG_PRINTF("bad reach\n");
|
||||
throw SearchFailed();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const CharReach &cr;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
template<class Graph>
|
||||
static
|
||||
bool searchForward(const Graph &g, const CharReach &reach,
|
||||
const flat_set<typename Graph::vertex_descriptor> &s,
|
||||
typename Graph::vertex_descriptor w) {
|
||||
map<NFAVertex, boost::default_color_type> colours;
|
||||
try {
|
||||
depth_first_visit(g, w, SearchVisitor(reach),
|
||||
make_assoc_property_map(colours),
|
||||
VertexInSet<typename Graph::vertex_descriptor, Graph>(s));
|
||||
} catch (SearchFailed&) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
NFAEdge to_raw(const NFAEdge &e, const NFAGraph &, const NGHolder &) {
|
||||
return e;
|
||||
}
|
||||
|
||||
static
|
||||
NFAEdge to_raw(const reverse_graph<NFAGraph, NFAGraph&>::edge_descriptor &e,
|
||||
const reverse_graph<NFAGraph, NFAGraph&> &g,
|
||||
const NGHolder &raw) {
|
||||
/* clang doesn't seem to like edge_underlying */
|
||||
NFAVertex t = source(e, g);
|
||||
NFAVertex s = target(e, g);
|
||||
|
||||
assert(edge(s, t, raw).second);
|
||||
|
||||
return edge(s, t, raw).first;
|
||||
}
|
||||
|
||||
|
||||
/* returns true if we did stuff */
|
||||
template<class Graph>
|
||||
static
|
||||
bool removeCyclicPathRedundancy(Graph &g, typename Graph::vertex_descriptor v,
|
||||
NGHolder &raw) {
|
||||
bool did_stuff = false;
|
||||
|
||||
const CharReach &reach = g[v].char_reach;
|
||||
|
||||
typedef typename Graph::vertex_descriptor vertex_descriptor;
|
||||
|
||||
// precalc successors of v.
|
||||
flat_set<vertex_descriptor> succ_v;
|
||||
insert(&succ_v, adjacent_vertices(v, g));
|
||||
|
||||
flat_set<vertex_descriptor> s;
|
||||
|
||||
for (const auto &e : in_edges_range(v, g)) {
|
||||
vertex_descriptor u = source(e, g);
|
||||
if (u == v) {
|
||||
continue;
|
||||
}
|
||||
if (is_any_accept(u, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("- checking u %u\n", g[u].index);
|
||||
|
||||
// let s be intersection(succ(u), succ(v))
|
||||
s.clear();
|
||||
for (auto b : adjacent_vertices_range(u, g)) {
|
||||
if (contains(succ_v, b)) {
|
||||
s.insert(b);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &e_u : make_vector_from(out_edges(u, g))) {
|
||||
vertex_descriptor w = target(e_u, g);
|
||||
if (is_special(w, g) || contains(s, w)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const CharReach &w_reach = g[w].char_reach;
|
||||
if (!w_reach.isSubsetOf(reach)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF(" - checking w %u\n", g[w].index);
|
||||
|
||||
if (searchForward(g, reach, s, w)) {
|
||||
DEBUG_PRINTF("removing edge (%u,%u)\n",
|
||||
g[u].index, g[w].index);
|
||||
/* we are currently iterating over the in-edges of v, so it
|
||||
would be unwise to remove edges to v. However, */
|
||||
assert(w != v); /* as v is in s */
|
||||
remove_edge(to_raw(e_u, g, raw), raw);
|
||||
did_stuff = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return did_stuff;
|
||||
}
|
||||
|
||||
template<class Graph>
|
||||
static
|
||||
bool cyclicPathRedundancyPass(Graph &g, NGHolder &raw) {
|
||||
bool did_stuff = false;
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (is_special(v, g) || !edge(v, v, g).second) {
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("examining cyclic vertex %u\n", g[v].index);
|
||||
did_stuff |= removeCyclicPathRedundancy(g, v, raw);
|
||||
}
|
||||
|
||||
return did_stuff;
|
||||
}
|
||||
|
||||
/**
 * \brief Run the cyclic path redundancy pass over g, first forwards and then
 * over a reversed view, calling pruneUseless() after each direction that
 * removed edges.
 *
 * \return true if either direction removed edges.
 */
bool removeCyclicPathRedundancy(NGHolder &g) {
    // Forward pass.
    const bool fwd_changed = cyclicPathRedundancyPass(g.g, g);
    if (fwd_changed) {
        DEBUG_PRINTF("edges removed by forward pass\n");
        pruneUseless(g);
    }

    // Reverse pass.
    DEBUG_PRINTF("REVERSE PASS\n");
    typedef reverse_graph<NFAGraph, NFAGraph&> RevGraph;
    RevGraph reversed(g.g);
    const bool rev_changed = cyclicPathRedundancyPass(reversed, g);
    if (rev_changed) {
        DEBUG_PRINTF("edges removed by reverse pass\n");
        pruneUseless(g);
    }

    return fwd_changed || rev_changed;
}
|
||||
|
||||
} // namespace ue2
|
||||
45
src/nfagraph/ng_cyclic_redundancy.h
Normal file
45
src/nfagraph/ng_cyclic_redundancy.h
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Cyclic Path Redundancy pass. Removes redundant vertices on paths
|
||||
* leading to a cyclic repeat.
|
||||
*/
|
||||
|
||||
#ifndef NG_CYCLIC_REDUNDANCY_H
|
||||
#define NG_CYCLIC_REDUNDANCY_H
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
|
||||
bool removeCyclicPathRedundancy(NGHolder &g);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
383
src/nfagraph/ng_depth.cpp
Normal file
383
src/nfagraph/ng_depth.cpp
Normal file
@@ -0,0 +1,383 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief NFA graph vertex depth calculations.
|
||||
*/
|
||||
#include "ng_depth.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include <deque>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/graph/dag_shortest_paths.hpp>
|
||||
#include <boost/graph/depth_first_search.hpp>
|
||||
#include <boost/graph/breadth_first_search.hpp>
|
||||
#include <boost/graph/filtered_graph.hpp>
|
||||
#include <boost/graph/reverse_graph.hpp>
|
||||
#include <boost/graph/topological_sort.hpp>
|
||||
#include <boost/graph/property_maps/constant_property_map.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::filtered_graph;
|
||||
using boost::make_constant_property;
|
||||
using boost::reverse_graph;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
|
||||
/** Distance value used to indicate that the vertex can't be reached. */
|
||||
static const int DIST_UNREACHABLE = INT_MAX;
|
||||
|
||||
/**
|
||||
* Distance value used to indicate that the distance to a vertex is infinite
|
||||
* (for example, it's the max distance and there's a cycle in the path) or so
|
||||
* large that we should consider it effectively infinite.
|
||||
*/
|
||||
static const int DIST_INFINITY = INT_MAX - 1;
|
||||
|
||||
//
|
||||
// Filters
|
||||
//
|
||||
|
||||
template <class GraphT>
|
||||
struct NodeFilter {
|
||||
typedef typename GraphT::edge_descriptor EdgeT;
|
||||
NodeFilter() { }
|
||||
NodeFilter(const vector<bool> *bad_in, const GraphT *g_in)
|
||||
: bad(bad_in), g(g_in) { }
|
||||
bool operator()(const EdgeT &e) const {
|
||||
u32 src_idx = (*g)[source(e, *g)].index;
|
||||
u32 tar_idx = (*g)[target(e, *g)].index;
|
||||
|
||||
if (tar_idx == NODE_START_DOTSTAR) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return !(*bad)[src_idx] && !(*bad)[tar_idx];
|
||||
}
|
||||
const vector<bool> *bad;
|
||||
const GraphT *g;
|
||||
};
|
||||
|
||||
template <class GraphT>
|
||||
struct StartFilter {
|
||||
typedef typename GraphT::edge_descriptor EdgeT;
|
||||
StartFilter() { }
|
||||
explicit StartFilter(const GraphT *g_in) : g(g_in) { }
|
||||
bool operator()(const EdgeT &e) const {
|
||||
u32 src_idx = (*g)[source(e, *g)].index;
|
||||
u32 tar_idx = (*g)[target(e, *g)].index;
|
||||
|
||||
// Remove our stylised edges from anchored start to startDs.
|
||||
if (src_idx == NODE_START && tar_idx == NODE_START_DOTSTAR) {
|
||||
return false;
|
||||
}
|
||||
// Also remove the equivalent in the reversed direction.
|
||||
if (src_idx == NODE_ACCEPT_EOD && tar_idx == NODE_ACCEPT) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
const GraphT *g;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
template<class GraphT>
|
||||
static
|
||||
void findLoopReachable(const GraphT &g, const NFAVertex srcVertex,
|
||||
vector<bool> &deadNodes) {
|
||||
typedef typename GraphT::edge_descriptor EdgeT;
|
||||
typedef set<EdgeT> EdgeSet;
|
||||
|
||||
EdgeSet deadEdges;
|
||||
BackEdges<EdgeSet> be(deadEdges);
|
||||
|
||||
auto index_map = get(&NFAGraphVertexProps::index, g);
|
||||
|
||||
depth_first_search(g, visitor(be).root_vertex(srcVertex).vertex_index_map(
|
||||
index_map));
|
||||
AcyclicFilter<EdgeSet> af(&deadEdges);
|
||||
filtered_graph<GraphT, AcyclicFilter<EdgeSet> > acyclic_g(g, af);
|
||||
|
||||
vector<NFAVertex> topoOrder; /* actually reverse topological order */
|
||||
topoOrder.reserve(deadNodes.size());
|
||||
topological_sort(acyclic_g, back_inserter(topoOrder),
|
||||
vertex_index_map(index_map));
|
||||
|
||||
for (const auto &e : deadEdges) {
|
||||
u32 srcIdx = g[source(e, g)].index;
|
||||
if (srcIdx != NODE_START_DOTSTAR) {
|
||||
deadNodes[srcIdx] = true;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto it = topoOrder.rbegin(); it != topoOrder.rend(); ++it) {
|
||||
NFAVertex v = *it;
|
||||
for (const auto &e : in_edges_range(v, g)) {
|
||||
if (deadNodes[g[source(e, g)].index]) {
|
||||
deadNodes[g[v].index] = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class GraphT>
|
||||
static
|
||||
void calcDepthFromSource(const NGHolder &graph, const GraphT &g,
|
||||
typename GraphT::vertex_descriptor srcVertex,
|
||||
const vector<bool> &deadNodes,
|
||||
vector<int> &dMin, vector<int> &dMax) {
|
||||
typedef typename GraphT::edge_descriptor EdgeT;
|
||||
|
||||
const size_t numVerts = num_vertices(graph);
|
||||
|
||||
NodeFilter<GraphT> nf(&deadNodes, &g);
|
||||
StartFilter<GraphT> sf(&g);
|
||||
|
||||
/* minimum distance needs to run on a graph with .*start unreachable
|
||||
* from start */
|
||||
typedef filtered_graph<GraphT, StartFilter<GraphT> > StartFilteredGraph;
|
||||
const StartFilteredGraph mindist_g(g, sf);
|
||||
|
||||
/* maximum distance needs to run on a graph without cycles & nodes
|
||||
* reachable from cycles */
|
||||
typedef filtered_graph<GraphT, NodeFilter<GraphT> > NodeFilteredGraph;
|
||||
const NodeFilteredGraph maxdist_g(g, nf);
|
||||
|
||||
// Record distance of each vertex from source using one of the following
|
||||
// algorithms.
|
||||
|
||||
/* note: filtered graphs have same num_{vertices,edges} as base */
|
||||
|
||||
dMin.assign(numVerts, DIST_UNREACHABLE);
|
||||
dMax.assign(numVerts, DIST_UNREACHABLE);
|
||||
dMin[mindist_g[srcVertex].index] = 0;
|
||||
|
||||
using boost::make_iterator_property_map;
|
||||
|
||||
auto min_index_map = get(&NFAGraphVertexProps::index, mindist_g);
|
||||
|
||||
breadth_first_search(mindist_g, srcVertex,
|
||||
boost::vertex_index_map(min_index_map).
|
||||
visitor(make_bfs_visitor(record_distances(
|
||||
make_iterator_property_map(
|
||||
dMin.begin(), min_index_map),
|
||||
boost::on_tree_edge()))));
|
||||
|
||||
auto max_index_map = get(&NFAGraphVertexProps::index, maxdist_g);
|
||||
|
||||
dag_shortest_paths(maxdist_g, srcVertex,
|
||||
boost::vertex_index_map(max_index_map).
|
||||
distance_map(make_iterator_property_map(dMax.begin(),
|
||||
max_index_map)).
|
||||
weight_map(make_constant_property<EdgeT>(-1)));
|
||||
|
||||
for (size_t i = 0; i < numVerts; i++) {
|
||||
if (dMin[i] > DIST_UNREACHABLE) {
|
||||
dMin[i] = DIST_UNREACHABLE;
|
||||
}
|
||||
DEBUG_PRINTF("%zu: dm %d %d\n", i, dMin[i], dMax[i]);
|
||||
if (dMax[i] >= DIST_UNREACHABLE && dMin[i] < DIST_UNREACHABLE) {
|
||||
dMax[i] = -DIST_INFINITY; /* max depths currently negative */
|
||||
DEBUG_PRINTF("bumping max to %d\n", dMax[i]);
|
||||
} else if (dMax[i] >= DIST_UNREACHABLE
|
||||
|| dMax[i] < -DIST_UNREACHABLE) {
|
||||
dMax[i] = -DIST_UNREACHABLE;
|
||||
DEBUG_PRINTF("bumping max to %d\n", dMax[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Convert the integer distance we use in our shortest path calculations
|
||||
* to a \ref depth value.
|
||||
*/
|
||||
static
|
||||
depth depthFromDistance(int val) {
|
||||
assert(val >= 0);
|
||||
if (val >= DIST_UNREACHABLE) {
|
||||
return depth::unreachable();
|
||||
} else if (val == DIST_INFINITY) {
|
||||
return depth::infinity();
|
||||
}
|
||||
return depth((u32)val);
|
||||
}
|
||||
|
||||
static
|
||||
DepthMinMax getDepths(u32 idx, const vector<int> &dMin,
|
||||
const vector<int> &dMax) {
|
||||
DepthMinMax d(depthFromDistance(dMin[idx]),
|
||||
depthFromDistance(-1 * dMax[idx]));
|
||||
DEBUG_PRINTF("idx=%u, depths=%s\n", idx, d.str().c_str());
|
||||
assert(d.min <= d.max);
|
||||
return d;
|
||||
}
|
||||
|
||||
template<class Graph, class Output>
|
||||
static
|
||||
void calcAndStoreDepth(const NGHolder &h, const Graph &g,
|
||||
const typename Graph::vertex_descriptor src,
|
||||
const vector<bool> &deadNodes,
|
||||
vector<int> &dMin /* util */,
|
||||
vector<int> &dMax /* util */,
|
||||
vector<Output> &depths,
|
||||
DepthMinMax Output::*store) {
|
||||
calcDepthFromSource(h, g, src, deadNodes, dMin, dMax);
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
u32 idx = g[v].index;
|
||||
assert(idx < depths.size());
|
||||
Output &d = depths.at(idx);
|
||||
d.*store = getDepths(idx, dMin, dMax);
|
||||
}
|
||||
}
|
||||
|
||||
void calcDepths(const NGHolder &g, std::vector<NFAVertexDepth> &depths) {
|
||||
assert(hasCorrectlyNumberedVertices(g));
|
||||
const size_t numVertices = num_vertices(g);
|
||||
depths.clear();
|
||||
depths.resize(numVertices);
|
||||
|
||||
vector<int> dMin;
|
||||
vector<int> dMax;
|
||||
|
||||
/*
|
||||
* create a filtered graph for max depth calculations: all nodes/edges
|
||||
* reachable from a loop need to be removed
|
||||
*/
|
||||
vector<bool> deadNodes(numVertices);
|
||||
findLoopReachable(g.g, g.start, deadNodes);
|
||||
|
||||
DEBUG_PRINTF("doing start\n");
|
||||
calcAndStoreDepth(g, g.g, g.start, deadNodes, dMin, dMax,
|
||||
depths, &NFAVertexDepth::fromStart);
|
||||
DEBUG_PRINTF("doing startds\n");
|
||||
calcAndStoreDepth(g, g.g, g.startDs, deadNodes, dMin, dMax,
|
||||
depths, &NFAVertexDepth::fromStartDotStar);
|
||||
}
|
||||
|
||||
void calcDepths(const NGHolder &g, std::vector<NFAVertexRevDepth> &depths) {
|
||||
assert(hasCorrectlyNumberedVertices(g));
|
||||
const size_t numVertices = num_vertices(g);
|
||||
depths.clear();
|
||||
depths.resize(numVertices);
|
||||
|
||||
vector<int> dMin;
|
||||
vector<int> dMax;
|
||||
|
||||
/* reverse the graph before walking it */
|
||||
typedef reverse_graph<NFAGraph, const NFAGraph&> RevNFAGraph;
|
||||
const RevNFAGraph rg(g.g);
|
||||
|
||||
/*
|
||||
* create a filtered graph for max depth calculations: all nodes/edges
|
||||
* reachable from a loop need to be removed
|
||||
*/
|
||||
vector<bool> deadNodes(numVertices);
|
||||
findLoopReachable(rg, g.acceptEod, deadNodes);
|
||||
|
||||
DEBUG_PRINTF("doing accept\n");
|
||||
calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>(
|
||||
g, rg, g.accept, deadNodes, dMin, dMax, depths,
|
||||
&NFAVertexRevDepth::toAccept);
|
||||
DEBUG_PRINTF("doing accepteod\n");
|
||||
deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge.
|
||||
calcAndStoreDepth<RevNFAGraph, NFAVertexRevDepth>(
|
||||
g, rg, g.acceptEod, deadNodes, dMin, dMax, depths,
|
||||
&NFAVertexRevDepth::toAcceptEod);
|
||||
}
|
||||
|
||||
void calcDepths(const NGHolder &g, vector<NFAVertexBidiDepth> &depths) {
|
||||
assert(hasCorrectlyNumberedVertices(g));
|
||||
const size_t numVertices = num_vertices(g);
|
||||
depths.clear();
|
||||
depths.resize(numVertices);
|
||||
|
||||
vector<int> dMin;
|
||||
vector<int> dMax;
|
||||
|
||||
/*
|
||||
* create a filtered graph for max depth calculations: all nodes/edges
|
||||
* reachable from a loop need to be removed
|
||||
*/
|
||||
vector<bool> deadNodes(numVertices);
|
||||
findLoopReachable(g.g, g.start, deadNodes);
|
||||
|
||||
DEBUG_PRINTF("doing start\n");
|
||||
calcAndStoreDepth<NFAGraph, NFAVertexBidiDepth>(
|
||||
g, g.g, g.start, deadNodes, dMin, dMax, depths,
|
||||
&NFAVertexBidiDepth::fromStart);
|
||||
DEBUG_PRINTF("doing startds\n");
|
||||
calcAndStoreDepth<NFAGraph, NFAVertexBidiDepth>(
|
||||
g, g.g, g.startDs, deadNodes, dMin, dMax, depths,
|
||||
&NFAVertexBidiDepth::fromStartDotStar);
|
||||
|
||||
/* Now go backwards */
|
||||
typedef reverse_graph<NFAGraph, const NFAGraph&> RevNFAGraph;
|
||||
const RevNFAGraph rg(g.g);
|
||||
deadNodes.assign(numVertices, false);
|
||||
findLoopReachable(rg, g.acceptEod, deadNodes);
|
||||
|
||||
DEBUG_PRINTF("doing accept\n");
|
||||
calcAndStoreDepth<RevNFAGraph, NFAVertexBidiDepth>(
|
||||
g, rg, g.accept, deadNodes, dMin, dMax, depths,
|
||||
&NFAVertexBidiDepth::toAccept);
|
||||
DEBUG_PRINTF("doing accepteod\n");
|
||||
deadNodes[NODE_ACCEPT] = true; // Hide accept->acceptEod edge.
|
||||
calcAndStoreDepth<RevNFAGraph, NFAVertexBidiDepth>(
|
||||
g, rg, g.acceptEod, deadNodes, dMin, dMax, depths,
|
||||
&NFAVertexBidiDepth::toAcceptEod);
|
||||
}
|
||||
|
||||
void calcDepthsFrom(const NGHolder &g, const NFAVertex src,
|
||||
vector<DepthMinMax> &depths) {
|
||||
assert(hasCorrectlyNumberedVertices(g));
|
||||
const size_t numVertices = num_vertices(g);
|
||||
|
||||
vector<bool> deadNodes(numVertices);
|
||||
findLoopReachable(g.g, g.start, deadNodes);
|
||||
|
||||
vector<int> dMin, dMax;
|
||||
calcDepthFromSource(g, g.g, src, deadNodes, dMin, dMax);
|
||||
|
||||
depths.clear();
|
||||
depths.resize(numVertices);
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
u32 idx = g[v].index;
|
||||
depths.at(idx) = getDepths(idx, dMin, dMax);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
95
src/nfagraph/ng_depth.h
Normal file
95
src/nfagraph/ng_depth.h
Normal file
@@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief NFA graph vertex depth calculations.
|
||||
*/
|
||||
|
||||
#ifndef STRUCTURAL_ANALYSIS_H
|
||||
#define STRUCTURAL_ANALYSIS_H
|
||||
|
||||
#include "nfagraph/ng_holder.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/depth.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
|
||||
/**
|
||||
* \brief Encapsulates min/max depths relative to the start and startDs
|
||||
* vertices.
|
||||
*/
|
||||
struct NFAVertexDepth {
|
||||
DepthMinMax fromStart;
|
||||
DepthMinMax fromStartDotStar;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Encapsulates min/max depths relative to the accept and acceptEod
|
||||
* vertices.
|
||||
*/
|
||||
struct NFAVertexRevDepth {
|
||||
DepthMinMax toAccept;
|
||||
DepthMinMax toAcceptEod;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Encapsulates min/max depths relative to all of our special vertices.
|
||||
*/
|
||||
struct NFAVertexBidiDepth : NFAVertexDepth, NFAVertexRevDepth {
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Calculate depths from start and startDs.
|
||||
* Fills the vector \p depths (indexed by \p vertex_index).
|
||||
*/
|
||||
void calcDepths(const NGHolder &g, std::vector<NFAVertexDepth> &depths);
|
||||
|
||||
/**
|
||||
* \brief Calculate depths to accept and acceptEod.
|
||||
* Fills the vector \p depths (indexed by \p vertex_index).
|
||||
*/
|
||||
void calcDepths(const NGHolder &g, std::vector<NFAVertexRevDepth> &depths);
|
||||
|
||||
/**
|
||||
* \brief Calculate depths to/from all special vertices.
|
||||
* Fills the vector \p depths (indexed by \p vertex_index).
|
||||
*/
|
||||
void calcDepths(const NGHolder &g, std::vector<NFAVertexBidiDepth> &depths);
|
||||
|
||||
/** Calculate the (min, max) depths from the given \p src to every vertex in
|
||||
* the graph and return them in a vector, indexed by \p vertex_index. */
|
||||
void calcDepthsFrom(const NGHolder &g, const NFAVertex src,
|
||||
std::vector<DepthMinMax> &depths);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
85
src/nfagraph/ng_dominators.cpp
Normal file
85
src/nfagraph/ng_dominators.cpp
Normal file
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Calculate dominator and post-dominator trees.
|
||||
*
|
||||
* A small wrapper around the BGL's lengauer_tarjan_dominator_tree algorithm.
|
||||
*/
|
||||
#include "ng_dominators.h"
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_util.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <boost-patched/graph/dominator_tree.hpp> // locally patched version
|
||||
#include <boost/graph/reverse_graph.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::make_assoc_property_map;
|
||||
using boost::make_iterator_property_map;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
template <class Graph>
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> calcDominators(const Graph &g,
|
||||
NFAVertex source) {
|
||||
const size_t num_verts = num_vertices(g);
|
||||
auto index_map = get(&NFAGraphVertexProps::index, g);
|
||||
|
||||
vector<size_t> dfnum(num_verts, 0);
|
||||
vector<NFAVertex> parents(num_verts, Graph::null_vertex());
|
||||
|
||||
auto dfnum_map = make_iterator_property_map(dfnum.begin(), index_map);
|
||||
auto parent_map = make_iterator_property_map(parents.begin(), index_map);
|
||||
vector<NFAVertex> vertices_by_dfnum(num_verts, Graph::null_vertex());
|
||||
|
||||
// Output map.
|
||||
unordered_map<NFAVertex, NFAVertex> doms;
|
||||
auto dom_map = make_assoc_property_map(doms);
|
||||
|
||||
boost_ue2::lengauer_tarjan_dominator_tree(g, source, index_map, dfnum_map,
|
||||
parent_map, vertices_by_dfnum,
|
||||
dom_map);
|
||||
|
||||
return doms;
|
||||
}
|
||||
|
||||
/** \brief Compute the dominator map of \p g, rooted at its start vertex. */
ue2::unordered_map<NFAVertex, NFAVertex> findDominators(const NGHolder &g) {
    assert(hasCorrectlyNumberedVertices(g));

    const NFAVertex root = g.start;
    return calcDominators(g.g, root);
}
|
||||
|
||||
/**
 * \brief Compute the post-dominator map of \p g: the dominator map of the
 * reversed graph, rooted at acceptEod.
 */
ue2::unordered_map<NFAVertex, NFAVertex> findPostDominators(const NGHolder &g) {
    assert(hasCorrectlyNumberedVertices(g));

    const boost::reverse_graph<NFAGraph, const NFAGraph &> reversed(g.g);
    return calcDominators(reversed, g.acceptEod);
}
|
||||
|
||||
} // namespace ue2
|
||||
51
src/nfagraph/ng_dominators.h
Normal file
51
src/nfagraph/ng_dominators.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Calculate dominator and post-dominator trees.
|
||||
*
|
||||
* A small wrapper around the BGL's lengauer_tarjan_dominator_tree algorithm.
|
||||
*/
|
||||
|
||||
#ifndef NG_DOMINATORS_H
|
||||
#define NG_DOMINATORS_H
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
|
||||
/** \brief Calculate the dominator map for the graph held by \p g. */
ue2::unordered_map<NFAVertex, NFAVertex> findDominators(const NGHolder &g);

/** \brief Calculate the post-dominator map for the graph held by \p g. */
ue2::unordered_map<NFAVertex, NFAVertex> findPostDominators(const NGHolder &g);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_DOMINATORS_H
|
||||
454
src/nfagraph/ng_dump.cpp
Normal file
454
src/nfagraph/ng_dump.cpp
Normal file
@@ -0,0 +1,454 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Dump code for NFA graphs.
|
||||
*
|
||||
* The dump support in this file is for internal use only, and thus is not even
|
||||
* compiled in release builds, where DUMP_SUPPORT is not switched on.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "ng_dump.h"
|
||||
|
||||
#include "hwlm/hwlm_build.h"
|
||||
#include "ng.h"
|
||||
#include "ng_util.h"
|
||||
#include "parser/position.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/accel.h"
|
||||
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
|
||||
#include "smallwrite/smallwrite_dump.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "rose/rose_internal.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/report.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/ue2string.h"
|
||||
#include "hs_compile.h" /* for HS_MODE_* flags */
|
||||
|
||||
#include <cmath>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <map>
|
||||
#include <ostream>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error No dump support!
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
void describeAssert(ostream &os, u32 flags) {
|
||||
#define DESCRIBE_ASSERT_CASE(x) case POS_FLAG_##x: s = #x; break
|
||||
while (flags) {
|
||||
const char *s;
|
||||
switch (1 << findAndClearLSB_32(&flags)) {
|
||||
DESCRIBE_ASSERT_CASE(NOFLOAT);
|
||||
DESCRIBE_ASSERT_CASE(MUST_FLOAT);
|
||||
DESCRIBE_ASSERT_CASE(FIDDLE_ACCEPT);
|
||||
DESCRIBE_ASSERT_CASE(VIRTUAL_START);
|
||||
DESCRIBE_ASSERT_CASE(MULTILINE_START);
|
||||
DESCRIBE_ASSERT_CASE(ASSERT_WORD_TO_WORD);
|
||||
DESCRIBE_ASSERT_CASE(ASSERT_WORD_TO_NONWORD);
|
||||
DESCRIBE_ASSERT_CASE(ASSERT_NONWORD_TO_WORD);
|
||||
DESCRIBE_ASSERT_CASE(ASSERT_NONWORD_TO_NONWORD);
|
||||
DESCRIBE_ASSERT_CASE(ASSERT_WORD_TO_WORD_UCP);
|
||||
DESCRIBE_ASSERT_CASE(ASSERT_WORD_TO_NONWORD_UCP);
|
||||
DESCRIBE_ASSERT_CASE(ASSERT_NONWORD_TO_WORD_UCP);
|
||||
DESCRIBE_ASSERT_CASE(ASSERT_NONWORD_TO_NONWORD_UCP);
|
||||
default:
|
||||
s = "unknown flag";
|
||||
}
|
||||
os << s << "\\n";
|
||||
}
|
||||
#undef DESCRIBE_ASSERT_CASE
|
||||
}
|
||||
|
||||
static
|
||||
void describeReport(ostream &os, const ReportID report,
|
||||
const ReportManager *rm) {
|
||||
if (!rm) {
|
||||
os << "\\nReport: " << report;
|
||||
} else {
|
||||
os << "\\nReport: " << report << " (";
|
||||
const Report &ir = rm->getReport(report);
|
||||
switch (ir.type) {
|
||||
case EXTERNAL_CALLBACK:
|
||||
os << "EXTERNAL " << ir.onmatch;
|
||||
if (ir.offsetAdjust) {
|
||||
os << " adj " << ir.offsetAdjust;
|
||||
}
|
||||
break;
|
||||
case EXTERNAL_CALLBACK_SOM_STORED:
|
||||
os << "SOM_STORED " << ir.somDistance;
|
||||
break;
|
||||
case EXTERNAL_CALLBACK_SOM_REL:
|
||||
os << "SOM_REL " << ir.somDistance;
|
||||
break;
|
||||
case EXTERNAL_CALLBACK_SOM_ABS:
|
||||
os << "SOM_ABS " << ir.somDistance;
|
||||
break;
|
||||
case EXTERNAL_CALLBACK_SOM_REV_NFA:
|
||||
os << "SOM_REV_NFA " << ir.revNfaIndex;
|
||||
break;
|
||||
case INTERNAL_SOM_LOC_SET:
|
||||
os << "SOM_LOC_SET " << ir.onmatch;
|
||||
break;
|
||||
case INTERNAL_SOM_LOC_SET_IF_UNSET:
|
||||
os << "SOM_LOC_SET_IF_UNSET " << ir.onmatch;
|
||||
break;
|
||||
case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
|
||||
os << "SOM_LOC_SET_IF_WRITABLE " << ir.onmatch;
|
||||
break;
|
||||
case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
|
||||
os << "SOM_LOC_SET_SOM_REV_NFA " << ir.onmatch << " nfa="
|
||||
<< ir.revNfaIndex;
|
||||
break;
|
||||
case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
|
||||
os << "SOM_LOC_SET_SOM_REV_NFA_IF_UNSET " << ir.onmatch << " nfa="
|
||||
<< ir.revNfaIndex;
|
||||
break;
|
||||
case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
|
||||
os << "SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE " << ir.onmatch
|
||||
<< " nfa=" << ir.revNfaIndex;
|
||||
break;
|
||||
case INTERNAL_SOM_LOC_COPY:
|
||||
os << "SOM_LOC_COPY " << ir.somDistance << " to " << ir.onmatch;
|
||||
break;
|
||||
case INTERNAL_SOM_LOC_COPY_IF_WRITABLE:
|
||||
os << "SOM_LOC_COPY_IF_WRITABLE " << ir.somDistance
|
||||
<< " to " << ir.onmatch;
|
||||
break;
|
||||
case INTERNAL_SOM_LOC_MAKE_WRITABLE:
|
||||
os << "SOM_LOC_MAKE_WRITABLE " << ir.onmatch;
|
||||
break;
|
||||
default:
|
||||
os << "no dump code!";
|
||||
break;
|
||||
}
|
||||
os << ")";
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
template <typename VertexT, typename EdgeT, typename GraphT>
|
||||
class NFAWriter {
|
||||
public:
|
||||
explicit NFAWriter(const GraphT &g_in) : g(g_in) {}
|
||||
|
||||
NFAWriter(const GraphT &g_in, const ReportManager &rm_in)
|
||||
: g(g_in), rm(&rm_in) {}
|
||||
|
||||
NFAWriter(const GraphT &g_in,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map_in)
|
||||
: g(g_in), region_map(®ion_map_in) {}
|
||||
|
||||
void operator()(ostream& os, const VertexT& v) const {
|
||||
u32 v_index = g[v].index;
|
||||
|
||||
os << "[";
|
||||
os << "fontsize=11, width=2, height=2, ";
|
||||
os << "label=\"" << v_index;
|
||||
os << "\\n";
|
||||
|
||||
if (is_special(v, g)) {
|
||||
switch (v_index) {
|
||||
case NODE_START:
|
||||
os << "START"; break;
|
||||
case NODE_START_DOTSTAR:
|
||||
os << "START-DS"; break;
|
||||
case NODE_ACCEPT:
|
||||
os << "ACCEPT"; break;
|
||||
case NODE_ACCEPT_EOD:
|
||||
os << "ACCEPT-EOD"; break;
|
||||
default:
|
||||
os << "UNKNOWN-SPECIAL"; break;
|
||||
}
|
||||
os << "\\n";
|
||||
} else {
|
||||
// If it's an assert vertex, then display its info.
|
||||
u32 assert_flags = g[v].assert_flags;
|
||||
if (assert_flags) {
|
||||
describeAssert(os, assert_flags);
|
||||
os << "\\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Dump character reachability (in brief).
|
||||
describeClass(os, g[v].char_reach, 5, CC_OUT_DOT);
|
||||
|
||||
for (const auto &report : g[v].reports) {
|
||||
describeReport(os, report, rm);
|
||||
}
|
||||
|
||||
os << "\",";
|
||||
|
||||
if (is_any_start(v, g)) {
|
||||
os << "shape=octagon,";
|
||||
}
|
||||
|
||||
os << "]";
|
||||
|
||||
// If we have a region map, use it to generate clusters.
|
||||
if (region_map) {
|
||||
auto region_id = region_map->at(v);
|
||||
os << "subgraph cluster_" << region_id << " { label=\"region "
|
||||
<< region_id << "\"; style=dashed;" << v_index << ";}";
|
||||
}
|
||||
}
|
||||
|
||||
void operator()(ostream& os, const EdgeT& e) const {
|
||||
// Edge label. Print priority.
|
||||
os << "[fontsize=9,label=\"";
|
||||
// If it's an edge from start, print top id.
|
||||
if (is_any_start(source(e, g), g) && !is_any_start(target(e, g), g)) {
|
||||
os << "TOP " << g[e].top << "\\n";
|
||||
}
|
||||
|
||||
// If it's an assert vertex, then display its info.
|
||||
int assert_flags = g[e].assert_flags;
|
||||
if (assert_flags) {
|
||||
os << "\\n";
|
||||
describeAssert(os, assert_flags);
|
||||
}
|
||||
|
||||
os << "\"]";
|
||||
}
|
||||
|
||||
private:
|
||||
const GraphT &g;
|
||||
const ReportManager *rm = nullptr;
|
||||
const ue2::unordered_map<NFAVertex, u32> *region_map = nullptr;
|
||||
};
|
||||
}
|
||||
|
||||
template <typename GraphT>
|
||||
void dumpGraphImpl(const char *name, const GraphT &g) {
|
||||
typedef typename boost::graph_traits<GraphT>::vertex_descriptor VertexT;
|
||||
typedef typename boost::graph_traits<GraphT>::edge_descriptor EdgeT;
|
||||
ofstream os(name);
|
||||
NFAWriter<VertexT, EdgeT, GraphT> writer(g);
|
||||
writeGraphviz(os, g, writer, get(&NFAGraphVertexProps::index, g));
|
||||
}
|
||||
|
||||
template <typename GraphT>
|
||||
void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm) {
|
||||
typedef typename boost::graph_traits<GraphT>::vertex_descriptor VertexT;
|
||||
typedef typename boost::graph_traits<GraphT>::edge_descriptor EdgeT;
|
||||
ofstream os(name);
|
||||
NFAWriter<VertexT, EdgeT, GraphT> writer(g, rm);
|
||||
writeGraphviz(os, g, writer, get(&NFAGraphVertexProps::index, g));
|
||||
}
|
||||
|
||||
template <typename GraphT>
|
||||
void dumpGraphImpl(const char *name, const GraphT &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map) {
|
||||
typedef typename boost::graph_traits<GraphT>::vertex_descriptor VertexT;
|
||||
typedef typename boost::graph_traits<GraphT>::edge_descriptor EdgeT;
|
||||
ofstream os(name);
|
||||
NFAWriter<VertexT, EdgeT, GraphT> writer(g, region_map);
|
||||
writeGraphviz(os, g, writer, get(&NFAGraphVertexProps::index, g));
|
||||
}
|
||||
|
||||
// manual instantiation of templated dumpGraph above.
|
||||
template void dumpGraphImpl(const char *, const NFAGraph &);
|
||||
|
||||
void dumpDotWrapperImpl(const NGWrapper &nw, const char *name,
|
||||
const Grey &grey) {
|
||||
if (grey.dumpFlags & Grey::DUMP_INT_GRAPH) {
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "Expr_" << nw.expressionIndex << "_" << name << ".dot";
|
||||
DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str());
|
||||
dumpGraphImpl(ss.str().c_str(), nw.g);
|
||||
}
|
||||
}
|
||||
|
||||
void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr,
|
||||
u32 comp, const Grey &grey) {
|
||||
if (grey.dumpFlags & Grey::DUMP_INT_GRAPH) {
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "Comp_" << expr << "-" << comp << "_"
|
||||
<< name << ".dot";
|
||||
DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str());
|
||||
dumpGraphImpl(ss.str().c_str(), g.g);
|
||||
}
|
||||
}
|
||||
|
||||
void dumpSomSubComponentImpl(const NGHolder &g, const char *name, u32 expr,
|
||||
u32 comp, u32 plan, const Grey &grey) {
|
||||
if (grey.dumpFlags & Grey::DUMP_INT_GRAPH) {
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "Comp_" << expr << "-" << comp << "_"
|
||||
<< name << "_" << plan << ".dot";
|
||||
DEBUG_PRINTF("dumping dot graph to '%s'\n", ss.str().c_str());
|
||||
dumpGraphImpl(ss.str().c_str(), g.g);
|
||||
}
|
||||
}
|
||||
|
||||
void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber,
|
||||
const char *stageName, const Grey &grey) {
|
||||
if (grey.dumpFlags & Grey::DUMP_INT_GRAPH) {
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "Holder_X_" << stageNumber
|
||||
<< "-" << stageName << ".dot";
|
||||
dumpGraphImpl(ss.str().c_str(), h.g);
|
||||
}
|
||||
}
|
||||
|
||||
void dumpHolderImpl(const NGHolder &h,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map,
|
||||
unsigned int stageNumber, const char *stageName,
|
||||
const Grey &grey) {
|
||||
if (grey.dumpFlags & Grey::DUMP_INT_GRAPH) {
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "Holder_X_" << stageNumber
|
||||
<< "-" << stageName << ".dot";
|
||||
dumpGraphImpl(ss.str().c_str(), h.g, region_map);
|
||||
}
|
||||
}
|
||||
|
||||
void dumpSmallWrite(const RoseEngine *rose, const Grey &grey) {
|
||||
if (!grey.dumpFlags) {
|
||||
return;
|
||||
}
|
||||
|
||||
const struct SmallWriteEngine *smwr = getSmallWrite(rose);
|
||||
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "smallwrite.txt";
|
||||
|
||||
FILE *f = fopen(ss.str().c_str(), "w");
|
||||
smwrDumpText(smwr, f);
|
||||
fclose(f);
|
||||
|
||||
smwrDumpNFA(smwr, false, grey.dumpPath);
|
||||
}
|
||||
|
||||
/**
 * \brief Return the enumerator name for report type \p type, or "<unknown>"
 * if it is not one of the types listed here.
 */
static UNUSED
const char *irTypeToString(u8 type) {
#define IR_TYPE_CASE(x) case x: return #x
    switch (type) {
        // Externally visible callbacks.
        IR_TYPE_CASE(EXTERNAL_CALLBACK);
        IR_TYPE_CASE(EXTERNAL_CALLBACK_SOM_REL);
        IR_TYPE_CASE(EXTERNAL_CALLBACK_SOM_STORED);
        IR_TYPE_CASE(EXTERNAL_CALLBACK_SOM_ABS);
        IR_TYPE_CASE(EXTERNAL_CALLBACK_SOM_REV_NFA);
        // Internal SOM location operations.
        IR_TYPE_CASE(INTERNAL_SOM_LOC_SET);
        IR_TYPE_CASE(INTERNAL_SOM_LOC_SET_IF_UNSET);
        IR_TYPE_CASE(INTERNAL_SOM_LOC_SET_IF_WRITABLE);
        IR_TYPE_CASE(INTERNAL_SOM_LOC_SET_SOM_REV_NFA);
        IR_TYPE_CASE(INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET);
        IR_TYPE_CASE(INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE);
        IR_TYPE_CASE(INTERNAL_SOM_LOC_COPY);
        IR_TYPE_CASE(INTERNAL_SOM_LOC_COPY_IF_WRITABLE);
        IR_TYPE_CASE(INTERNAL_SOM_LOC_MAKE_WRITABLE);
        IR_TYPE_CASE(INTERNAL_SOM_LOC_SET_FROM);
        IR_TYPE_CASE(INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE);
        // Other internal reports.
        IR_TYPE_CASE(INTERNAL_ROSE_CHAIN);
    default:
        return "<unknown>";
    }
#undef IR_TYPE_CASE
}
|
||||
|
||||
/** \brief Return 1 if \p ri is one of the *_SOM_REV_NFA report types, 0
 * otherwise. */
static really_inline
int isReverseNfaReport(const Report &ri) {
    switch (ri.type) {
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
    case EXTERNAL_CALLBACK_SOM_REV_NFA:
        return 1;
    default:
        return 0;
    }
}
|
||||
|
||||
/** \brief Return 1 if \p ri is one of the plain INTERNAL_SOM_LOC_SET* report
 * types (set, set-if-unset, set-if-writable), 0 otherwise. */
static really_inline
int isSomRelSetReport(const Report &ri) {
    switch (ri.type) {
    case INTERNAL_SOM_LOC_SET:
    case INTERNAL_SOM_LOC_SET_IF_UNSET:
    case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
        return 1;
    default:
        return 0;
    }
}
|
||||
|
||||
void dumpReportManager(const ReportManager &rm, const Grey &grey) {
|
||||
if (!grey.dumpFlags) {
|
||||
return;
|
||||
}
|
||||
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "internal_reports.txt";
|
||||
FILE *f = fopen(ss.str().c_str(), "w");
|
||||
const vector<Report> &reports = rm.reports();
|
||||
for (u32 i = 0; i < reports.size(); i++) {
|
||||
const Report &ir = reports[i];
|
||||
fprintf(f, "int %u: %s onmatch: %u", i, irTypeToString(ir.type),
|
||||
ir.onmatch);
|
||||
|
||||
u32 dkey = rm.getDkey(ir);
|
||||
if (dkey != MO_INVALID_IDX) {
|
||||
fprintf(f, " dkey %u", dkey);
|
||||
}
|
||||
if (ir.ekey != MO_INVALID_IDX) {
|
||||
fprintf(f, " ekey %u", ir.ekey);
|
||||
}
|
||||
if (ir.hasBounds()) {
|
||||
fprintf(f, " hasBounds (minOffset=%llu, maxOffset=%llu, "
|
||||
"minLength=%llu)",
|
||||
ir.minOffset, ir.maxOffset, ir.minLength);
|
||||
}
|
||||
if (ir.offsetAdjust != 0) {
|
||||
fprintf(f, " offsetAdjust: %d", ir.offsetAdjust);
|
||||
}
|
||||
if (isReverseNfaReport(ir)) {
|
||||
fprintf(f, " reverse nfa: %u", ir.revNfaIndex);
|
||||
}
|
||||
if (isSomRelSetReport(ir)) {
|
||||
fprintf(f, " set, adjust: %lld", ir.somDistance);
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
173
src/nfagraph/ng_dump.h
Normal file
173
src/nfagraph/ng_dump.h
Normal file
@@ -0,0 +1,173 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Dump code for NFA graphs.
|
||||
*/
|
||||
|
||||
#ifndef NG_DUMP_H
|
||||
#define NG_DUMP_H
|
||||
|
||||
#include "grey.h"
|
||||
#include "ng_holder.h" // for graph types
|
||||
#include "ue2common.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
#include <fstream>
|
||||
#endif
|
||||
|
||||
struct RoseEngine;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
class NG;
|
||||
class NGWrapper;
|
||||
class ReportManager;
|
||||
|
||||
// Implementations for stubs below -- all have the suffix "Impl".
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
|
||||
template <typename GraphT>
|
||||
void dumpGraphImpl(const char *name, const GraphT &g);
|
||||
|
||||
template <typename GraphT>
|
||||
void dumpGraphImpl(const char *name, const GraphT &g, const ReportManager &rm);
|
||||
|
||||
void dumpDotWrapperImpl(const NGWrapper &w, const char *name, const Grey &grey);
|
||||
|
||||
void dumpComponentImpl(const NGHolder &g, const char *name, u32 expr, u32 comp,
|
||||
const Grey &grey);
|
||||
|
||||
void dumpSomSubComponentImpl(const NGHolder &g, const char *name, u32 expr,
|
||||
u32 comp, u32 plan, const Grey &grey);
|
||||
|
||||
void dumpHolderImpl(const NGHolder &h, unsigned int stageNumber,
|
||||
const char *stageName, const Grey &grey);
|
||||
|
||||
// Variant that takes a region map as well.
|
||||
void dumpHolderImpl(const NGHolder &h,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map,
|
||||
unsigned int stageNumber, const char *stageName,
|
||||
const Grey &grey);
|
||||
|
||||
template <typename GraphT>
|
||||
static inline void dumpGraph(UNUSED const char *name, UNUSED const GraphT &g) {
|
||||
dumpGraphImpl(name, g);
|
||||
}
|
||||
|
||||
#endif // DUMP_SUPPORT
|
||||
|
||||
// Stubs which call through to dump code if compiled in.
|
||||
|
||||
/** Stub: forwards to dumpDotWrapperImpl() when DUMP_SUPPORT is defined and is
 * an empty function otherwise. */
UNUSED static inline
void dumpDotWrapper(UNUSED const NGWrapper &w, UNUSED const char *name,
                    UNUSED const Grey &grey) {
#if defined(DUMP_SUPPORT)
    dumpDotWrapperImpl(w, name, grey);
#endif
}
|
||||
|
||||
/** Stub: forwards to dumpComponentImpl() when DUMP_SUPPORT is defined and is
 * an empty function otherwise. */
UNUSED static inline
void dumpComponent(UNUSED const NGHolder &h, UNUSED const char *name,
                   UNUSED u32 expr, UNUSED u32 comp, UNUSED const Grey &grey) {
#if defined(DUMP_SUPPORT)
    dumpComponentImpl(h, name, expr, comp, grey);
#endif
}
|
||||
|
||||
/** Stub: forwards to dumpSomSubComponentImpl() when DUMP_SUPPORT is defined
 * and is an empty function otherwise. */
UNUSED static inline
void dumpSomSubComponent(UNUSED const NGHolder &h, UNUSED const char *name,
                         UNUSED u32 expr, UNUSED u32 comp, UNUSED u32 plan,
                         UNUSED const Grey &grey) {
#if defined(DUMP_SUPPORT)
    dumpSomSubComponentImpl(h, name, expr, comp, plan, grey);
#endif
}
|
||||
|
||||
/** Stub: forwards to dumpHolderImpl() when DUMP_SUPPORT is defined and is an
 * empty function otherwise. */
UNUSED static inline
void dumpHolder(UNUSED const NGHolder &h, UNUSED unsigned int stageNumber,
                UNUSED const char *name, UNUSED const Grey &grey) {
#if defined(DUMP_SUPPORT)
    dumpHolderImpl(h, stageNumber, name, grey);
#endif
}
|
||||
|
||||
/** Stub (region-map variant): forwards to the dumpHolderImpl() overload that
 * also takes a region map when DUMP_SUPPORT is defined, and is an empty
 * function otherwise. */
UNUSED static inline
void dumpHolder(UNUSED const NGHolder &h,
                UNUSED const ue2::unordered_map<NFAVertex, u32> &region_map,
                UNUSED unsigned int stageNumber, UNUSED const char *name,
                UNUSED const Grey &grey) {
#ifdef DUMP_SUPPORT
    dumpHolderImpl(h, region_map, stageNumber, name, grey);
#endif
}
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
void dumpReportManager(const ReportManager &rm, const Grey &grey);
|
||||
void dumpSmallWrite(const RoseEngine *rose, const Grey &grey);
|
||||
#else
|
||||
static UNUSED
|
||||
void dumpReportManager(const ReportManager &, const Grey &) {
|
||||
}
|
||||
static UNUSED
|
||||
void dumpSmallWrite(const RoseEngine *, const Grey &) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
// Replacement for boost's graphviz writer: writes graph g to out as a DOT
// "digraph G { ... }".
//
// One line is written per vertex -- the id obtained from get(vertex_id, v),
// followed by whatever w(out, v) prints -- and one line per edge --
// "<src id> -> <dst id> ", followed by whatever w(out, e) prints. Every line
// is terminated with ";".
//
// Lines end in '\n' rather than std::endl so that the stream is not flushed
// after every vertex and edge; it is flushed once, after the closing brace.
template <typename GraphT, typename WriterT, typename VertexID>
static void writeGraphviz(std::ostream &out, const GraphT &g, WriterT w,
                          const VertexID &vertex_id) {
    out << "digraph G {\n";

    // vertices(g) / edges(g) return a (begin, end) pair of iterators.
    const auto vertex_span = vertices(g);
    for (auto i = vertex_span.first; i != vertex_span.second; ++i) {
        out << get(vertex_id, *i);
        w(out, *i); // print vertex attributes
        out << ";\n";
    }

    const auto edge_span = edges(g);
    for (auto ei = edge_span.first; ei != edge_span.second; ++ei) {
        out << (get(vertex_id, source(*ei, g))) << " -> "
            << (get(vertex_id, target(*ei, g))) << " ";
        w(out, *ei); // print edge attributes
        out << ";\n";
    }

    out << "}" << std::endl;
}
|
||||
|
||||
#endif // DUMP_SUPPORT
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_DUMP_H
|
||||
517
src/nfagraph/ng_edge_redundancy.cpp
Normal file
517
src/nfagraph/ng_edge_redundancy.cpp
Normal file
@@ -0,0 +1,517 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Edge redundancy graph reductions.
|
||||
*/
|
||||
#include "ng_edge_redundancy.h"
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "parser/position.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/* reverse edge redundancy removal is possible but is not implemented as it
|
||||
* regressed rose pattern support in the regression suite: 19026 - 19027
|
||||
* (foo.{1,5}b?ar)
|
||||
*
|
||||
* If rose becomes smarter we can reimplement.
|
||||
*/
|
||||
|
||||
/** Returns true if every vertex in \p sad has at least one out-edge to a
 * vertex in \p happy. Trivially true when \p sad is empty. */
static never_inline
bool checkVerticesFwd(const NGHolder &g, const set<NFAVertex> &sad,
                      const set<NFAVertex> &happy) {
    const auto has_happy_succ = [&](NFAVertex from) -> bool {
        for (auto to : adjacent_vertices_range(from, g)) {
            if (contains(happy, to)) {
                return true;
            }
        }
        return false;
    };

    for (auto candidate : sad) {
        if (!has_happy_succ(candidate)) {
            return false;
        }
    }

    return true;
}
|
||||
|
||||
/** Returns true if every vertex in \p sad has at least one in-edge from a
 * vertex in \p happy. Trivially true when \p sad is empty. */
static never_inline
bool checkVerticesRev(const NGHolder &g, const set<NFAVertex> &sad,
                      const set<NFAVertex> &happy) {
    const auto has_happy_pred = [&](NFAVertex to) -> bool {
        for (auto from : inv_adjacent_vertices_range(to, g)) {
            if (contains(happy, from)) {
                return true;
            }
        }
        return false;
    };

    for (auto candidate : sad) {
        if (!has_happy_pred(candidate)) {
            return false;
        }
    }

    return true;
}
|
||||
|
||||
/** \brief Redundant self-loop removal.
|
||||
*
|
||||
* A self loop on a vertex v can be removed if:
|
||||
*
|
||||
* For every vertex u in pred(v) either:
|
||||
* 1: u has a self loop and cr(v) subset of cr(u)
|
||||
* OR
|
||||
* 2: u has an edge to vertex satisfying criterion 1
|
||||
*
|
||||
* Note: we remove all dead loops at the end of the pass and do not check the
|
||||
* live status of the loops we are depending on during the analysis.
|
||||
*
|
||||
* We don't end up in situations where we remove a group of loops which depend
|
||||
* on each other as:
|
||||
*
|
||||
* - there must be at least one vertex not in the group which is a pred of some
|
||||
* member of the group (as we don't remove loops on specials)
|
||||
*
|
||||
* For each pred vertex of the group:
|
||||
* - the vertex must be 'sad' as it is not part of the group
|
||||
* - therefore it must have edges to each member of the group (to happy, trans)
|
||||
* - therefore the group is enabled simultaneously
|
||||
* - due to internal group edges, all members will still be active after the
|
||||
* next character.
|
||||
*
|
||||
* Actually, the vertex redundancy code will merge the entire group into one
|
||||
* cyclic state.
|
||||
*/
|
||||
static
|
||||
bool removeEdgeRedundancyNearCyclesFwd(NGHolder &g, bool ignore_starts) {
|
||||
unsigned dead_count = 0;
|
||||
|
||||
set<NFAVertex> happy;
|
||||
set<NFAVertex> sad;
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (is_special(v, g) || !hasSelfLoop(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const CharReach &cr_v = g[v].char_reach;
|
||||
|
||||
happy.clear();
|
||||
sad.clear();
|
||||
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (u == v) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!hasSelfLoop(u, g)) {
|
||||
sad.insert(u);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ignore_starts) {
|
||||
if (u == g.startDs || is_virtual_start(u, g)) {
|
||||
sad.insert(u);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
const CharReach &cr_u = g[u].char_reach;
|
||||
|
||||
if ((cr_u & cr_v) != cr_v) {
|
||||
sad.insert(u);
|
||||
continue;
|
||||
}
|
||||
|
||||
happy.insert(u);
|
||||
}
|
||||
|
||||
if (!happy.empty() && checkVerticesFwd(g, sad, happy)) {
|
||||
dead_count++;
|
||||
remove_edge(v, v, g);
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("found %u removable edges.\n", dead_count);
|
||||
return dead_count;
|
||||
}
|
||||
|
||||
/** \brief Redundant self-loop removal (reverse version).
|
||||
*
|
||||
* A self loop on a vertex v can be removed if:
|
||||
*
|
||||
* For every vertex u in succ(v) either:
|
||||
* 1: u has a self loop and cr(v) is a subset of cr(u).
|
||||
* OR
|
||||
* 2: u is not an accept and u has an edge from a vertex satisfying
|
||||
* criterion 1.
|
||||
* OR
|
||||
 * 3: u is an accept and u has an edge from a vertex v' satisfying
|
||||
* criterion 1 and report(v) == report(v').
|
||||
*/
|
||||
static
|
||||
bool removeEdgeRedundancyNearCyclesRev(NGHolder &g) {
|
||||
unsigned dead_count = 0;
|
||||
|
||||
set<NFAVertex> happy;
|
||||
set<NFAVertex> sad;
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (is_special(v, g) || !hasSelfLoop(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const CharReach &cr_v = g[v].char_reach;
|
||||
|
||||
happy.clear();
|
||||
sad.clear();
|
||||
|
||||
for (auto u : adjacent_vertices_range(v, g)) {
|
||||
if (u == v) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!hasSelfLoop(u, g)) {
|
||||
sad.insert(u);
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(!is_special(u, g));
|
||||
|
||||
const CharReach &cr_u = g[u].char_reach;
|
||||
|
||||
if (!cr_v.isSubsetOf(cr_u)) {
|
||||
sad.insert(u);
|
||||
continue;
|
||||
}
|
||||
|
||||
happy.insert(u);
|
||||
}
|
||||
|
||||
if (!happy.empty() && checkVerticesRev(g, sad, happy)) {
|
||||
dead_count++;
|
||||
remove_edge(v, v, g);
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("found %u removable edges.\n", dead_count);
|
||||
return dead_count;
|
||||
}
|
||||
|
||||
static
|
||||
bool parentsSubsetOf(const NGHolder &g, NFAVertex v,
|
||||
const flat_set<NFAVertex> &other_parents, NFAVertex other,
|
||||
map<NFAVertex, bool> &done) {
|
||||
map<NFAVertex, bool>::const_iterator dit = done.find(v);
|
||||
if (dit != done.end()) {
|
||||
return dit->second;
|
||||
}
|
||||
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (u == v && contains(other_parents, other)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!contains(other_parents, u)) {
|
||||
done[v] = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
done[v] = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool checkFwdCandidate(const NGHolder &g, NFAVertex fixed_src,
|
||||
const flat_set<NFAVertex> &fixed_parents,
|
||||
const NFAEdge &candidate,
|
||||
map<NFAVertex, bool> &done) {
|
||||
NFAVertex w = source(candidate, g);
|
||||
NFAVertex v = target(candidate, g);
|
||||
const CharReach &cr_w = g[w].char_reach;
|
||||
const CharReach &cr_u = g[fixed_src].char_reach;
|
||||
|
||||
/* There is no reason why self loops cannot be considered by this
|
||||
* transformation but the removal is already handled by many other
|
||||
* transformations. */
|
||||
if (w == v) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (is_special(w, g)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!cr_w.isSubsetOf(cr_u)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* check that each parent of w is also a parent of u */
|
||||
if (!parentsSubsetOf(g, w, fixed_parents, fixed_src, done)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("edge (%u, %u) killed by edge (%u, %u)\n",
|
||||
g[w].index, g[v].index,
|
||||
g[fixed_src].index, g[v].index);
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Redundant-edge search for a vertex \p u, used by removeEdgeRedundancyFwd()
 * when hasGreaterOutDegree(1, u, g) holds.
 *
 * First collects into \p possible_w (scratch storage, cleared on entry) every
 * child of a parent of \p u whose reach is a subset of reach(u); only edges
 * whose source is in that set are then handed to checkFwdCandidate(). Edges
 * found redundant are added to \p dead. */
static never_inline
void checkLargeOutU(const NGHolder &g, NFAVertex u,
                    const flat_set<NFAVertex> &parents_u,
                    flat_set<NFAVertex> &possible_w,
                    map<NFAVertex, bool> &done,
                    set<NFAEdge> *dead) {
    /* only vertices with at least one parent in common with u need to be
     * considered, and we also only consider potential siblings with subset
     * reach. */
    possible_w.clear();
    const CharReach &cr_u = g[u].char_reach;
    for (auto parent : parents_u) {
        for (auto sibling : adjacent_vertices_range(parent, g)) {
            if (g[sibling].char_reach.isSubsetOf(cr_u)) {
                possible_w.insert(sibling);
            }
        }
    }

    // If there's only one, it's us, and we have no work to do.
    if (possible_w.size() <= 1) {
        assert(possible_w.empty() || *possible_w.begin() == u);
        return;
    }

    for (const auto &e : out_edges_range(u, g)) {
        const NFAVertex v = target(e, g);

        if (is_special(v, g) || contains(*dead, e)) {
            continue;
        }

        /* Look for other in-edges of v which can be removed due to the
         * existence of edge e */
        for (const auto &e2 : in_edges_range(v, g)) {
            const bool skip = e == e2 || contains(*dead, e2)
                              || !contains(possible_w, source(e2, g));
            if (skip) {
                continue;
            }

            if (checkFwdCandidate(g, u, parents_u, e2, done)) {
                dead->insert(e2);
            }
        }
    }
}
|
||||
|
||||
/** Redundant-edge search for a vertex \p u, used by removeEdgeRedundancyFwd()
 * when hasGreaterOutDegree(1, u, g) does not hold.
 *
 * For each live out-edge e of \p u to a non-special target v, every other
 * live in-edge of v is handed to checkFwdCandidate(); edges found redundant
 * are added to \p dead. */
static never_inline
void checkSmallOutU(const NGHolder &g, NFAVertex u,
                    const flat_set<NFAVertex> &parents_u,
                    map<NFAVertex, bool> &done,
                    set<NFAEdge> *dead) {
    for (const auto &e : out_edges_range(u, g)) {
        const NFAVertex v = target(e, g);

        if (is_special(v, g) || contains(*dead, e)) {
            continue;
        }

        /* Look for other in-edges of v which can be removed due to the
         * existence of edge e */
        for (const auto &e2 : in_edges_range(v, g)) {
            const bool skip = e == e2 || contains(*dead, e2);
            if (skip) {
                continue;
            }

            if (checkFwdCandidate(g, u, parents_u, e2, done)) {
                dead->insert(e2);
            }
        }
    }
}
|
||||
|
||||
/** \brief Forward edge redundancy pass.
|
||||
*
|
||||
* An edge e from w to v is redundant if there exists an edge e' such that:
|
||||
* e' is from u to v
|
||||
* and: reach(w) is a subset of reach(u)
|
||||
* and: proper_pred(w) is a subset of pred(u)
|
||||
* and: self_loop(w) implies self_loop(u) or edge from (w to u)
|
||||
*
|
||||
* Note: edges to accepts also require report ID checks.
|
||||
*/
|
||||
static
|
||||
bool removeEdgeRedundancyFwd(NGHolder &g, bool ignore_starts) {
|
||||
set<NFAEdge> dead;
|
||||
map<NFAVertex, bool> done;
|
||||
flat_set<NFAVertex> parents_u;
|
||||
flat_set<NFAVertex> possible_w;
|
||||
|
||||
for (auto u : vertices_range(g)) {
|
||||
if (ignore_starts && (u == g.startDs || is_virtual_start(u, g))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
parents_u.clear();
|
||||
pred(g, u, &parents_u);
|
||||
|
||||
done.clear();
|
||||
if (hasGreaterOutDegree(1, u, g)) {
|
||||
checkLargeOutU(g, u, parents_u, possible_w, done, &dead);
|
||||
} else {
|
||||
checkSmallOutU(g, u, parents_u, done, &dead);
|
||||
}
|
||||
}
|
||||
|
||||
if (dead.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("found %zu removable non-selfloops.\n", dead.size());
|
||||
remove_edges(dead, g);
|
||||
pruneUseless(g);
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Entry point: Runs all the edge redundancy passes. If SoM is tracked,
|
||||
* don't consider startDs or virtual starts as cyclic vertices. */
|
||||
bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc) {
|
||||
if (!cc.grey.removeEdgeRedundancy) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool changed = false;
|
||||
changed |= removeEdgeRedundancyNearCyclesFwd(g, som);
|
||||
changed |= removeEdgeRedundancyNearCyclesRev(g);
|
||||
changed |= removeEdgeRedundancyFwd(g, som);
|
||||
return changed;
|
||||
}
|
||||
|
||||
/** \brief Removes optional stuff from the front of floating patterns, since it's
|
||||
* redundant with startDs.
|
||||
*
|
||||
* For each successor of startDs, remove any in-edges that aren't from either
|
||||
* start or startDs. This allows us to prune redundant vertices at the start of
|
||||
* a pattern:
|
||||
*
|
||||
* /(hat)?stand --> /stand/
|
||||
*
|
||||
*/
|
||||
bool removeSiblingsOfStartDotStar(NGHolder &g) {
    vector<NFAEdge> dead;

    for (auto v : adjacent_vertices_range(g.startDs, g)) {
        DEBUG_PRINTF("checking %u\n", g[v].index);
        if (is_special(v, g)) {
            continue;
        }

        // Every in-edge of v whose source is not a special vertex goes.
        for (const auto &e : in_edges_range(v, g)) {
            const NFAVertex u = source(e, g);
            if (!is_special(u, g)) {
                DEBUG_PRINTF("removing %u->%u\n", g[u].index,
                             g[v].index);
                dead.push_back(e);
            }
        }
    }

    if (!dead.empty()) {
        DEBUG_PRINTF("found %zu removable edges.\n", dead.size());
        remove_edges(dead, g);
        pruneUseless(g);
        return true;
    }

    return false;
}
|
||||
|
||||
/** Removes all edges into virtual starts other than those from start/startDs,
|
||||
* providing there is an edge from startDs. This operation is an optimisation
|
||||
* for SOM mode. (see UE-1544) */
|
||||
bool optimiseVirtualStarts(NGHolder &g) {
|
||||
vector<NFAEdge> dead;
|
||||
for (auto v : adjacent_vertices_range(g.startDs, g)) {
|
||||
u32 flags = g[v].assert_flags;
|
||||
if (!(flags & POS_FLAG_VIRTUAL_START)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const auto &e : in_edges_range(v, g)) {
|
||||
if (!is_any_start(source(e, g), g)) {
|
||||
dead.push_back(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (dead.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("removing %zu edges into virtual starts\n", dead.size());
|
||||
remove_edges(dead, g);
|
||||
pruneUseless(g);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
65
src/nfagraph/ng_edge_redundancy.h
Normal file
65
src/nfagraph/ng_edge_redundancy.h
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Edge redundancy graph reductions.
|
||||
*/
|
||||
#ifndef NG_EDGE_REDUNDANCY_H
|
||||
#define NG_EDGE_REDUNDANCY_H
|
||||
|
||||
#include "som/som.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
struct CompileContext;
|
||||
|
||||
/** \brief Entry point: Runs all the edge redundancy passes. */
|
||||
bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc);
|
||||
|
||||
/** \brief Removes optional stuff from the front of floating patterns, since
|
||||
* it's redundant with startDs.
|
||||
*
|
||||
* For each successor of startDs, remove any in-edges that aren't from either
|
||||
* start or startDs. This allows us to prune redundant vertices at the start of
|
||||
* a pattern:
|
||||
*
|
||||
* /(hat)?stand --> /stand/
|
||||
*
|
||||
*/
|
||||
bool removeSiblingsOfStartDotStar(NGHolder &g);
|
||||
|
||||
/** \brief Removes all edges into virtual starts other than those from
|
||||
* start/startDs, providing there is an edge from startDs.
|
||||
*
|
||||
* This operation is an optimisation for SOM mode. (see UE-1544) */
|
||||
bool optimiseVirtualStarts(NGHolder &g);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
695
src/nfagraph/ng_equivalence.cpp
Normal file
695
src/nfagraph/ng_equivalence.cpp
Normal file
@@ -0,0 +1,695 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Equivalence class graph reduction pass.
|
||||
*/
|
||||
|
||||
#include "ng_equivalence.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "ng_depth.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_util.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/order_check.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include <stack>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/ptr_container/ptr_vector.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::ptr_vector;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** Direction of the equivalence being computed. ClassInfo uses the
 * predecessors' reach for LEFT_EQUIVALENCE, and the successors' reach plus
 * the vertex's reports for RIGHT_EQUIVALENCE. */
enum EquivalenceType {
    LEFT_EQUIVALENCE = 0,
    RIGHT_EQUIVALENCE,
    MAX_EQUIVALENCE //!< one past the last real equivalence type
};
|
||||
|
||||
namespace {
|
||||
class VertexInfo;
|
||||
|
||||
// custom comparison functor for unordered_set and flat_set
|
||||
struct VertexInfoPtrCmp {
|
||||
// for flat_set
|
||||
bool operator()(const VertexInfo *a, const VertexInfo *b) const;
|
||||
// for unordered_set
|
||||
size_t operator()(const VertexInfo *a) const;
|
||||
};
|
||||
|
||||
/** Precalculated (and maintained) information about a vertex. */
|
||||
class VertexInfo {
|
||||
public:
|
||||
VertexInfo(NFAVertex v_in, const NGHolder &g)
|
||||
: v(v_in), vert_index(g[v].index), cr(g[v].char_reach), edge_top(~0),
|
||||
equivalence_class(~0), vertex_flags(g[v].assert_flags) {}
|
||||
|
||||
flat_set<VertexInfo *, VertexInfoPtrCmp> pred; //!< predecessors of this vertex
|
||||
flat_set<VertexInfo *, VertexInfoPtrCmp> succ; //!< successors of this vertex
|
||||
NFAVertex v;
|
||||
u32 vert_index;
|
||||
CharReach cr;
|
||||
CharReach pred_cr;
|
||||
CharReach succ_cr;
|
||||
unsigned edge_top;
|
||||
unsigned equivalence_class;
|
||||
unsigned vertex_flags;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
typedef ue2::unordered_set<VertexInfo *, VertexInfoPtrCmp> VertexInfoSet;
|
||||
typedef ue2::unordered_map<unsigned, VertexInfoSet> ClassMap;
|
||||
|
||||
// compare two vertex info pointers on their vertex index
|
||||
bool VertexInfoPtrCmp::operator()(const VertexInfo *a,
|
||||
const VertexInfo *b) const {
|
||||
return a->vert_index < b->vert_index;
|
||||
}
|
||||
// provide a "hash" for vertex info pointer by returning its vertex index
|
||||
size_t VertexInfoPtrCmp::operator()(const VertexInfo *a) const {
|
||||
return a->vert_index;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// to avoid traversing infomap each time we need to check the class during
|
||||
// partitioning, we will cache the information pertaining to a particular class
|
||||
class ClassInfo {
|
||||
public:
|
||||
struct ClassDepth {
|
||||
ClassDepth() {}
|
||||
ClassDepth(const NFAVertexDepth &d)
|
||||
: d1(d.fromStart), d2(d.fromStartDotStar) {}
|
||||
ClassDepth(const NFAVertexRevDepth &rd)
|
||||
: d1(rd.toAccept), d2(rd.toAcceptEod) {}
|
||||
DepthMinMax d1;
|
||||
DepthMinMax d2;
|
||||
};
|
||||
ClassInfo(const NGHolder &g, VertexInfo &vi, ClassDepth &d_in,
|
||||
EquivalenceType eq)
|
||||
: vertex_flags(vi.vertex_flags), edge_top(vi.edge_top), cr(vi.cr),
|
||||
depth(d_in) {
|
||||
|
||||
// hackety-hack!
|
||||
node_type = g[vi.v].index;
|
||||
if (node_type > N_SPECIALS) {
|
||||
// we treat all regular vertices the same
|
||||
node_type = N_SPECIALS;
|
||||
}
|
||||
|
||||
// get all the adjacent vertices' CharReach
|
||||
adjacent_cr = eq == LEFT_EQUIVALENCE ? vi.pred_cr : vi.succ_cr;
|
||||
|
||||
if (eq == RIGHT_EQUIVALENCE) {
|
||||
rs = g[vi.v].reports;
|
||||
}
|
||||
}
|
||||
|
||||
bool operator<(const ClassInfo &b) const;
|
||||
|
||||
private:
|
||||
flat_set<ReportID> rs; /* for right equiv only */
|
||||
unsigned vertex_flags;
|
||||
u32 edge_top;
|
||||
CharReach cr;
|
||||
CharReach adjacent_cr;
|
||||
unsigned node_type;
|
||||
ClassDepth depth;
|
||||
};
|
||||
|
||||
// Work queue class. This contraption has two goals:
// 1. uniqueness of elements: pushing an id that is already queued is a no-op
// 2. FILO operation: pop() hands back the most recently queued id
class WorkQueue {
public:
    // Creates an empty queue with storage reserved for c ids.
    explicit WorkQueue(unsigned c) {
        q.reserve(c);
    }

    // Unique push: id is queued only if it is not currently in the queue.
    void push(unsigned id) {
        if (ids.insert(id).second) {
            q.push_back(id);
        }
    }

    // Removes and returns the most recently queued id. The queue must not be
    // empty.
    unsigned pop() {
        unsigned id = q.back();
        ids.erase(id);
        q.pop_back();
        return id;
    }

    // Pushes every id of other, in other's queue order, skipping ids that are
    // already queued here. other is only read, so it is taken by const
    // reference.
    void append(const WorkQueue &other) {
        for (const auto &e : other) {
            push(e);
        }
    }

    void clear() {
        ids.clear();
        q.clear();
    }

    bool empty() const {
        return ids.empty();
    }

    // Iteration runs from the oldest queued id to the newest.
    std::vector<unsigned>::const_iterator begin() const {
        return q.begin();
    }

    std::vector<unsigned>::const_iterator end() const {
        return q.end();
    }

    size_t capacity() const {
        return q.capacity();
    }
private:
    std::set<unsigned> ids; //!< stores id's, for uniqueness
    std::vector<unsigned> q; //!< vector of id's that we use as FILO.
};
|
||||
|
||||
}
|
||||
|
||||
// Ordering of ClassInfo objects: the fields are compared one at a time by the
// ORDER_CHECK macro, in the order listed below; if none of the checks returns,
// the result is false.
bool ClassInfo::operator<(const ClassInfo &b) const {
    // ORDER_CHECK refers to its operands by the names 'a' and 'b'.
    const ClassInfo &a = *this;

    ORDER_CHECK(node_type);
    ORDER_CHECK(depth.d1);
    ORDER_CHECK(depth.d2);
    ORDER_CHECK(cr);
    ORDER_CHECK(adjacent_cr);
    ORDER_CHECK(edge_top);
    ORDER_CHECK(vertex_flags);
    ORDER_CHECK(rs);

    return false;
}
|
||||
|
||||
static
|
||||
bool outIsIrreducible(NFAVertex &v, const NGHolder &g) {
|
||||
unsigned nonSpecialVertices = 0;
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (!is_special(w, g) && w != v) {
|
||||
nonSpecialVertices++;
|
||||
}
|
||||
}
|
||||
return nonSpecialVertices == 1;
|
||||
}
|
||||
|
||||
static
|
||||
bool inIsIrreducible(NFAVertex &v, const NGHolder &g) {
|
||||
unsigned nonSpecialVertices = 0;
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (!is_special(u, g) && u != v) {
|
||||
nonSpecialVertices++;
|
||||
}
|
||||
}
|
||||
return nonSpecialVertices == 1;
|
||||
}
|
||||
|
||||
/** Cheaply check whether this graph can't be reduced at all, because it is
|
||||
* just a chain of vertices with no other edges. */
|
||||
static
|
||||
bool isIrreducible(const NGHolder &g) {
|
||||
for (auto v : vertices_range(g)) {
|
||||
// skip specials
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// we want meaningful in_degree to be 1. we also want to make sure we
|
||||
// don't count self-loop + 1 incoming edge as not irreducible
|
||||
if (in_degree(v, g) != 1 && !inIsIrreducible(v, g)) {
|
||||
return false;
|
||||
}
|
||||
// we want meaningful out_degree to be 1. we also want to make sure we
|
||||
// don't count self-loop + 1 outgoing edge as not irreducible
|
||||
if (out_degree(v, g) != 1 && !outIsIrreducible(v, g)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
static
|
||||
bool hasEdgeAsserts(NFAVertex v, const NGHolder &g) {
|
||||
for (const auto &e : in_edges_range(v, g)) {
|
||||
if (g[e].assert_flags != 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
for (const auto &e : out_edges_range(v, g)) {
|
||||
if (g[e].assert_flags != 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
// populate VertexInfo table
|
||||
static
|
||||
void getVertexInfos(const NGHolder &g, ptr_vector<VertexInfo> &infos) {
|
||||
vector<VertexInfo *> vertex_map; // indexed by vertex_index property
|
||||
vertex_map.resize(num_vertices(g));
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
VertexInfo *vi = new VertexInfo(v, g);
|
||||
|
||||
// insert our new shiny VertexInfo into the info map
|
||||
infos.push_back(vi);
|
||||
|
||||
vertex_map[g[v].index] = vi;
|
||||
}
|
||||
|
||||
// now, go through each vertex and populate its predecessor and successor lists
|
||||
for (VertexInfo &cur_vi : infos) {
|
||||
// find predecessors
|
||||
for (const auto &e : in_edges_range(cur_vi.v, g)) {
|
||||
NFAVertex u = source(e, g);
|
||||
VertexInfo *vmi = vertex_map[g[u].index];
|
||||
|
||||
cur_vi.pred_cr |= vmi->cr;
|
||||
cur_vi.pred.insert(vmi);
|
||||
|
||||
// also set up edge tops
|
||||
if (is_triggered(g) && u == g.start) {
|
||||
cur_vi.edge_top = g[e].top;
|
||||
}
|
||||
}
|
||||
|
||||
// find successors
|
||||
for (auto w : adjacent_vertices_range(cur_vi.v, g)) {
|
||||
VertexInfo *vmi = vertex_map[g[w].index];
|
||||
cur_vi.succ_cr |= vmi->cr;
|
||||
cur_vi.succ.insert(vmi);
|
||||
}
|
||||
assert(!hasEdgeAsserts(cur_vi.v, g));
|
||||
}
|
||||
}
|
||||
|
||||
// store equivalence class in VertexInfo for each vertex
|
||||
static
|
||||
void partitionGraph(ptr_vector<VertexInfo> &infos, ClassMap &classes,
|
||||
WorkQueue &work_queue, const NGHolder &g,
|
||||
EquivalenceType eq) {
|
||||
map<ClassInfo, unsigned> classinfomap;
|
||||
|
||||
// get distances from start (or accept) for all vertices
|
||||
// only one of them is used at a time, never both
|
||||
vector<NFAVertexDepth> depths;
|
||||
vector<NFAVertexRevDepth> rdepths;
|
||||
|
||||
if (eq == LEFT_EQUIVALENCE) {
|
||||
calcDepths(g, depths);
|
||||
} else {
|
||||
calcDepths(g, rdepths);
|
||||
}
|
||||
|
||||
// partition the graph based on CharReach
|
||||
for (VertexInfo &vi : infos) {
|
||||
ClassInfo::ClassDepth depth;
|
||||
|
||||
if (eq == LEFT_EQUIVALENCE) {
|
||||
depth = depths[vi.vert_index];
|
||||
} else {
|
||||
depth = rdepths[vi.vert_index];
|
||||
}
|
||||
ClassInfo ci(g, vi, depth, eq);
|
||||
|
||||
auto ii = classinfomap.find(ci);
|
||||
if (ii == classinfomap.end()) {
|
||||
unsigned new_class = classinfomap.size();
|
||||
vi.equivalence_class = new_class;
|
||||
|
||||
classinfomap[ci] = new_class;
|
||||
|
||||
// insert this vertex into the class map
|
||||
VertexInfoSet &vertices = classes[new_class];
|
||||
vertices.insert(&vi);
|
||||
} else {
|
||||
unsigned eq_class = ii->second;
|
||||
vi.equivalence_class = eq_class;
|
||||
|
||||
// insert this vertex into the class map
|
||||
VertexInfoSet &vertices = classes[eq_class];
|
||||
vertices.insert(&vi);
|
||||
|
||||
// we now know that this particular class has more than one
|
||||
// vertex, so we add it to the work queue
|
||||
work_queue.push(eq_class);
|
||||
}
|
||||
}
|
||||
DEBUG_PRINTF("partitioned, %lu equivalence classes\n", classinfomap.size());
|
||||
}
|
||||
|
||||
// generalized equivalence processing (left and right)
|
||||
// basically, goes through every vertex in a class and checks if all successor or
|
||||
// predecessor classes match in all vertices. if classes mismatch, a vertex is
|
||||
// split into a separate class, along with all vertices having the same set of
|
||||
// successor/predecessor classes. the opposite side (successors for left
|
||||
// equivalence, predecessors for right equivalence) classes get revalidated in
|
||||
// case of a split.
|
||||
static
|
||||
void equivalence(ClassMap &classmap, WorkQueue &work_queue,
|
||||
EquivalenceType eq_type) {
|
||||
// now, go through the work queue until it's empty
|
||||
map<flat_set<unsigned>, VertexInfoSet> tentative_classmap;
|
||||
flat_set<unsigned> cur_classes;
|
||||
// local work queue, to store classes we want to revalidate in case of split
|
||||
WorkQueue reval_queue(work_queue.capacity());
|
||||
|
||||
while (!work_queue.empty()) {
|
||||
|
||||
// dequeue our class from the work queue
|
||||
unsigned cur_class = work_queue.pop();
|
||||
|
||||
// get all vertices in current equivalence class
|
||||
VertexInfoSet &cur_class_vertices = classmap[cur_class];
|
||||
|
||||
if (cur_class_vertices.size() < 2) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// clear data from previous iterations
|
||||
tentative_classmap.clear();
|
||||
|
||||
DEBUG_PRINTF("doing equivalence pass for class %u, %zd vertices\n",
|
||||
cur_class, cur_class_vertices.size());
|
||||
|
||||
// go through vertices in this class
|
||||
for (VertexInfo *vi : cur_class_vertices) {
|
||||
cur_classes.clear();
|
||||
|
||||
// get vertex lists for equivalence vertices and vertices for
|
||||
// revalidation in case of split
|
||||
const auto &eq_vertices =
|
||||
(eq_type == LEFT_EQUIVALENCE) ? vi->pred : vi->succ;
|
||||
const auto &reval_vertices =
|
||||
(eq_type == LEFT_EQUIVALENCE) ? vi->succ : vi->pred;
|
||||
|
||||
// go through equivalence and note the classes
|
||||
for (const VertexInfo *tmp : eq_vertices) {
|
||||
cur_classes.insert(tmp->equivalence_class);
|
||||
}
|
||||
|
||||
// note all the classes that need to be reevaluated
|
||||
for (const VertexInfo *tmp : reval_vertices) {
|
||||
reval_queue.push(tmp->equivalence_class);
|
||||
}
|
||||
|
||||
VertexInfoSet &tentative_classes = tentative_classmap[cur_classes];
|
||||
tentative_classes.insert(vi);
|
||||
}
|
||||
|
||||
// if we found more than one class, split and revalidate everything
|
||||
if (tentative_classmap.size() > 1) {
|
||||
auto tmi = tentative_classmap.begin();
|
||||
|
||||
// start from the second class
|
||||
for (++tmi; tmi != tentative_classmap.end(); ++tmi) {
|
||||
unsigned new_class = classmap.size();
|
||||
const VertexInfoSet &vertices_to_split = tmi->second;
|
||||
VertexInfoSet &new_class_vertices = classmap[new_class];
|
||||
|
||||
for (VertexInfo *vi : vertices_to_split) {
|
||||
vi->equivalence_class = new_class;
|
||||
cur_class_vertices.erase(vi);
|
||||
new_class_vertices.insert(vi);
|
||||
}
|
||||
if (tmi->first.find(cur_class) != tmi->first.end()) {
|
||||
reval_queue.push(new_class);
|
||||
}
|
||||
}
|
||||
work_queue.append(reval_queue);
|
||||
}
|
||||
reval_queue.clear();
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool require_separate_eod_vertex(const VertexInfoSet &vert_infos,
|
||||
const NGHolder &g) {
|
||||
/* We require separate eod and normal accept vertices for a class if we have
|
||||
* both normal accepts and eod accepts AND the reports are different for eod
|
||||
* and non-eod reports. */
|
||||
|
||||
flat_set<ReportID> non_eod;
|
||||
flat_set<ReportID> eod;
|
||||
|
||||
for (const VertexInfo *vi : vert_infos) {
|
||||
NFAVertex v = vi->v;
|
||||
|
||||
if (edge(v, g.accept, g).second) {
|
||||
insert(&non_eod, g[v].reports);
|
||||
}
|
||||
|
||||
if (edge(v, g.acceptEod, g).second) {
|
||||
insert(&eod, g[v].reports);
|
||||
}
|
||||
}
|
||||
|
||||
if (non_eod.empty() || eod.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return non_eod != eod;
|
||||
|
||||
}
|
||||
|
||||
static
|
||||
void mergeClass(ptr_vector<VertexInfo> &infos, NGHolder &g, unsigned eq_class,
|
||||
VertexInfoSet &cur_class_vertices, set<NFAVertex> *toRemove) {
|
||||
DEBUG_PRINTF("Replacing %zd vertices from equivalence class %u with a "
|
||||
"single vertex.\n", cur_class_vertices.size(), eq_class);
|
||||
|
||||
// replace equivalence class with a single vertex:
|
||||
// 1. create new vertex with matching properties
|
||||
// 2. wire all predecessors to new vertex
|
||||
// 2a. update info for new vertex with new predecessors
|
||||
// 2b. update each predecessor's successor list
|
||||
// 3. wire all successors to new vertex
|
||||
// 3a. update info for new vertex with new successors
|
||||
// 3b. update each successor's predecessor list
|
||||
// 4. remove old vertex
|
||||
|
||||
// any differences between vertex properties were resolved during
|
||||
// initial partitioning, so we assume that every vertex in equivalence
|
||||
// class has the same CharReach et al.
|
||||
// so, we find the first vertex in our class and get all its properties
|
||||
|
||||
/* For left equivalence, if the members have different reporting behaviour
|
||||
* we sometimes require two vertices to be created (one connected to accept
|
||||
* and one to accepteod) */
|
||||
|
||||
NFAVertex old_v = (*cur_class_vertices.begin())->v;
|
||||
NFAVertex new_v = clone_vertex(g, old_v); /* set up new vertex with same
|
||||
* props */
|
||||
g[new_v].reports.clear(); /* populated as we pull in succs */
|
||||
|
||||
VertexInfo *new_vertex_info = new VertexInfo(new_v, g);
|
||||
// store this vertex in our global vertex list
|
||||
infos.push_back(new_vertex_info);
|
||||
|
||||
NFAVertex new_v_eod = NGHolder::null_vertex();
|
||||
VertexInfo *new_vertex_info_eod = nullptr;
|
||||
|
||||
if (require_separate_eod_vertex(cur_class_vertices, g)) {
|
||||
new_v_eod = clone_vertex(g, old_v);
|
||||
g[new_v_eod].reports.clear();
|
||||
new_vertex_info_eod = new VertexInfo(new_v_eod, g);
|
||||
infos.push_back(new_vertex_info_eod);
|
||||
}
|
||||
|
||||
const unsigned edgetop = (*cur_class_vertices.begin())->edge_top;
|
||||
for (VertexInfo *old_vertex_info : cur_class_vertices) {
|
||||
assert(old_vertex_info->equivalence_class == eq_class);
|
||||
|
||||
// mark this vertex for removal
|
||||
toRemove->insert(old_vertex_info->v);
|
||||
|
||||
// for each predecessor, add edge to new vertex and update info
|
||||
for (VertexInfo *pred_info : old_vertex_info->pred) {
|
||||
// update info for new vertex
|
||||
new_vertex_info->pred.insert(pred_info);
|
||||
if (new_vertex_info_eod) {
|
||||
new_vertex_info_eod->pred.insert(pred_info);
|
||||
}
|
||||
|
||||
// update info for predecessor
|
||||
pred_info->succ.erase(old_vertex_info);
|
||||
|
||||
// if edge doesn't exist, create it
|
||||
NFAEdge e = add_edge_if_not_present(pred_info->v, new_v, g).first;
|
||||
|
||||
// put edge top, if applicable
|
||||
if (edgetop != (unsigned) -1) {
|
||||
g[e].top = edgetop;
|
||||
}
|
||||
|
||||
pred_info->succ.insert(new_vertex_info);
|
||||
|
||||
if (new_v_eod) {
|
||||
NFAEdge ee = add_edge_if_not_present(pred_info->v, new_v_eod,
|
||||
g).first;
|
||||
|
||||
// put edge top, if applicable
|
||||
if (edgetop != (unsigned) -1) {
|
||||
g[ee].top = edgetop;
|
||||
}
|
||||
|
||||
pred_info->succ.insert(new_vertex_info_eod);
|
||||
}
|
||||
}
|
||||
|
||||
// for each successor, add edge from new vertex and update info
|
||||
for (VertexInfo *succ_info : old_vertex_info->succ) {
|
||||
NFAVertex succ_v = succ_info->v;
|
||||
|
||||
// update info for successor
|
||||
succ_info->pred.erase(old_vertex_info);
|
||||
|
||||
if (new_v_eod && succ_v == g.acceptEod) {
|
||||
// update info for new vertex
|
||||
new_vertex_info_eod->succ.insert(succ_info);
|
||||
insert(&g[new_v_eod].reports,
|
||||
g[old_vertex_info->v].reports);
|
||||
|
||||
add_edge_if_not_present(new_v_eod, succ_v, g);
|
||||
succ_info->pred.insert(new_vertex_info_eod);
|
||||
} else {
|
||||
// update info for new vertex
|
||||
new_vertex_info->succ.insert(succ_info);
|
||||
|
||||
// if edge doesn't exist, create it
|
||||
add_edge_if_not_present(new_v, succ_v, g);
|
||||
succ_info->pred.insert(new_vertex_info);
|
||||
|
||||
if (is_any_accept(succ_v, g)) {
|
||||
insert(&g[new_v].reports,
|
||||
g[old_vertex_info->v].reports);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// update classmap
|
||||
new_vertex_info->equivalence_class = eq_class;
|
||||
cur_class_vertices.insert(new_vertex_info);
|
||||
}
|
||||
|
||||
// walk through vertices of an equivalence class and replace them with a single
|
||||
// vertex (or, in rare cases for left equiv, a pair if we cannot satisfy the
|
||||
// report behaviour with a single vertex).
|
||||
static
|
||||
bool mergeEquivalentClasses(ClassMap &classmap, ptr_vector<VertexInfo> &infos,
|
||||
NGHolder &g) {
|
||||
bool merged = false;
|
||||
set<NFAVertex> toRemove;
|
||||
|
||||
// go through all classes and merge classes with more than one vertex
|
||||
for (auto &cm : classmap) {
|
||||
// get all vertices in current equivalence class
|
||||
unsigned eq_class = cm.first;
|
||||
VertexInfoSet &cur_class_vertices = cm.second;
|
||||
|
||||
// we don't care for single-vertex classes
|
||||
if (cur_class_vertices.size() > 1) {
|
||||
merged = true;
|
||||
mergeClass(infos, g, eq_class, cur_class_vertices, &toRemove);
|
||||
}
|
||||
}
|
||||
|
||||
// remove all dead vertices
|
||||
DEBUG_PRINTF("removing %zd vertices.\n", toRemove.size());
|
||||
remove_vertices(toRemove, g);
|
||||
|
||||
return merged;
|
||||
}
|
||||
|
||||
/** Run the equivalence-class reduction over \p g.
 *
 * Returns false straight away when the pass is switched off in the grey box
 * or when the cheap irreducibility check says there is nothing to gain.
 * Otherwise one pass is made per equivalence type (values
 * 0 .. MAX_EQUIVALENCE-1), each of which rebuilds the vertex information,
 * partitions the graph, refines the partition and merges the classes.
 *
 * \return true if any pass merged vertices. */
bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) {
    if (!cc.grey.equivalenceEnable) {
        DEBUG_PRINTF("equivalence processing disabled in grey box\n");
        return false;
    }

    g.renumberVertices();

    // Cheap check: a graph whose non-special vertices all have in-degree one
    // and out-degree one holds no redundancy, so there is nothing to do.
    if (isIrreducible(g)) {
        DEBUG_PRINTF("skipping equivalence processing, graph is irreducible\n");
        return false;
    }

    bool changed = false;

    for (int eqi = 0; eqi < MAX_EQUIVALENCE; ++eqi) {
        const EquivalenceType eq_type = static_cast<EquivalenceType>(eqi);

        // per-pass state: vertex information, classes, and pending classes
        ptr_vector<VertexInfo> infos;
        ClassMap classes;
        WorkQueue work_queue(num_vertices(g));

        // make room for twice the vertices we have, as merging adds more
        infos.reserve(num_vertices(g) * 2);

        // new VertexInfo objects are allocated here and stored in infos
        getVertexInfos(g, infos);

        partitionGraph(infos, classes, work_queue, g, eq_type);

        equivalence(classes, work_queue, eq_type);

        // further VertexInfo objects are (possibly) allocated here as well
        if (mergeEquivalentClasses(classes, infos, g)) {
            changed = true;
        }
    }

    return changed;
}
|
||||
|
||||
} // namespace ue2
|
||||
47
src/nfagraph/ng_equivalence.h
Normal file
47
src/nfagraph/ng_equivalence.h
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Equivalence class graph reduction pass.
|
||||
*/
|
||||
|
||||
#ifndef NG_EQUIVALENCE_H_
|
||||
#define NG_EQUIVALENCE_H_
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
struct CompileContext;
|
||||
|
||||
/** Attempt to make the NFA graph \p g smaller by performing a number of local
|
||||
* transformations. */
|
||||
bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif /* NG_EQUIVALENCE_H_ */
|
||||
323
src/nfagraph/ng_execute.cpp
Normal file
323
src/nfagraph/ng_execute.cpp
Normal file
@@ -0,0 +1,323 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Execute an NFA over a given input, returning the set of states that
|
||||
* are active afterwards.
|
||||
*
|
||||
* Note: although our external interfaces for execute_graph() use std::set, we
|
||||
* use a dynamic bitset containing the vertex indices internally for
|
||||
* performance.
|
||||
*/
|
||||
#include "ng_execute.h"
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/container.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/ue2string.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
#include <boost/graph/depth_first_search.hpp>
|
||||
#include <boost/graph/reverse_graph.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::dynamic_bitset;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct StateInfo {
|
||||
StateInfo(NFAVertex v, const CharReach &cr) : vertex(v), reach(cr) {}
|
||||
StateInfo() : vertex(NFAGraph::null_vertex()) {}
|
||||
NFAVertex vertex;
|
||||
CharReach reach;
|
||||
};
|
||||
|
||||
#ifdef DEBUG
/** Debug helper: render the indices of all set bits in \p s, each followed by
 * a single space. */
static
std::string dumpStates(const dynamic_bitset<> &s) {
    std::ostringstream text;
    size_t bit = s.find_first();
    while (bit != s.npos) {
        text << bit << " ";
        bit = s.find_next(bit);
    }
    return text.str();
}
#endif
|
||||
|
||||
static
|
||||
void step(const NGHolder &g, const vector<StateInfo> &info,
|
||||
const dynamic_bitset<> &in, dynamic_bitset<> *out) {
|
||||
out->reset();
|
||||
for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
|
||||
NFAVertex u = info[i].vertex;
|
||||
for (auto v : adjacent_vertices_range(u, g)) {
|
||||
out->set(g[v].index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void filter_by_reach(const vector<StateInfo> &info, dynamic_bitset<> *states,
|
||||
const CharReach &cr) {
|
||||
for (size_t i = states->find_first(); i != states->npos;
|
||||
i = states->find_next(i)) {
|
||||
if ((info[i].reach & cr).none()) {
|
||||
states->reset(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename inputT>
|
||||
static
|
||||
void execute_graph_i(const NGHolder &g, const vector<StateInfo> &info,
|
||||
const inputT &input, dynamic_bitset<> *states,
|
||||
bool kill_sds) {
|
||||
dynamic_bitset<> &curr = *states;
|
||||
dynamic_bitset<> next(curr.size());
|
||||
DEBUG_PRINTF("%zu states in\n", states->count());
|
||||
|
||||
for (const auto &e : input) {
|
||||
DEBUG_PRINTF("processing %s\n", describeClass(e).c_str());
|
||||
step(g, info, curr, &next);
|
||||
if (kill_sds) {
|
||||
next.reset(NODE_START_DOTSTAR);
|
||||
}
|
||||
filter_by_reach(info, &next, e);
|
||||
next.swap(curr);
|
||||
|
||||
if (curr.empty()) {
|
||||
DEBUG_PRINTF("went dead\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("%zu states out\n", states->size());
|
||||
}
|
||||
|
||||
static
|
||||
void fillStateBitset(const NGHolder &g, const set<NFAVertex> &in,
|
||||
dynamic_bitset<> &out) {
|
||||
out.reset();
|
||||
for (auto v : in) {
|
||||
u32 idx = g[v].index;
|
||||
out.set(idx);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void fillVertexSet(const dynamic_bitset<> &in,
|
||||
const vector<StateInfo> &info, set<NFAVertex> &out) {
|
||||
out.clear();
|
||||
for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
|
||||
out.insert(info[i].vertex);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void fillInfoTable(const NGHolder &g, vector<StateInfo> &info) {
|
||||
info.resize(num_vertices(g));
|
||||
for (auto v : vertices_range(g)) {
|
||||
u32 idx = g[v].index;
|
||||
const CharReach &cr = g[v].char_reach;
|
||||
assert(idx < info.size());
|
||||
info[idx] = StateInfo(v, cr);
|
||||
}
|
||||
}
|
||||
|
||||
void execute_graph(const NGHolder &g, const ue2_literal &input,
|
||||
set<NFAVertex> *states, bool kill_sds) {
|
||||
assert(hasCorrectlyNumberedVertices(g));
|
||||
|
||||
vector<StateInfo> info;
|
||||
fillInfoTable(g, info);
|
||||
dynamic_bitset<> work_states(num_vertices(g));
|
||||
fillStateBitset(g, *states, work_states);
|
||||
|
||||
execute_graph_i(g, info, input, &work_states, kill_sds);
|
||||
|
||||
fillVertexSet(work_states, info, *states);
|
||||
}
|
||||
|
||||
void execute_graph(const NGHolder &g, const vector<CharReach> &input,
|
||||
set<NFAVertex> *states) {
|
||||
assert(hasCorrectlyNumberedVertices(g));
|
||||
|
||||
vector<StateInfo> info;
|
||||
fillInfoTable(g, info);
|
||||
dynamic_bitset<> work_states(num_vertices(g));
|
||||
fillStateBitset(g, *states, work_states);
|
||||
|
||||
execute_graph_i(g, info, input, &work_states, false);
|
||||
|
||||
fillVertexSet(work_states, info, *states);
|
||||
}
|
||||
|
||||
typedef boost::reverse_graph<const NFAGraph, const NFAGraph &> RevNFAGraph;
|
||||
|
||||
namespace {
|
||||
class eg_visitor : public boost::default_dfs_visitor {
|
||||
public:
|
||||
eg_visitor(const NGHolder &running_g_in, const vector<StateInfo> &info_in,
|
||||
const NGHolder &input_g_in,
|
||||
map<NFAVertex, dynamic_bitset<> > &states_in)
|
||||
: vertex_count(num_vertices(running_g_in)), running_g(running_g_in),
|
||||
info(info_in), input_g(input_g_in), states(states_in),
|
||||
succs(vertex_count) {}
|
||||
|
||||
void finish_vertex(NFAVertex input_v, const RevNFAGraph &) {
|
||||
if (input_v == input_g.accept) {
|
||||
return;
|
||||
}
|
||||
assert(input_v != input_g.acceptEod);
|
||||
|
||||
DEBUG_PRINTF("finished p%u\n", input_g[input_v].index);
|
||||
|
||||
/* finish vertex is called on vertex --> implies that all its parents
|
||||
* (in the forward graph) are also finished. Our parents will have
|
||||
* pushed all of their successors for us into our stateset. */
|
||||
states[input_v].resize(vertex_count);
|
||||
dynamic_bitset<> our_states = states[input_v];
|
||||
states[input_v].reset();
|
||||
|
||||
filter_by_reach(info, &our_states,
|
||||
input_g[input_v].char_reach);
|
||||
|
||||
if (input_v != input_g.startDs &&
|
||||
edge(input_v, input_v, input_g).second) {
|
||||
bool changed;
|
||||
do {
|
||||
DEBUG_PRINTF("actually not finished -> have self loop\n");
|
||||
succs.reset();
|
||||
step(running_g, info, our_states, &succs);
|
||||
filter_by_reach(info, &succs,
|
||||
input_g[input_v].char_reach);
|
||||
dynamic_bitset<> our_states2 = our_states | succs;
|
||||
changed = our_states2 != our_states;
|
||||
our_states.swap(our_states2);
|
||||
} while (changed);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF(" active rstates: %s\n", dumpStates(our_states).c_str());
|
||||
|
||||
succs.reset();
|
||||
step(running_g, info, our_states, &succs);
|
||||
|
||||
/* we need to push into all our (forward) children their successors
|
||||
* from us. */
|
||||
for (auto v : adjacent_vertices_range(input_v, input_g)) {
|
||||
DEBUG_PRINTF("pushing our states to pstate %u\n",
|
||||
input_g[v].index);
|
||||
if (v == input_g.startDs) {
|
||||
/* no need for intra start edges */
|
||||
continue;
|
||||
}
|
||||
|
||||
states[v].resize(vertex_count); // May not yet exist
|
||||
|
||||
if (v != input_g.accept) {
|
||||
states[v] |= succs;
|
||||
} else {
|
||||
/* accept is a magical pseudo state which does not consume
|
||||
* characters and we are using to collect the output states. We
|
||||
* must fill it with our states rather than our succs. */
|
||||
DEBUG_PRINTF("prev outputted rstates: %s\n",
|
||||
dumpStates(states[v]).c_str());
|
||||
DEBUG_PRINTF("outputted rstates: %s\n",
|
||||
dumpStates(our_states).c_str());
|
||||
|
||||
states[v] |= our_states;
|
||||
|
||||
DEBUG_PRINTF("new outputted rstates: %s\n",
|
||||
dumpStates(states[v]).c_str());
|
||||
}
|
||||
}
|
||||
|
||||
/* note: the states at this vertex are no longer required */
|
||||
}
|
||||
|
||||
private:
|
||||
const size_t vertex_count;
|
||||
const NGHolder &running_g;
|
||||
const vector<StateInfo> &info;
|
||||
const NGHolder &input_g;
|
||||
map<NFAVertex, dynamic_bitset<> > &states; /* vertex in input_g -> set of
|
||||
states in running_g */
|
||||
dynamic_bitset<> succs; // temp use internally
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
|
||||
const set<NFAVertex> &input_start_states,
|
||||
set<NFAVertex> *states) {
|
||||
DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n",
|
||||
num_vertices(running_g), num_vertices(input_dag));
|
||||
assert(hasCorrectlyNumberedVertices(running_g));
|
||||
assert(in_degree(input_dag.acceptEod, input_dag) == 1);
|
||||
|
||||
map<NFAVertex, boost::default_color_type> colours;
|
||||
/* could just a topo order, but really it is time to pull a slightly bigger
|
||||
* gun: DFS */
|
||||
RevNFAGraph revg(input_dag.g);
|
||||
map<NFAVertex, dynamic_bitset<> > dfs_states;
|
||||
|
||||
vector<StateInfo> info;
|
||||
fillInfoTable(running_g, info);
|
||||
dynamic_bitset<> input_fs(num_vertices(running_g));
|
||||
fillStateBitset(running_g, *states, input_fs);
|
||||
|
||||
for (auto v : input_start_states) {
|
||||
dfs_states[v] = input_fs;
|
||||
}
|
||||
|
||||
depth_first_visit(revg, input_dag.accept,
|
||||
eg_visitor(running_g, info, input_dag, dfs_states),
|
||||
make_assoc_property_map(colours));
|
||||
|
||||
fillVertexSet(dfs_states[input_dag.accept], info, *states);
|
||||
|
||||
#ifdef DEBUG
|
||||
DEBUG_PRINTF(" output rstates:");
|
||||
for (auto v : *states) {
|
||||
printf(" %u", running_g[v].index);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
|
||||
set<NFAVertex> *states) {
|
||||
set<NFAVertex> input_start_states = {input_dag.start, input_dag.startDs};
|
||||
execute_graph(running_g, input_dag, input_start_states, states);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
67
src/nfagraph/ng_execute.h
Normal file
67
src/nfagraph/ng_execute.h
Normal file
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Execute an NFA over a given input, returning the set of states that
|
||||
* are active afterwards.
|
||||
*/
|
||||
|
||||
#ifndef NG_EXECUTE_H
|
||||
#define NG_EXECUTE_H
|
||||
|
||||
#include "ng_holder.h"
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class CharReach;
|
||||
struct ue2_literal;
|
||||
|
||||
void execute_graph(const NGHolder &g, const ue2_literal &input,
|
||||
std::set<NFAVertex> *states, bool kill_sds = false);
|
||||
|
||||
void execute_graph(const NGHolder &g, const std::vector<CharReach> &input,
|
||||
std::set<NFAVertex> *states);
|
||||
|
||||
/** on exit, states contains any state which may still be enabled after
|
||||
* receiving an input which corresponds to some path through the input_dag from
|
||||
* start or startDs to accept. input_dag MUST be acyclic aside from self-loops.
|
||||
*/
|
||||
void execute_graph(const NGHolder &g, const NGHolder &input_dag,
|
||||
std::set<NFAVertex> *states);
|
||||
|
||||
/* as above, but able to specify the source states for the input graph */
|
||||
void execute_graph(const NGHolder &g, const NGHolder &input_dag,
|
||||
const std::set<NFAVertex> &input_start_states,
|
||||
std::set<NFAVertex> *states);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
155
src/nfagraph/ng_expr_info.cpp
Normal file
155
src/nfagraph/ng_expr_info.cpp
Normal file
@@ -0,0 +1,155 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Code for discovering properties of an NGWrapper used by
|
||||
* hs_expression_info.
|
||||
*/
|
||||
#include "ng_expr_info.h"
|
||||
|
||||
#include "hs_internal.h"
|
||||
#include "ng.h"
|
||||
#include "ng_asserts.h"
|
||||
#include "ng_depth.h"
|
||||
#include "ng_edge_redundancy.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_reports.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "parser/position.h" // for POS flags
|
||||
#include "util/boundary_reports.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/report_manager.h"
|
||||
|
||||
#include <limits.h>
|
||||
#include <set>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/* get rid of leading \b and multiline ^ vertices */
|
||||
static
|
||||
void removeLeadingVirtualVerticesFromRoot(NGWrapper &w, NFAVertex root) {
|
||||
vector<NFAVertex> victims;
|
||||
|
||||
for (auto v : adjacent_vertices_range(root, w)) {
|
||||
if (w[v].assert_flags & POS_FLAG_VIRTUAL_START) {
|
||||
DEBUG_PRINTF("(?m)^ vertex or leading \\[bB] vertex\n");
|
||||
victims.push_back(v);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto u : victims) {
|
||||
for (auto v : adjacent_vertices_range(u, w)) {
|
||||
add_edge_if_not_present(root, v, w);
|
||||
}
|
||||
}
|
||||
|
||||
remove_vertices(victims, w);
|
||||
}
|
||||
|
||||
static
|
||||
void checkVertex(const ReportManager &rm, const NGWrapper &w, NFAVertex v,
|
||||
const vector<DepthMinMax> &depths, DepthMinMax &info) {
|
||||
if (is_any_accept(v, w)) {
|
||||
return;
|
||||
}
|
||||
if (is_any_start(v, w)) {
|
||||
info.min = 0;
|
||||
info.max = max(info.max, depth(0));
|
||||
return;
|
||||
}
|
||||
|
||||
u32 idx = w[v].index;
|
||||
assert(idx < depths.size());
|
||||
const DepthMinMax &d = depths.at(idx);
|
||||
|
||||
for (ReportID report_id : w[v].reports) {
|
||||
const Report &ir = rm.getReport(report_id);
|
||||
assert(ir.type == EXTERNAL_CALLBACK);
|
||||
s32 adjust = ir.offsetAdjust;
|
||||
info.min = min(info.min, d.min + adjust);
|
||||
info.max = max(info.max, d.max + adjust);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool hasOffsetAdjust(const ReportManager &rm, const NGWrapper &w) {
|
||||
for (const auto &report_id : all_reports(w)) {
|
||||
if (rm.getReport(report_id).offsetAdjust) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * \brief Fill \p info with the width and match properties of graph \p w.
 *
 * The graph is modified in the process: asserts are resolved, virtual starts
 * are optimised and leading virtual-start vertices are removed before depths
 * are measured from start. Widths that are not finite are reported as
 * UINT_MAX.
 */
void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info) {
    assert(info);

    /* ensure utf8 starts at cp boundary */
    ensureCodePointStart(rm, w);
    resolveAsserts(rm, w);
    optimiseVirtualStarts(w);

    removeLeadingVirtualVerticesFromRoot(w, w.start);
    removeLeadingVirtualVerticesFromRoot(w, w.startDs);

    vector<DepthMinMax> depths;
    calcDepthsFrom(w, w.start, depths);

    // Accumulate the width range over every predecessor of accept, then of
    // acceptEod.
    DepthMinMax width;
    const NFAVertex accepts[] = { w.accept, w.acceptEod };
    for (const NFAVertex acc : accepts) {
        for (auto pred : inv_adjacent_vertices_range(acc, w)) {
            checkVertex(rm, w, pred, depths, width);
        }
    }

    if (!width.max.is_finite()) {
        info->max_width = UINT_MAX;
    } else {
        info->max_width = width.max;
    }

    if (!width.min.is_finite()) {
        info->min_width = UINT_MAX;
    } else {
        info->min_width = width.min;
    }

    info->unordered_matches = hasOffsetAdjust(rm, w);
    info->matches_at_eod = can_match_at_eod(w);
    info->matches_only_at_eod = can_only_match_at_eod(w);
}
|
||||
|
||||
} // namespace ue2
|
||||
50
src/nfagraph/ng_expr_info.h
Normal file
50
src/nfagraph/ng_expr_info.h
Normal file
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Code for discovering properties of an NGWrapper used by
|
||||
* hs_expression_info.
|
||||
*/
|
||||
|
||||
#ifndef NG_EXPR_INFO_H
|
||||
#define NG_EXPR_INFO_H
|
||||
|
||||
struct hs_expr_info;
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGWrapper;
|
||||
class ReportManager;
|
||||
|
||||
void fillExpressionInfo(ReportManager &rm, NGWrapper &w, hs_expr_info *info);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_EXPR_INFO_H
|
||||
878
src/nfagraph/ng_extparam.cpp
Normal file
878
src/nfagraph/ng_extparam.cpp
Normal file
@@ -0,0 +1,878 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Propagate extended parameters to vertex reports and reduce graph if
|
||||
* possible.
|
||||
*
|
||||
* This code handles the propagation of the extension parameters specified by
|
||||
* the user with the hs_expr_ext structure into the reports on the graph's
|
||||
* vertices.
|
||||
*
|
||||
* There are also some analyses that prune edges that cannot contribute to a
|
||||
* match given these constraints, or transform the graph in order to make a
|
||||
* constraint implicit.
|
||||
*/
|
||||
#include "ng.h"
|
||||
#include "ng_depth.h"
|
||||
#include "ng_dump.h"
|
||||
#include "ng_extparam.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_reports.h"
|
||||
#include "ng_som_util.h"
|
||||
#include "ng_width.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "parser/position.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Largest max_offset that anchorPatternWithBoundedRepeat() will
 * rewrite; it bails out for anything larger (presumably to bound the number
 * of dot vertices it prepends -- confirm). */
static const u32 MAX_MAXOFFSET_TO_ANCHOR = 2000;
|
||||
/** \brief Largest min_length that transformMinLengthToRepeat() will attempt
 * to convert into a bounded repeat; it bails out for anything larger. */
static const u32 MAX_MINLENGTH_TO_CONVERT = 2000;
|
||||
|
||||
/** \brief Find the (min, max) offset adjustment for the reports on a given
|
||||
* vertex. */
|
||||
static
|
||||
pair<s32,s32> getMinMaxOffsetAdjust(const ReportManager &rm,
|
||||
const NGHolder &g, NFAVertex v) {
|
||||
s32 minAdj = 0, maxAdj = 0;
|
||||
const auto &reports = g[v].reports;
|
||||
for (auto ri = reports.begin(), re = reports.end(); ri != re; ++ri) {
|
||||
const Report &ir = rm.getReport(*ri);
|
||||
if (ri == reports.begin()) {
|
||||
minAdj = ir.offsetAdjust;
|
||||
maxAdj = ir.offsetAdjust;
|
||||
} else {
|
||||
minAdj = min(minAdj, ir.offsetAdjust);
|
||||
maxAdj = max(maxAdj, ir.offsetAdjust);
|
||||
}
|
||||
}
|
||||
|
||||
return make_pair(minAdj, maxAdj);
|
||||
}
|
||||
|
||||
/** \brief Find the (min, max) length of any match for the given holder. */
|
||||
static
|
||||
DepthMinMax findMatchLengths(const ReportManager &rm, const NGHolder &g) {
|
||||
DepthMinMax match_depths;
|
||||
|
||||
vector<DepthMinMax> depths = getDistancesFromSOM(g);
|
||||
|
||||
pair<s32, s32> adj;
|
||||
|
||||
for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
|
||||
u32 idx = g[v].index;
|
||||
DepthMinMax d = depths[idx]; // copy
|
||||
adj = getMinMaxOffsetAdjust(rm, g, v);
|
||||
DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
|
||||
d.str().c_str(), adj.first, adj.second);
|
||||
d.min += adj.first;
|
||||
d.max += adj.second;
|
||||
match_depths = unionDepthMinMax(match_depths, d);
|
||||
}
|
||||
|
||||
for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
|
||||
if (v == g.accept) {
|
||||
continue;
|
||||
}
|
||||
u32 idx = g[v].index;
|
||||
DepthMinMax d = depths[idx]; // copy
|
||||
adj = getMinMaxOffsetAdjust(rm, g, v);
|
||||
DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
|
||||
d.str().c_str(), adj.first, adj.second);
|
||||
d.min += adj.first;
|
||||
d.max += adj.second;
|
||||
match_depths = unionDepthMinMax(match_depths, d);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("match_depths=%s\n", match_depths.str().c_str());
|
||||
|
||||
assert(match_depths.min.is_reachable());
|
||||
assert(match_depths.max.is_reachable());
|
||||
return match_depths;
|
||||
}
|
||||
|
||||
/** \brief Replace the graph's reports with new reports that specify bounds. */
|
||||
static
|
||||
void updateReportBounds(ReportManager &rm, NGWrapper &g, NFAVertex accept,
|
||||
set<NFAVertex> &done) {
|
||||
for (auto v : inv_adjacent_vertices_range(accept, g)) {
|
||||
// Don't operate on g.accept itself.
|
||||
if (v == g.accept) {
|
||||
assert(accept == g.acceptEod);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Don't operate on a vertex we've already done.
|
||||
if (contains(done, v)) {
|
||||
continue;
|
||||
}
|
||||
done.insert(v);
|
||||
|
||||
flat_set<ReportID> new_reports;
|
||||
auto &reports = g[v].reports;
|
||||
|
||||
for (auto id : reports) {
|
||||
Report ir = rm.getReport(id); // make a copy
|
||||
assert(!ir.hasBounds());
|
||||
|
||||
// Note that we need to cope with offset adjustment here.
|
||||
|
||||
ir.minOffset = g.min_offset - ir.offsetAdjust;
|
||||
if (g.max_offset == MAX_OFFSET) {
|
||||
ir.maxOffset = MAX_OFFSET;
|
||||
} else {
|
||||
ir.maxOffset = g.max_offset - ir.offsetAdjust;
|
||||
}
|
||||
assert(ir.maxOffset >= ir.minOffset);
|
||||
|
||||
ir.minLength = g.min_length;
|
||||
if (g.min_length && !g.som) {
|
||||
ir.quashSom = true;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("id %u -> min_offset=%llu, max_offset=%llu, "
|
||||
"min_length=%llu\n",
|
||||
id, ir.minOffset, ir.maxOffset, ir.minLength);
|
||||
new_reports.insert(rm.getInternalId(ir));
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("swapping reports on vertex %u\n",
|
||||
g[v].index);
|
||||
reports.swap(new_reports);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool hasVirtualStarts(const NGHolder &g) {
|
||||
for (auto v : adjacent_vertices_range(g.start, g)) {
|
||||
if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** If the pattern is unanchored, has a max_offset and has not asked for SOM,
|
||||
* we can use that knowledge to anchor it which will limit its lifespan. Note
|
||||
* that we can't use this transformation if there's a min_length, as it's
|
||||
* currently handled using "sly SOM".
|
||||
*
|
||||
* Note that it is possible to handle graphs that have a combination of
|
||||
* anchored and unanchored paths, but it's too tricky for the moment.
|
||||
*/
|
||||
static
|
||||
bool anchorPatternWithBoundedRepeat(NGWrapper &g, const depth &minWidth,
|
||||
const depth &maxWidth) {
|
||||
assert(!g.som);
|
||||
assert(g.max_offset != MAX_OFFSET);
|
||||
assert(minWidth <= maxWidth);
|
||||
assert(maxWidth.is_reachable());
|
||||
|
||||
DEBUG_PRINTF("widths=[%s,%s], min/max offsets=[%llu,%llu]\n",
|
||||
minWidth.str().c_str(), maxWidth.str().c_str(), g.min_offset,
|
||||
g.max_offset);
|
||||
|
||||
if (g.max_offset > MAX_MAXOFFSET_TO_ANCHOR) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (g.max_offset < minWidth) {
|
||||
assert(0);
|
||||
return false;
|
||||
}
|
||||
|
||||
// If the pattern has virtual starts, we probably don't want to touch it.
|
||||
if (hasVirtualStarts(g)) {
|
||||
DEBUG_PRINTF("virtual starts, bailing\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Similarly, bail if the pattern is vacuous. TODO: this could be done, we
|
||||
// would just need to be a little careful with reports.
|
||||
if (isVacuous(g)) {
|
||||
DEBUG_PRINTF("vacuous, bailing\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
u32 min_bound, max_bound;
|
||||
if (maxWidth.is_infinite()) {
|
||||
min_bound = 0;
|
||||
max_bound = g.max_offset - minWidth;
|
||||
} else {
|
||||
min_bound = g.min_offset > maxWidth ? g.min_offset - maxWidth : 0;
|
||||
max_bound = g.max_offset - minWidth;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("prepending ^.{%u,%u}\n", min_bound, max_bound);
|
||||
|
||||
vector<NFAVertex> initials;
|
||||
for (auto v : adjacent_vertices_range(g.startDs, g)) {
|
||||
if (v == g.startDs) {
|
||||
continue;
|
||||
}
|
||||
initials.push_back(v);
|
||||
}
|
||||
if (initials.empty()) {
|
||||
DEBUG_PRINTF("no initial vertices\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Wire up 'min_offset' mandatory dots from anchored start.
|
||||
NFAVertex u = g.start;
|
||||
for (u32 i = 0; i < min_bound; i++) {
|
||||
NFAVertex v = add_vertex(g);
|
||||
g[v].char_reach.setall();
|
||||
add_edge(u, v, g);
|
||||
u = v;
|
||||
}
|
||||
|
||||
NFAVertex head = u;
|
||||
|
||||
// Wire up optional dots for (max_offset - min_offset).
|
||||
for (u32 i = 0; i < max_bound - min_bound; i++) {
|
||||
NFAVertex v = add_vertex(g);
|
||||
g[v].char_reach.setall();
|
||||
if (head != u) {
|
||||
add_edge(head, v, g);
|
||||
}
|
||||
add_edge(u, v, g);
|
||||
u = v;
|
||||
}
|
||||
|
||||
// Remove edges from starts and wire both head and u to our initials.
|
||||
for (auto v : initials) {
|
||||
remove_edge(g.startDs, v, g);
|
||||
remove_edge(g.start, v, g);
|
||||
|
||||
if (head != u) {
|
||||
add_edge(head, v, g);
|
||||
}
|
||||
add_edge(u, v, g);
|
||||
}
|
||||
|
||||
g.renumberVertices();
|
||||
g.renumberEdges();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
NFAVertex findSingleCyclic(const NGHolder &g) {
|
||||
NFAVertex v = NFAGraph::null_vertex();
|
||||
for (const auto &e : edges_range(g)) {
|
||||
if (source(e, g) == target(e, g)) {
|
||||
if (source(e, g) == g.startDs) {
|
||||
continue;
|
||||
}
|
||||
if (v != NFAGraph::null_vertex()) {
|
||||
// More than one cyclic vertex.
|
||||
return NFAGraph::null_vertex();
|
||||
}
|
||||
v = source(e, g);
|
||||
}
|
||||
}
|
||||
|
||||
if (v != NFAGraph::null_vertex()) {
|
||||
DEBUG_PRINTF("cyclic is %u\n", g[v].index);
|
||||
assert(!is_special(v, g));
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
static
|
||||
bool hasOffsetAdjust(const ReportManager &rm, NGWrapper &g,
|
||||
int *adjust) {
|
||||
const auto &reports = all_reports(g);
|
||||
if (reports.empty()) {
|
||||
assert(0);
|
||||
return false;
|
||||
}
|
||||
|
||||
int offsetAdjust = rm.getReport(*reports.begin()).offsetAdjust;
|
||||
for (auto report : reports) {
|
||||
const Report &ir = rm.getReport(report);
|
||||
if (ir.offsetAdjust != offsetAdjust) {
|
||||
DEBUG_PRINTF("different adjusts!\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
*adjust = offsetAdjust;
|
||||
return true;
|
||||
}
|
||||
|
||||
/** If the pattern has a min_length and is of "ratchet" form with one unbounded
|
||||
* repeat, that repeat can become a bounded repeat.
|
||||
*
|
||||
* /foo.*bar/{min_length=100} --> /foo.{94,}bar/
|
||||
*/
|
||||
static
|
||||
bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) {
|
||||
assert(g.min_length);
|
||||
|
||||
if (g.min_length > MAX_MINLENGTH_TO_CONVERT) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// If the pattern has virtual starts, we probably don't want to touch it.
|
||||
if (hasVirtualStarts(g)) {
|
||||
DEBUG_PRINTF("virtual starts, bailing\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// The graph must contain a single cyclic vertex (other than startDs), and
|
||||
// that vertex can have one pred and one successor.
|
||||
NFAVertex cyclic = findSingleCyclic(g);
|
||||
if (cyclic == NFAGraph::null_vertex()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
NFAGraph::adjacency_iterator ai, ae;
|
||||
tie(ai, ae) = adjacent_vertices(g.start, g);
|
||||
if (*ai == g.startDs) {
|
||||
++ai;
|
||||
}
|
||||
NFAVertex v = *ai;
|
||||
if (++ai != ae) {
|
||||
DEBUG_PRINTF("more than one initial vertex\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
u32 width = 0;
|
||||
|
||||
|
||||
// Walk from the start vertex to the cyclic state and ensure we have a
|
||||
// chain of vertices.
|
||||
while (v != cyclic) {
|
||||
DEBUG_PRINTF("vertex %u\n", g[v].index);
|
||||
width++;
|
||||
tie(ai, ae) = adjacent_vertices(v, g);
|
||||
set<NFAVertex> succ(ai, ae);
|
||||
if (contains(succ, cyclic)) {
|
||||
if (succ.size() == 1) {
|
||||
v = cyclic;
|
||||
} else if (succ.size() == 2) {
|
||||
// Cyclic and jump edge.
|
||||
succ.erase(cyclic);
|
||||
NFAVertex v2 = *succ.begin();
|
||||
if (!edge(cyclic, v2, g).second) {
|
||||
DEBUG_PRINTF("bad form\n");
|
||||
return false;
|
||||
}
|
||||
v = cyclic;
|
||||
} else {
|
||||
DEBUG_PRINTF("bad form\n");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (succ.size() != 1) {
|
||||
DEBUG_PRINTF("bad form\n");
|
||||
return false;
|
||||
}
|
||||
v = *succ.begin();
|
||||
}
|
||||
}
|
||||
|
||||
// Check the cyclic state is A-OK.
|
||||
v = getSoleDestVertex(g, cyclic);
|
||||
if (v == NFAGraph::null_vertex()) {
|
||||
DEBUG_PRINTF("cyclic has more than one successor\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Walk from the cyclic state to an accept and ensure we have a chain of
|
||||
// vertices.
|
||||
while (!is_any_accept(v, g)) {
|
||||
DEBUG_PRINTF("vertex %u\n", g[v].index);
|
||||
width++;
|
||||
tie(ai, ae) = adjacent_vertices(v, g);
|
||||
set<NFAVertex> succ(ai, ae);
|
||||
if (succ.size() != 1) {
|
||||
DEBUG_PRINTF("bad form\n");
|
||||
return false;
|
||||
}
|
||||
v = *succ.begin();
|
||||
}
|
||||
|
||||
int offsetAdjust = 0;
|
||||
if (!hasOffsetAdjust(rm, g, &offsetAdjust)) {
|
||||
return false;
|
||||
}
|
||||
DEBUG_PRINTF("adjusting width by %d\n", offsetAdjust);
|
||||
width += offsetAdjust;
|
||||
|
||||
DEBUG_PRINTF("width=%u, vertex %u is cyclic\n", width,
|
||||
g[cyclic].index);
|
||||
|
||||
if (width >= g.min_length) {
|
||||
DEBUG_PRINTF("min_length=%llu is guaranteed, as width=%u\n",
|
||||
g.min_length, width);
|
||||
g.min_length = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
vector<NFAVertex> preds;
|
||||
vector<NFAEdge> dead;
|
||||
for (auto u : inv_adjacent_vertices_range(cyclic, g)) {
|
||||
DEBUG_PRINTF("pred %u\n", g[u].index);
|
||||
if (u == cyclic) {
|
||||
continue;
|
||||
}
|
||||
preds.push_back(u);
|
||||
|
||||
// We want to delete the out-edges of each predecessor, but need to
|
||||
// make sure we don't delete the startDs self loop.
|
||||
for (const auto &e : out_edges_range(u, g)) {
|
||||
if (target(e, g) != g.startDs) {
|
||||
dead.push_back(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
remove_edges(dead, g);
|
||||
|
||||
assert(!preds.empty());
|
||||
|
||||
const CharReach &cr = g[cyclic].char_reach;
|
||||
|
||||
for (u32 i = 0; i < g.min_length - width - 1; ++i) {
|
||||
v = add_vertex(g);
|
||||
g[v].char_reach = cr;
|
||||
|
||||
for (auto u : preds) {
|
||||
add_edge(u, v, g);
|
||||
}
|
||||
preds.clear();
|
||||
preds.push_back(v);
|
||||
}
|
||||
assert(!preds.empty());
|
||||
for (auto u : preds) {
|
||||
add_edge(u, cyclic, g);
|
||||
}
|
||||
|
||||
g.renumberVertices();
|
||||
g.renumberEdges();
|
||||
clearReports(g);
|
||||
|
||||
g.min_length = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool hasExtParams(const NGWrapper &g) {
|
||||
if (g.min_length != 0) {
|
||||
return true;
|
||||
}
|
||||
if (g.min_offset != 0) {
|
||||
return true;
|
||||
}
|
||||
if (g.max_offset != MAX_OFFSET) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static
|
||||
depth maxDistFromStart(const NFAVertexBidiDepth &d) {
|
||||
if (!d.fromStartDotStar.max.is_unreachable()) {
|
||||
// A path from startDs, any path, implies we can match at any offset.
|
||||
return depth::infinity();
|
||||
}
|
||||
return d.fromStart.max;
|
||||
}
|
||||
|
||||
static
|
||||
const depth& maxDistToAccept(const NFAVertexBidiDepth &d) {
|
||||
if (d.toAccept.max.is_unreachable()) {
|
||||
return d.toAcceptEod.max;
|
||||
} else if (d.toAcceptEod.max.is_unreachable()) {
|
||||
return d.toAccept.max;
|
||||
}
|
||||
return max(d.toAccept.max, d.toAcceptEod.max);
|
||||
}
|
||||
|
||||
static
|
||||
const depth& minDistFromStart(const NFAVertexBidiDepth &d) {
|
||||
return min(d.fromStartDotStar.min, d.fromStart.min);
|
||||
}
|
||||
|
||||
static
|
||||
const depth& minDistToAccept(const NFAVertexBidiDepth &d) {
|
||||
return min(d.toAccept.min, d.toAcceptEod.min);
|
||||
}
|
||||
|
||||
static
|
||||
bool isEdgePrunable(const NGWrapper &g,
|
||||
const vector<NFAVertexBidiDepth> &depths,
|
||||
const NFAEdge &e) {
|
||||
const NFAVertex u = source(e, g);
|
||||
const NFAVertex v = target(e, g);
|
||||
|
||||
DEBUG_PRINTF("edge (%u,%u)\n", g[u].index,
|
||||
g[v].index);
|
||||
|
||||
// Leave our special-to-special edges alone.
|
||||
if (is_special(u, g) && is_special(v, g)) {
|
||||
DEBUG_PRINTF("ignoring special-to-special\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// We must be careful around start: we don't want to remove (start, v) if
|
||||
// (startDs, v) exists as well, since later code will assume the presence
|
||||
// of both edges, but other cases are OK.
|
||||
if (u == g.start && edge(g.startDs, v, g).second) {
|
||||
DEBUG_PRINTF("ignoring unanchored start edge\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
u32 u_idx = g[u].index;
|
||||
u32 v_idx = g[v].index;
|
||||
assert(u_idx < depths.size() && v_idx < depths.size());
|
||||
|
||||
const NFAVertexBidiDepth &du = depths.at(u_idx);
|
||||
const NFAVertexBidiDepth &dv = depths.at(v_idx);
|
||||
|
||||
if (g.min_offset) {
|
||||
depth max_offset = maxDistFromStart(du) + maxDistToAccept(dv);
|
||||
if (max_offset.is_finite() && max_offset < g.min_offset) {
|
||||
DEBUG_PRINTF("max_offset=%s too small\n", max_offset.str().c_str());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (g.max_offset != MAX_OFFSET) {
|
||||
depth min_offset = minDistFromStart(du) + minDistToAccept(dv);
|
||||
assert(min_offset.is_finite());
|
||||
|
||||
if (min_offset > g.max_offset) {
|
||||
DEBUG_PRINTF("min_offset=%s too large\n", min_offset.str().c_str());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (g.min_length && is_any_accept(v, g)) {
|
||||
// Simple take on min_length. If we're an edge to accept and our max
|
||||
// dist from start is too small, we can be pruned.
|
||||
const depth &width = du.fromStart.max;
|
||||
if (width.is_finite() && width < g.min_length) {
|
||||
DEBUG_PRINTF("max width %s from start too small for min_length\n",
|
||||
width.str().c_str());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static
|
||||
void pruneExtUnreachable(NGWrapper &g) {
|
||||
vector<NFAVertexBidiDepth> depths;
|
||||
calcDepths(g, depths);
|
||||
|
||||
vector<NFAEdge> dead;
|
||||
|
||||
for (const auto &e : edges_range(g)) {
|
||||
if (isEdgePrunable(g, depths, e)) {
|
||||
DEBUG_PRINTF("pruning\n");
|
||||
dead.push_back(e);
|
||||
}
|
||||
}
|
||||
|
||||
if (dead.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
remove_edges(dead, g);
|
||||
pruneUseless(g);
|
||||
}
|
||||
|
||||
/** Remove vacuous edges in graphs where the min_offset or min_length
|
||||
* constraints dictate that they can never produce a match. */
|
||||
static
|
||||
void pruneVacuousEdges(NGWrapper &g) {
|
||||
if (!g.min_length && !g.min_offset) {
|
||||
return;
|
||||
}
|
||||
|
||||
vector<NFAEdge> dead;
|
||||
|
||||
for (const auto &e : edges_range(g)) {
|
||||
const NFAVertex u = source(e, g);
|
||||
const NFAVertex v = target(e, g);
|
||||
|
||||
// Special case: Crudely remove vacuous edges from start in graphs with a
|
||||
// min_offset.
|
||||
if (g.min_offset && u == g.start && is_any_accept(v, g)) {
|
||||
DEBUG_PRINTF("vacuous edge in graph with min_offset!\n");
|
||||
dead.push_back(e);
|
||||
continue;
|
||||
}
|
||||
|
||||
// If a min_length is set, vacuous edges can be removed.
|
||||
if (g.min_length && is_any_start(u, g) && is_any_accept(v, g)) {
|
||||
DEBUG_PRINTF("vacuous edge in graph with min_length!\n");
|
||||
dead.push_back(e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (dead.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
remove_edges(dead, g);
|
||||
pruneUseless(g);
|
||||
}
|
||||
|
||||
static
|
||||
void pruneUnmatchable(NGWrapper &g, const vector<DepthMinMax> &depths,
|
||||
const ReportManager &rm, NFAVertex accept) {
|
||||
vector<NFAEdge> dead;
|
||||
|
||||
for (const auto &e : in_edges_range(accept, g)) {
|
||||
NFAVertex v = source(e, g);
|
||||
if (v == g.accept) {
|
||||
assert(accept == g.acceptEod); // stylised edge
|
||||
continue;
|
||||
}
|
||||
|
||||
u32 idx = g[v].index;
|
||||
DepthMinMax d = depths[idx]; // copy
|
||||
pair<s32, s32> adj = getMinMaxOffsetAdjust(rm, g, v);
|
||||
DEBUG_PRINTF("vertex %u: depths=%s, adj=[%d,%d]\n", idx,
|
||||
d.str().c_str(), adj.first, adj.second);
|
||||
d.min += adj.first;
|
||||
d.max += adj.second;
|
||||
|
||||
if (d.max.is_finite() && d.max < g.min_length) {
|
||||
DEBUG_PRINTF("prune, max match length %s < min_length=%llu\n",
|
||||
d.max.str().c_str(), g.min_length);
|
||||
dead.push_back(e);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (g.max_offset != MAX_OFFSET && d.min > g.max_offset) {
|
||||
DEBUG_PRINTF("prune, min match length %s > max_offset=%llu\n",
|
||||
d.min.str().c_str(), g.max_offset);
|
||||
dead.push_back(e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
remove_edges(dead, g);
|
||||
}
|
||||
|
||||
/** Remove edges to accepts that can never produce a match long enough to
|
||||
* satisfy our min_length and max_offset constraints. */
|
||||
static
|
||||
void pruneUnmatchable(NGWrapper &g, const ReportManager &rm) {
|
||||
if (!g.min_length) {
|
||||
return;
|
||||
}
|
||||
|
||||
vector<DepthMinMax> depths = getDistancesFromSOM(g);
|
||||
|
||||
pruneUnmatchable(g, depths, rm, g.accept);
|
||||
pruneUnmatchable(g, depths, rm, g.acceptEod);
|
||||
|
||||
pruneUseless(g);
|
||||
}
|
||||
|
||||
static
|
||||
bool isUnanchored(const NGHolder &g) {
|
||||
for (auto v : adjacent_vertices_range(g.start, g)) {
|
||||
if (!edge(g.startDs, v, g).second) {
|
||||
DEBUG_PRINTF("fail, %u is anchored vertex\n",
|
||||
g[v].index);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool hasOffsetAdjustments(const ReportManager &rm, const NGHolder &g) {
|
||||
for (auto report : all_reports(g)) {
|
||||
const Report &ir = rm.getReport(report);
|
||||
if (ir.offsetAdjust) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * \brief Process the extended parameters (min_offset, max_offset, min_length)
 * attached to \a g.
 *
 * The steps, in order:
 *  - throw CompileError if the constraints cannot be met by any match;
 *  - drop a min_length that every match already satisfies;
 *  - prune edges that cannot lead to a permitted match, and throw
 *    CompileError if no path to an accept survives;
 *  - rewrite or retire constraints where the graph makes that possible;
 *  - if any constraint remains, call updateReportBounds() for both accepts.
 *
 * \param rm Report manager for the reports used by \a g.
 * \param g  Graph to process; both the graph and its parameters may change.
 * \param cc Compile context (unused).
 */
void handleExtendedParams(ReportManager &rm, NGWrapper &g,
                          UNUSED const CompileContext &cc) {
    if (!hasExtParams(g)) {
        return;
    }

    depth min_width = findMinWidth(g);
    depth max_width = findMaxWidth(g);
    bool anchored = !has_proper_successor(g.startDs, g)
                    && out_degree(g.start, g);
    bool offset_adj = hasOffsetAdjustments(rm, g);

    DEBUG_PRINTF("minWidth=%s, maxWidth=%s, anchored=%d, offset_adj=%d\n",
                 min_width.str().c_str(), max_width.str().c_str(), anchored,
                 offset_adj);

    DepthMinMax lengths = findMatchLengths(rm, g);
    DEBUG_PRINTF("match depths %s\n", lengths.str().c_str());

    // An anchored pattern of bounded width cannot reach past that width.
    if (anchored && max_width.is_finite()) {
        if (g.min_offset > max_width) {
            ostringstream msg;
            msg << "Expression is anchored and cannot satisfy min_offset="
                << g.min_offset << " as it can only produce matches of length "
                << max_width << " bytes at most.";
            throw CompileError(g.expressionIndex, msg.str());
        }
    }

    // The shortest match must fit within max_offset.
    if (min_width > g.max_offset) {
        ostringstream msg;
        msg << "Expression has max_offset=" << g.max_offset << " but requires "
            << min_width << " bytes to match.";
        throw CompileError(g.expressionIndex, msg.str());
    }

    // The longest match must be able to reach min_length.
    if (max_width.is_finite() && lengths.max < g.min_length) {
        ostringstream msg;
        msg << "Expression has min_length=" << g.min_length << " but can "
               "only produce matches of length " << lengths.max <<
               " bytes at most.";
        throw CompileError(g.expressionIndex, msg.str());
    }

    // A min_length that every match meets anyway can be dropped.
    if (g.min_length && g.min_length <= lengths.min) {
        DEBUG_PRINTF("min_length=%llu constraint is unnecessary\n",
                     g.min_length);
        g.min_length = 0;
    }

    if (!hasExtParams(g)) {
        return;
    }

    pruneVacuousEdges(g);
    pruneUnmatchable(g, rm);

    if (!offset_adj) {
        pruneExtUnreachable(g);
    }

    // Pruning may have removed every edge to accept, in which case this
    // expression cannot match.
    if (in_degree(g.accept, g) == 0 && in_degree(g.acceptEod, g) == 1) {
        throw CompileError(g.expressionIndex, "Extended parameter "
                "constraints can not be satisfied for any match from "
                "this expression.");
    }

    // Vertices that lost their edge to accept in the pruning above must not
    // keep their reports.
    clearReports(g);

    // The graph has changed: recompute everything derived from it.
    min_width = findMinWidth(g);
    max_width = findMaxWidth(g);
    anchored = proper_out_degree(g.startDs, g) == 0 &&
               out_degree(g.start, g);
    offset_adj = hasOffsetAdjustments(rm, g);

    // For a completely anchored pattern, a min_length can be expressed as a
    // min_offset instead.
    if (g.min_length && (g.min_offset <= g.min_length) && anchored) {
        DEBUG_PRINTF("converting min_length to min_offset=%llu for "
                     "anchored case\n", g.min_length);
        g.min_offset = g.min_length;
        g.min_length = 0;
    }

    if (g.min_offset && g.min_offset <= min_width && !offset_adj) {
        DEBUG_PRINTF("min_offset=%llu constraint is unnecessary\n",
                     g.min_offset);
        g.min_offset = 0;
    }

    if (!hasExtParams(g)) {
        return;
    }

    // A pattern with a min_length that is of "ratchet" form with one
    // unbounded repeat can have that repeat turned into a bounded repeat.
    // e.g. /foo.*bar/{min_length=100} --> /foo.{94,}bar/
    if (g.min_length && transformMinLengthToRepeat(rm, g)) {
        DEBUG_PRINTF("converted min_length to bounded repeat\n");
        // recalc
        min_width = findMinWidth(g);
    }

    // An unanchored pattern with a max_offset and no SOM request can be
    // anchored using that knowledge, which limits its lifespan. This cannot
    // be done when there is a min_length, as that is currently handled using
    // "sly SOM".

    // Graphs mixing anchored and unanchored paths could in principle be
    // handled too, but that is too tricky for the moment.

    if (g.max_offset != MAX_OFFSET && !g.som && !g.min_length &&
        !offset_adj && isUnanchored(g) &&
        anchorPatternWithBoundedRepeat(g, min_width, max_width)) {
        DEBUG_PRINTF("minWidth=%s, maxWidth=%s\n", min_width.str().c_str(),
                     max_width.str().c_str());
        if (min_width == max_width) {
            // For a fixed width pattern the offsets are now implicit in the
            // graph, so they can be retired.
            g.min_offset = 0;
            g.max_offset = MAX_OFFSET;
        }
    }
    //dumpGraph("final.dot", g.g);

    if (!hasExtParams(g)) {
        return;
    }

    set<NFAVertex> done;
    updateReportBounds(rm, g, g.accept, done);
    updateReportBounds(rm, g, g.acceptEod, done);
}
|
||||
|
||||
} // namespace ue2
|
||||
48
src/nfagraph/ng_extparam.h
Normal file
48
src/nfagraph/ng_extparam.h
Normal file
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Propagate extended parameters to vertex reports and reduce graph if
|
||||
* possible.
|
||||
*/
|
||||
|
||||
#ifndef NG_EXTPARAM_H
|
||||
#define NG_EXTPARAM_H
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct CompileContext;
|
||||
class NGWrapper;
|
||||
class ReportManager;
|
||||
|
||||
/**
 * \brief Propagate the extended parameters of \a g to its vertex reports,
 * reducing the graph where possible.
 */
void handleExtendedParams(ReportManager &rm, NGWrapper &g,
                          const CompileContext &cc);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
142
src/nfagraph/ng_fixed_width.cpp
Normal file
142
src/nfagraph/ng_fixed_width.cpp
Normal file
@@ -0,0 +1,142 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Rose mask construction from NGHolder.
|
||||
*/
|
||||
#include "ng_fixed_width.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_util.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "util/container.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <set>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
bool findMask(const NGHolder &g, vector<CharReach> *mask, bool *anchored,
|
||||
ue2::flat_set<ReportID> *reports) {
|
||||
DEBUG_PRINTF("looking for a mask pattern\n");
|
||||
set<NFAVertex> s_succ;
|
||||
insert(&s_succ, adjacent_vertices(g.start, g));
|
||||
|
||||
set<NFAVertex> sds_succ;
|
||||
insert(&sds_succ, adjacent_vertices(g.startDs, g));
|
||||
|
||||
*anchored = sds_succ.size() == 1; /* sds itself */
|
||||
bool floating = is_subset_of(s_succ, sds_succ);
|
||||
|
||||
DEBUG_PRINTF("sds %zu s %zu%s%s\n", sds_succ.size(), s_succ.size(),
|
||||
*anchored ? " anchored" : "", floating ? " floating" : "");
|
||||
if (!*anchored && !floating) {
|
||||
DEBUG_PRINTF("semi-anchored\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
set<NFAVertex> &succs = *anchored ? s_succ : sds_succ;
|
||||
succs.erase(g.startDs);
|
||||
if (succs.size() != 1) {
|
||||
DEBUG_PRINTF("branchy root\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
NFAVertex u = *anchored ? g.start : g.startDs;
|
||||
NFAVertex v = *succs.begin();
|
||||
|
||||
while (true) {
|
||||
DEBUG_PRINTF("validating vertex %u\n", g[v].index);
|
||||
|
||||
assert(v != g.acceptEod);
|
||||
|
||||
// If we've reached an accept, we MAY have found a valid Rose pattern
|
||||
if (v == g.accept) {
|
||||
DEBUG_PRINTF("accept\n");
|
||||
insert(reports, g[u].reports);
|
||||
return true;
|
||||
}
|
||||
|
||||
mask->push_back(g[v].char_reach);
|
||||
|
||||
if (out_degree(v, g) != 1) {
|
||||
DEBUG_PRINTF("out_degree != 1\n");
|
||||
return false; /* not a chain */
|
||||
}
|
||||
|
||||
u = v;
|
||||
v = *adjacent_vertices(v, g).first;
|
||||
|
||||
if (in_degree(v, g) != 1) {
|
||||
DEBUG_PRINTF("blargh\n"); /* picks up cases where there is no path
|
||||
* to case accept (large cycles),
|
||||
* ensures term */
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Try to hand \a g to Rose as a mask.
 *
 * \param rose Rose builder that receives the mask.
 * \param g    Graph to convert.
 * \param grey Grey box settings; nothing is attempted unless roseMasks is
 *             set.
 * \return true if the mask was found and accepted by \a rose; false if
 *         roseMasks is off, acceptEod does not have exactly one in-edge, no
 *         mask could be found, or \a rose rejected it.
 */
bool handleFixedWidth(RoseBuild &rose, const NGHolder &g, const Grey &grey) {
    if (!grey.roseMasks) {
        return false;
    }

    if (in_degree(g.acceptEod,g) != 1) {
        DEBUG_PRINTF("EOD anchoring not supported\n");
        return false;
    }

    vector<CharReach> mask;
    bool anchored = false;
    ue2::flat_set<ReportID> reports;

    if (!findMask(g, &mask, &anchored, &reports)) {
        return false;
    }

    DEBUG_PRINTF("%smasky masky\n", anchored ? "anchored " : "");

    assert(!mask.empty());
    assert(!reports.empty());

    if (!rose.add(anchored, mask, reports)) {
        DEBUG_PRINTF("failed to add masky\n");
        return false;
    }

    DEBUG_PRINTF("added as rose mask\n");
    return true;
}
|
||||
|
||||
} // namespace ue2
|
||||
46
src/nfagraph/ng_fixed_width.h
Normal file
46
src/nfagraph/ng_fixed_width.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Rose mask construction from NGHolder.
|
||||
*/
|
||||
|
||||
#ifndef NG_FIXED_WIDTH_H
|
||||
#define NG_FIXED_WIDTH_H
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class RoseBuild;
|
||||
class NGHolder;
|
||||
struct Grey;
|
||||
|
||||
/**
 * \brief Attempt to add the graph \a g to \a build as a Rose mask.
 *
 * Returns true if the mask was added. Returns false if Rose masks are
 * disabled in \a grey, if acceptEod does not have exactly one in-edge, if the
 * graph is not a single chain from a start vertex to accept, or if the
 * builder rejects the mask.
 */
bool handleFixedWidth(RoseBuild &build, const NGHolder &g, const Grey &grey);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_FIXED_WIDTH_H
|
||||
114
src/nfagraph/ng_graph.h
Normal file
114
src/nfagraph/ng_graph.h
Normal file
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Definition of the NFAGraph type used for all NFA graph
|
||||
* representations.
|
||||
*
|
||||
* Note that most of the time we don't work on a bare NFAGraph: instead
|
||||
* we use an NGHolder, which wraps the graph and defines our special vertices,
|
||||
* etc.
|
||||
*/
|
||||
|
||||
#ifndef NG_GRAPH_H
|
||||
#define NG_GRAPH_H
|
||||
|
||||
#include "util/charreach.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <boost/graph/adjacency_iterator.hpp>
|
||||
#include <boost/graph/adjacency_list.hpp>
|
||||
#include <boost/graph/graph_traits.hpp>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Properties associated with each vertex in an NFAGraph. */
|
||||
struct NFAGraphVertexProps {
|
||||
/** \brief Set of characters on which this vertex is reachable. */
|
||||
CharReach char_reach;
|
||||
|
||||
/** \brief Set of reports raised by this vertex. */
|
||||
ue2::flat_set<ReportID> reports;
|
||||
|
||||
/** \brief Unique index for this vertex, used for BGL algorithms. */
|
||||
u32 index = 0;
|
||||
|
||||
/** \brief Flags associated with assertions. */
|
||||
u32 assert_flags = 0;
|
||||
};
|
||||
|
||||
/** \brief Properties associated with each edge in an NFAGraph. */
|
||||
struct NFAGraphEdgeProps {
|
||||
/** \brief Unique index for this edge, used for BGL algorithms. */
|
||||
u32 index = 0;
|
||||
|
||||
/** \brief For graphs that will be implemented as multi-top engines, this
|
||||
* specifies the top event. Only used on edges from the start vertex. */
|
||||
u32 top = 0;
|
||||
|
||||
/** \brief Flags associated with assertions. */
|
||||
u32 assert_flags = 0;
|
||||
};
|
||||
|
||||
// For flexibility: boost::listS, boost::listS for out-edge and vertex lists.
|
||||
// boost::bidirectionalS for directed graph so that we can get at in-edges.
|
||||
typedef boost::adjacency_list<boost::listS,
|
||||
boost::listS,
|
||||
boost::bidirectionalS,
|
||||
NFAGraphVertexProps,
|
||||
NFAGraphEdgeProps> NFAGraph;
|
||||
|
||||
typedef NFAGraph::vertex_descriptor NFAVertex;
|
||||
typedef NFAGraph::edge_descriptor NFAEdge;
|
||||
|
||||
/** \brief vertex_index values of the special nodes in an NFAGraph. */
enum SpecialNodes {
    /** \brief Anchored start vertex. WARNING: for triggered graphs this may
     * be triggered at various locations, not just zero. */
    NODE_START,

    /** \brief Unanchored start-dotstar vertex. WARNING: this may not have a
     * proper self-loop. */
    NODE_START_DOTSTAR,

    /** \brief Accept vertex. Every vertex that can match at arbitrary offsets
     * must have an edge to this vertex. */
    NODE_ACCEPT,

    /** \brief Accept-EOD vertex. Vertices that must raise a match only at EOD
     * must have an edge to this vertex. */
    NODE_ACCEPT_EOD,

    /** \brief Sentinel: the number of special vertices. */
    N_SPECIALS
};
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
842
src/nfagraph/ng_haig.cpp
Normal file
842
src/nfagraph/ng_haig.cpp
Normal file
@@ -0,0 +1,842 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Build code for Haig SOM DFA.
|
||||
*/
|
||||
#include "ng_haig.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "nfa/goughcompile.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_mcclellan_internal.h"
|
||||
#include "ng_restructuring.h"
|
||||
#include "ng_som_util.h"
|
||||
#include "ng_squash.h"
|
||||
#include "ng_util.h"
|
||||
#include "util/bitfield.h"
|
||||
#include "util/container.h"
|
||||
#include "util/determinise.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::dynamic_bitset;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Number of bits in the fixed-size state set of Automaton_Graph. */
#define NFA_STATE_LIMIT 256

// NOTE(review): the two limits below are not used in this part of the file;
// their names suggest caps on NFA states and live SOM slots for Haig builds.
// Confirm against their users.
#define HAIG_MAX_NFA_STATE 600
#define HAIG_MAX_LIVE_SOM_SLOTS 32
|
||||
|
||||
namespace {
|
||||
/** \brief Empty tag type.
 *
 * NOTE(review): presumably used to signal that a Haig build exceeded a width
 * limit; its users are not in this part of the file, so confirm. */
struct haig_too_wide {};
|
||||
|
||||
template<typename stateset>
|
||||
static
|
||||
void populateInit(const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
stateset *init, stateset *initDS,
|
||||
vector<NFAVertex> *v_by_index) {
|
||||
DEBUG_PRINTF("graph kind: %u\n", (int)g.kind);
|
||||
for (auto v : vertices_range(g)) {
|
||||
u32 v_index = g[v].index;
|
||||
if (state_ids.at(v) == NO_STATE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (is_any_start(v, g)) {
|
||||
init->set(v_index);
|
||||
if (hasSelfLoop(v, g) || is_triggered(g)) {
|
||||
DEBUG_PRINTF("setting %u\n", v_index);
|
||||
initDS->set(v_index);
|
||||
}
|
||||
}
|
||||
assert(v_index < init->size());
|
||||
}
|
||||
|
||||
v_by_index->clear();
|
||||
v_by_index->resize(num_vertices(g), NFAGraph::null_vertex());
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
u32 v_index = g[v].index;
|
||||
assert((*v_by_index)[v_index] == NFAGraph::null_vertex());
|
||||
(*v_by_index)[v_index] = v;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename StateSet>
|
||||
void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) {
|
||||
for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
|
||||
accept->set(g[v].index);
|
||||
}
|
||||
for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
|
||||
if (v == g.accept) {
|
||||
continue;
|
||||
}
|
||||
acceptEod->set(g[v].index);
|
||||
}
|
||||
}
|
||||
|
||||
class Automaton_Base {
|
||||
public:
|
||||
Automaton_Base(const NGHolder &graph_in,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids_in)
|
||||
: graph(graph_in), state_ids(state_ids_in) {
|
||||
calculateAlphabet(graph, alpha, unalpha, &alphasize);
|
||||
assert(alphasize <= ALPHABET_SIZE);
|
||||
}
|
||||
|
||||
static bool canPrune(const flat_set<ReportID> &) { return false; }
|
||||
|
||||
const NGHolder &graph;
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids;
|
||||
|
||||
array<u16, ALPHABET_SIZE> alpha;
|
||||
array<u16, ALPHABET_SIZE> unalpha;
|
||||
u16 alphasize;
|
||||
|
||||
set<dstate_id_t> done_a;
|
||||
set<dstate_id_t> done_b;
|
||||
|
||||
u16 start_anchored;
|
||||
u16 start_floating;
|
||||
};
|
||||
|
||||
class Automaton_Big : public Automaton_Base {
|
||||
public:
|
||||
typedef dynamic_bitset<> StateSet;
|
||||
typedef map<StateSet, dstate_id_t> StateMap;
|
||||
|
||||
Automaton_Big(const NGHolder &graph_in,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids_in,
|
||||
som_type som, const vector<vector<CharReach>> &triggers,
|
||||
bool unordered_som)
|
||||
: Automaton_Base(graph_in, state_ids_in), numStates(num_vertices(graph)),
|
||||
init(numStates), initDS(numStates), squash(numStates),
|
||||
accept(numStates), acceptEod(numStates), toppable(numStates),
|
||||
dead(numStates) {
|
||||
populateInit(graph, state_ids, &init, &initDS, &v_by_index);
|
||||
populateAccepts(graph, &accept, &acceptEod);
|
||||
|
||||
start_anchored = DEAD_STATE + 1;
|
||||
if (initDS == init) {
|
||||
start_floating = start_anchored;
|
||||
} else if (initDS.any()) {
|
||||
start_floating = start_anchored + 1;
|
||||
} else {
|
||||
start_floating = DEAD_STATE;
|
||||
}
|
||||
|
||||
if (!unordered_som) {
|
||||
for (const auto &sq : findSquashers(graph, som)) {
|
||||
NFAVertex v = sq.first;
|
||||
u32 vert_id = graph[v].index;
|
||||
squash.set(vert_id);
|
||||
squash_mask[vert_id] = shrinkStateSet(sq.second);
|
||||
}
|
||||
}
|
||||
|
||||
cr_by_index = populateCR(graph, v_by_index, alpha);
|
||||
if (is_triggered(graph)) {
|
||||
markToppableStarts(graph, state_ids, false, triggers, &toppable);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// Convert an NFAStateSet (as used by the squash code) into a StateSet.
|
||||
StateSet shrinkStateSet(const NFAStateSet &in) const {
|
||||
StateSet out(dead.size());
|
||||
for (size_t i = in.find_first(); i != in.npos && i < out.size();
|
||||
i = in.find_next(i)) {
|
||||
out.set(i);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
public:
|
||||
void transition(const StateSet &in, StateSet *next) {
|
||||
transition_graph(*this, v_by_index, in, next);
|
||||
}
|
||||
|
||||
const vector<StateSet> initial() {
|
||||
vector<StateSet> rv(1, init);
|
||||
if (start_floating != DEAD_STATE && start_floating != start_anchored) {
|
||||
rv.push_back(initDS);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
private:
|
||||
void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) {
|
||||
StateSet acc = in & (eod ? acceptEod : accept);
|
||||
for (size_t i = acc.find_first(); i != StateSet::npos;
|
||||
i = acc.find_next(i)) {
|
||||
NFAVertex v = v_by_index[i];
|
||||
DEBUG_PRINTF("marking report\n");
|
||||
const auto &my_reports = graph[v].reports;
|
||||
rv.insert(my_reports.begin(), my_reports.end());
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
void reports(const StateSet &in, flat_set<ReportID> &rv) {
|
||||
reports_i(in, false, rv);
|
||||
}
|
||||
void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
|
||||
reports_i(in, true, rv);
|
||||
}
|
||||
|
||||
public:
|
||||
u32 numStates;
|
||||
vector<NFAVertex> v_by_index;
|
||||
vector<CharReach> cr_by_index; /* pre alpha'ed */
|
||||
StateSet init;
|
||||
StateSet initDS;
|
||||
StateSet squash; /* states which allow us to mask out other states */
|
||||
StateSet accept;
|
||||
StateSet acceptEod;
|
||||
StateSet toppable; /* states which are allowed to be on when a top arrives,
|
||||
* triggered dfas only */
|
||||
map<u32, StateSet> squash_mask;
|
||||
StateSet dead;
|
||||
};
|
||||
|
||||
class Automaton_Graph : public Automaton_Base {
|
||||
public:
|
||||
typedef bitfield<NFA_STATE_LIMIT> StateSet;
|
||||
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
|
||||
|
||||
Automaton_Graph(const NGHolder &graph_in,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids_in,
|
||||
som_type som, const vector<vector<CharReach>> &triggers,
|
||||
bool unordered_som)
|
||||
: Automaton_Base(graph_in, state_ids_in) {
|
||||
populateInit(graph, state_ids, &init, &initDS, &v_by_index);
|
||||
populateAccepts(graph, &accept, &acceptEod);
|
||||
|
||||
start_anchored = DEAD_STATE + 1;
|
||||
if (initDS == init) {
|
||||
start_floating = start_anchored;
|
||||
} else if (initDS.any()) {
|
||||
start_floating = start_anchored + 1;
|
||||
} else {
|
||||
start_floating = DEAD_STATE;
|
||||
}
|
||||
|
||||
if (!unordered_som) {
|
||||
for (const auto &sq : findSquashers(graph, som)) {
|
||||
NFAVertex v = sq.first;
|
||||
u32 vert_id = graph[v].index;
|
||||
squash.set(vert_id);
|
||||
squash_mask[vert_id] = shrinkStateSet(sq.second);
|
||||
}
|
||||
}
|
||||
|
||||
cr_by_index = populateCR(graph, v_by_index, alpha);
|
||||
if (is_triggered(graph)) {
|
||||
dynamic_bitset<> temp(NFA_STATE_LIMIT);
|
||||
markToppableStarts(graph, state_ids, false, triggers, &temp);
|
||||
toppable = bitfield<NFA_STATE_LIMIT>(temp);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// Convert an NFAStateSet (as used by the squash code) into a StateSet.
|
||||
StateSet shrinkStateSet(const NFAStateSet &in) const {
|
||||
StateSet out;
|
||||
for (size_t i = in.find_first(); i != in.npos && i < out.size();
|
||||
i = in.find_next(i)) {
|
||||
out.set(i);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
public:
|
||||
void transition(const StateSet &in, StateSet *next) {
|
||||
transition_graph(*this, v_by_index, in, next);
|
||||
}
|
||||
|
||||
const vector<StateSet> initial() {
|
||||
vector<StateSet> rv(1, init);
|
||||
if (start_floating != DEAD_STATE && start_floating != start_anchored) {
|
||||
rv.push_back(initDS);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
private:
|
||||
void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) {
|
||||
StateSet acc = in & (eod ? acceptEod : accept);
|
||||
for (size_t i = acc.find_first(); i != StateSet::npos;
|
||||
i = acc.find_next(i)) {
|
||||
NFAVertex v = v_by_index[i];
|
||||
DEBUG_PRINTF("marking report\n");
|
||||
const auto &my_reports = graph[v].reports;
|
||||
rv.insert(my_reports.begin(), my_reports.end());
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
void reports(const StateSet &in, flat_set<ReportID> &rv) {
|
||||
reports_i(in, false, rv);
|
||||
}
|
||||
void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
|
||||
reports_i(in, true, rv);
|
||||
}
|
||||
|
||||
public:
|
||||
vector<NFAVertex> v_by_index;
|
||||
vector<CharReach> cr_by_index; /* pre alpha'ed */
|
||||
StateSet init;
|
||||
StateSet initDS;
|
||||
StateSet squash; /* states which allow us to mask out other states */
|
||||
StateSet accept;
|
||||
StateSet acceptEod;
|
||||
StateSet toppable; /* states which are allowed to be on when a top arrives,
|
||||
* triggered dfas only */
|
||||
map<u32, StateSet> squash_mask;
|
||||
StateSet dead;
|
||||
};
|
||||
|
||||
class Automaton_Haig_Merge {
|
||||
public:
|
||||
typedef vector<u16> StateSet;
|
||||
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
|
||||
|
||||
explicit Automaton_Haig_Merge(const vector<const raw_som_dfa *> &in)
|
||||
: nfas(in.begin(), in.end()), dead(in.size()) {
|
||||
calculateAlphabet();
|
||||
populateAsFs();
|
||||
}
|
||||
|
||||
void populateAsFs(void) {
|
||||
bool fs_same = true;
|
||||
bool fs_dead = true;
|
||||
|
||||
as.resize(nfas.size());
|
||||
fs.resize(nfas.size());
|
||||
for (u32 i = 0; i < nfas.size(); i++) {
|
||||
as[i] = nfas[i]->start_anchored;
|
||||
fs[i] = nfas[i]->start_floating;
|
||||
|
||||
if (fs[i]) {
|
||||
fs_dead = false;
|
||||
}
|
||||
|
||||
if (as[i] != fs[i]) {
|
||||
fs_same = false;
|
||||
}
|
||||
}
|
||||
|
||||
start_anchored = DEAD_STATE + 1;
|
||||
if (fs_same) {
|
||||
start_floating = start_anchored;
|
||||
} else if (fs_dead) {
|
||||
start_floating = DEAD_STATE;
|
||||
} else {
|
||||
start_floating = start_anchored + 1;
|
||||
}
|
||||
}
|
||||
|
||||
void calculateAlphabet(void) {
|
||||
DEBUG_PRINTF("calculating alphabet\n");
|
||||
vector<CharReach> esets(1, CharReach::dot());
|
||||
|
||||
for (const auto &haig : nfas) {
|
||||
DEBUG_PRINTF("...next dfa alphabet\n");
|
||||
assert(haig);
|
||||
const auto &alpha_remap = haig->alpha_remap;
|
||||
|
||||
for (size_t i = 0; i < esets.size(); i++) {
|
||||
assert(esets[i].any());
|
||||
if (esets[i].count() == 1) {
|
||||
DEBUG_PRINTF("skipping singleton eq set\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
CharReach t;
|
||||
u8 leader_s = alpha_remap[esets[i].find_first()];
|
||||
|
||||
DEBUG_PRINTF("checking eq set, leader %02hhx \n", leader_s);
|
||||
|
||||
for (size_t s = esets[i].find_first();
|
||||
s != CharReach::npos; s = esets[i].find_next(s)) {
|
||||
if (alpha_remap[s] != leader_s) {
|
||||
t.set(s);
|
||||
}
|
||||
}
|
||||
|
||||
if (t.any() && t != esets[i]) {
|
||||
esets[i] &= ~t;
|
||||
esets.push_back(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
|
||||
}
|
||||
|
||||
void transition(const StateSet &in, StateSet *next) {
|
||||
u16 t[ALPHABET_SIZE];
|
||||
|
||||
for (u32 i = 0; i < alphasize; i++) {
|
||||
next[i].resize(nfas.size());
|
||||
}
|
||||
|
||||
for (u32 j = 0; j < nfas.size(); j++) {
|
||||
getFullTransitionFromState(*nfas[j], in[j], t);
|
||||
for (u32 i = 0; i < alphasize; i++) {
|
||||
next[i][j]= t[unalpha[i]];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const vector<StateSet> initial() {
|
||||
vector<StateSet> rv(1, as);
|
||||
if (start_floating != DEAD_STATE && start_floating != start_anchored) {
|
||||
rv.push_back(fs);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
private:
|
||||
void reports_i(const StateSet &in, flat_set<ReportID> dstate::*r_set,
|
||||
flat_set<ReportID> &r) {
|
||||
for (u32 i = 0; i < nfas.size(); i++) {
|
||||
const auto &rs = nfas[i]->states[in[i]].*r_set;
|
||||
insert(&r, rs);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
void reports(const StateSet &in, flat_set<ReportID> &rv) {
|
||||
reports_i(in, &dstate::reports, rv);
|
||||
}
|
||||
void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
|
||||
reports_i(in, &dstate::reports_eod, rv);
|
||||
}
|
||||
|
||||
static bool canPrune(const flat_set<ReportID> &) { return false; }
|
||||
|
||||
private:
|
||||
vector<const raw_som_dfa *> nfas;
|
||||
vector<dstate_id_t> as;
|
||||
vector<dstate_id_t> fs;
|
||||
public:
|
||||
array<u16, ALPHABET_SIZE> alpha;
|
||||
array<u16, ALPHABET_SIZE> unalpha;
|
||||
u16 alphasize;
|
||||
StateSet dead;
|
||||
|
||||
u16 start_anchored;
|
||||
u16 start_floating;
|
||||
};
|
||||
}
|
||||
|
||||
/* NOTE(review): no use of this enum is visible in this part of the file;
 * the enumerator values are spelled out but unchanged (0 and 1). */
enum bslm_mode {
    ONLY_EXISTING = 0,
    INCLUDE_INVALID = 1
};
|
||||
|
||||
static
|
||||
bool is_any_start_inc_virtual(NFAVertex v, const NGHolder &g) {
|
||||
return is_virtual_start(v, g) || is_any_start(v, g);
|
||||
}
|
||||
|
||||
static
|
||||
s32 getSlotID(const NGHolder &g,
|
||||
UNUSED const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
NFAVertex v) {
|
||||
if (is_triggered(g) && v == g.start) {
|
||||
assert(state_ids.at(v) != NO_STATE);
|
||||
} else if (is_any_start_inc_virtual(v, g)) {
|
||||
return CREATE_NEW_SOM;
|
||||
}
|
||||
|
||||
return g[v].index;
|
||||
}
|
||||
|
||||
template<typename stateset>
|
||||
static
|
||||
void haig_do_preds(const NGHolder &g, const stateset &nfa_states,
|
||||
const vector<NFAVertex> &state_mapping,
|
||||
som_tran_info &preds) {
|
||||
for (size_t i = nfa_states.find_first(); i != stateset::npos;
|
||||
i = nfa_states.find_next(i)) {
|
||||
NFAVertex v = state_mapping[i];
|
||||
s32 slot_id = g[v].index;
|
||||
|
||||
DEBUG_PRINTF("d vertex %u\n", g[v].index);
|
||||
vector<u32> &out_map = preds[slot_id];
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
out_map.push_back(g[u].index);
|
||||
}
|
||||
|
||||
sort(out_map.begin(), out_map.end());
|
||||
assert(!out_map.empty() || v == g.start);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename stateset>
|
||||
static
|
||||
void haig_do_report(const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
NFAVertex accept_v, const stateset &source_nfa_states,
|
||||
const vector<NFAVertex> &state_mapping,
|
||||
set<som_report> &out) {
|
||||
for (size_t i = source_nfa_states.find_first(); i != stateset::npos;
|
||||
i = source_nfa_states.find_next(i)) {
|
||||
NFAVertex v = state_mapping[i];
|
||||
if (!edge(v, accept_v, g).second) {
|
||||
continue;
|
||||
}
|
||||
for (ReportID report_id : g[v].reports) {
|
||||
out.insert(som_report(report_id, getSlotID(g, state_ids, v)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void haig_note_starts(const NGHolder &g, map<u32, u32> *out) {
|
||||
if (is_triggered(g)) {
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("seeing who creates new som values\n");
|
||||
|
||||
vector<DepthMinMax> depths = getDistancesFromSOM(g);
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (is_any_start_inc_virtual(v, g)) {
|
||||
DEBUG_PRINTF("%u creates new som value\n", g[v].index);
|
||||
out->emplace(g[v].index, 0U);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (is_any_accept(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const DepthMinMax &d = depths[g[v].index];
|
||||
if (d.min == d.max && d.min.is_finite()) {
|
||||
DEBUG_PRINTF("%u is fixed at %u\n", g[v].index, (u32)d.min);
|
||||
out->emplace(g[v].index, d.min);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Auto>
|
||||
static
|
||||
bool doHaig(const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
som_type som, const vector<vector<CharReach>> &triggers,
|
||||
bool unordered_som, raw_som_dfa *rdfa) {
|
||||
u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
|
||||
a fight */
|
||||
typedef typename Auto::StateSet StateSet;
|
||||
vector<StateSet> nfa_state_map;
|
||||
Auto n(g, state_ids, som, triggers, unordered_som);
|
||||
try {
|
||||
if (determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
|
||||
DEBUG_PRINTF("state limit exceeded\n");
|
||||
return false;
|
||||
}
|
||||
} catch (haig_too_wide &) {
|
||||
DEBUG_PRINTF("too many live som states\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
rdfa->start_anchored = n.start_anchored;
|
||||
rdfa->start_floating = n.start_floating;
|
||||
rdfa->alpha_size = n.alphasize;
|
||||
rdfa->alpha_remap = n.alpha;
|
||||
|
||||
rdfa->state_som.reserve(rdfa->states.size());
|
||||
for (u32 i = 0; i < rdfa->states.size(); i++) {
|
||||
rdfa->state_som.push_back(dstate_som());
|
||||
const StateSet &source_states = nfa_state_map[i];
|
||||
if (source_states.count() > HAIG_MAX_LIVE_SOM_SLOTS) {
|
||||
DEBUG_PRINTF("too many live states\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("generating som info for %u\n", i);
|
||||
|
||||
haig_do_preds(g, source_states, n.v_by_index,
|
||||
rdfa->state_som.back().preds);
|
||||
|
||||
haig_do_report(g, state_ids, g.accept, source_states, n.v_by_index,
|
||||
rdfa->state_som.back().reports);
|
||||
haig_do_report(g, state_ids, g.acceptEod, source_states, n.v_by_index,
|
||||
rdfa->state_som.back().reports_eod);
|
||||
}
|
||||
|
||||
haig_note_starts(g, &rdfa->new_som_nfa_states);
|
||||
rdfa->trigger_nfa_state = NODE_START;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Tries to build a raw_som_dfa (haig) for \p g.
 *
 * Numbers the states of \p g and calls dropUnusedStarts() on it before
 * determinising; Automaton_Graph is used when the vertex count is at most
 * NFA_STATE_LIMIT, Automaton_Big otherwise.
 *
 * \param somPrecision stored as the result's stream_som_loc_width.
 * \param triggers     asserted to be non-empty exactly when \p g is
 *                     triggered.
 * \return the built DFA, or nullptr if grey.allowGough is off, \p g has more
 *         than HAIG_MAX_NFA_STATE vertices, or doHaig() fails.
 */
unique_ptr<raw_som_dfa> attemptToBuildHaig(NGHolder &g, som_type som,
                                           u32 somPrecision,
                                const vector<vector<CharReach> > &triggers,
                                const Grey &grey, bool unordered_som) {
    assert(is_triggered(g) != triggers.empty());
    assert(!unordered_som || is_triggered(g));

    if (!grey.allowGough) {
        /* must be at least one engine capable of handling raw som dfas */
        return nullptr;
    }

    auto state_ids = numberStates(g);
    dropUnusedStarts(g, state_ids);

    DEBUG_PRINTF("attempting to build haig \n");
    assert(allMatchStatesHaveReports(g));
    assert(hasCorrectlyNumberedVertices(g));

    u32 numStates = num_vertices(g);
    if (numStates > HAIG_MAX_NFA_STATE) {
        DEBUG_PRINTF("giving up... looks too big\n");
        return nullptr;
    }

    auto rdfa = ue2::make_unique<raw_som_dfa>(g.kind, unordered_som);

    DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates);
    const bool fast_path = numStates <= NFA_STATE_LIMIT;
    const bool built =
        fast_path ? doHaig<Automaton_Graph>(g, state_ids, som, triggers,
                                            unordered_som, rdfa.get())
                  : doHaig<Automaton_Big>(g, state_ids, som, triggers,
                                          unordered_som, rdfa.get());
    if (!built) {
        return nullptr;
    }

    DEBUG_PRINTF("determinised, building impl dfa (a,f) = (%hu,%hu)\n",
                 rdfa->start_anchored, rdfa->start_floating);
    rdfa->stream_som_loc_width = somPrecision;

    assert(rdfa->kind == g.kind);
    return rdfa;
}
|
||||
|
||||
static
|
||||
void haig_merge_do_preds(const vector<const raw_som_dfa *> &dfas,
|
||||
const vector<u32> &per_dfa_adj,
|
||||
const vector<dstate_id_t> &source_nfa_states,
|
||||
som_tran_info &som_tran) {
|
||||
for (u32 d = 0; d < dfas.size(); ++d) {
|
||||
u32 adj = per_dfa_adj[d];
|
||||
|
||||
const som_tran_info &som_tran_d
|
||||
= dfas[d]->state_som[source_nfa_states[d]].preds;
|
||||
for (som_tran_info::const_iterator it = som_tran_d.begin();
|
||||
it != som_tran_d.end(); ++it) {
|
||||
assert(it->first != CREATE_NEW_SOM);
|
||||
u32 dest_slot = it->first < N_SPECIALS ? it->first
|
||||
: it->first + adj;
|
||||
vector<u32> &out = som_tran[dest_slot];
|
||||
|
||||
if (!out.empty()) {
|
||||
/* stylised specials already done; it does not matter who builds
|
||||
the preds */
|
||||
assert(dest_slot < N_SPECIALS);
|
||||
continue;
|
||||
}
|
||||
for (vector<u32>::const_iterator jt = it->second.begin();
|
||||
jt != it->second.end(); ++jt) {
|
||||
if (*jt < N_SPECIALS || *jt == CREATE_NEW_SOM) {
|
||||
out.push_back(*jt);
|
||||
} else {
|
||||
out.push_back(*jt + adj);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void haig_merge_note_starts(const vector<const raw_som_dfa *> &dfas,
|
||||
const vector<u32> &per_dfa_adj,
|
||||
map<u32, u32> *out) {
|
||||
for (u32 d = 0; d < dfas.size(); ++d) {
|
||||
u32 adj = per_dfa_adj[d];
|
||||
const map<u32, u32> &new_soms = dfas[d]->new_som_nfa_states;
|
||||
for (map<u32, u32>::const_iterator it = new_soms.begin();
|
||||
it != new_soms.end(); ++it) {
|
||||
if (it->first < N_SPECIALS) {
|
||||
assert(!it->second);
|
||||
out->emplace(it->first, 0U);
|
||||
} else {
|
||||
assert(d + 1 >= per_dfa_adj.size()
|
||||
|| it->first + adj < per_dfa_adj[d + 1]);
|
||||
out->emplace(it->first + adj, it->second);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Collects into \p out the som_reports of one merged DFA state.
 *
 * For each input DFA, reads the reports (or reports_eod when \p eod is set)
 * of its source state and re-emits them, shifting any slot that is neither
 * CREATE_NEW_SOM nor a special (< N_SPECIALS) by that DFA's entry in
 * \p per_dfa_adj.
 */
static never_inline
void haig_merge_do_report(const vector<const raw_som_dfa *> &dfas,
                          const vector<u32> &per_dfa_adj,
                          const vector<dstate_id_t> &source_nfa_states,
                          bool eod, set<som_report> &out) {
    for (u32 d = 0; d < dfas.size(); ++d) {
        const u32 adj = per_dfa_adj[d];
        const dstate_som &src = dfas[d]->state_som[source_nfa_states[d]];
        const set<som_report> &reps = eod ? src.reports_eod : src.reports;

        for (const som_report &rep : reps) {
            u32 slot = rep.slot;
            const bool needs_shift
                = slot != CREATE_NEW_SOM && slot >= N_SPECIALS;
            if (needs_shift) {
                slot += adj;
            }
            out.insert(som_report(rep.report, slot));
        }
    }
}
|
||||
|
||||
static
|
||||
u32 total_slots_used(const raw_som_dfa &rdfa) {
|
||||
u32 rv = 0;
|
||||
for (vector<dstate_som>::const_iterator it = rdfa.state_som.begin();
|
||||
it != rdfa.state_som.end(); ++it) {
|
||||
for (som_tran_info::const_iterator jt = it->preds.begin();
|
||||
jt != it->preds.end(); ++jt) {
|
||||
assert(jt->first != CREATE_NEW_SOM);
|
||||
ENSURE_AT_LEAST(&rv, jt->first + 1);
|
||||
}
|
||||
}
|
||||
const map<u32, u32> &new_soms = rdfa.new_som_nfa_states;
|
||||
for (map<u32, u32>::const_iterator it = new_soms.begin();
|
||||
it != new_soms.end(); ++it) {
|
||||
ENSURE_AT_LEAST(&rv, it->first + 1);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
/**
 * Tries to merge the haigs in \p dfas into a single raw_som_dfa.
 *
 * \param dfas  non-empty (asserted) list of DFAs, all asserted to share the
 *              same kind.
 * \param limit maximum number of states, checked against each input and
 *              passed to determinise().
 * \return the merged DFA, or nullptr if an input has more than \p limit
 *         states, determinise() reports the limit was exceeded, the combined
 *         slot count overflows u32, or a merged state has more than
 *         HAIG_MAX_LIVE_SOM_SLOTS pred entries.
 */
unique_ptr<raw_som_dfa> attemptToMergeHaig(const vector<const raw_som_dfa *> &dfas,
                                           u32 limit) {
    assert(!dfas.empty());

    Automaton_Haig_Merge n(dfas);

    DEBUG_PRINTF("merging %zu dfas\n", dfas.size());

    bool unordered_som = false;
    for (const auto &haig : dfas) {
        assert(haig);
        assert(haig->kind == dfas.front()->kind);
        unordered_som |= haig->unordered_som_triggers;
        if (haig->states.size() > limit) {
            DEBUG_PRINTF("too many states!\n");
            return nullptr;
        }
    }

    using StateSet = Automaton_Haig_Merge::StateSet;
    vector<StateSet> nfa_state_map;
    auto rdfa = ue2::make_unique<raw_som_dfa>(dfas[0]->kind, unordered_som);

    int rv = determinise(n, rdfa->states, limit, &nfa_state_map);
    if (rv) {
        DEBUG_PRINTF("%d:state limit (%u) exceeded\n", rv, limit);
        return nullptr; /* over state limit */
    }

    rdfa->start_anchored = n.start_anchored;
    rdfa->start_floating = n.start_floating;
    rdfa->alpha_size = n.alphasize;
    rdfa->alpha_remap = n.alpha;

    /* base slot offset for each input dfa */
    vector<u32> per_dfa_adj;
    u32 next_base = 0;
    for (const auto &haig : dfas) {
        const u32 base = next_base;
        per_dfa_adj.push_back(base);
        next_base += total_slots_used(*haig);
        if (next_base < base) {
            /* overflowed our som slot count */
            return nullptr;
        }
    }

    rdfa->state_som.reserve(rdfa->states.size());
    for (u32 i = 0; i < rdfa->states.size(); i++) {
        rdfa->state_som.emplace_back();
        dstate_som &merged = rdfa->state_som.back();

        const vector<dstate_id_t> &source_nfa_states = nfa_state_map[i];
        DEBUG_PRINTF("finishing state %u\n", i);

        haig_merge_do_preds(dfas, per_dfa_adj, source_nfa_states,
                            merged.preds);

        if (merged.preds.size() > HAIG_MAX_LIVE_SOM_SLOTS) {
            DEBUG_PRINTF("som slot limit exceeded (%zu)\n",
                         merged.preds.size());
            return nullptr;
        }

        haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states,
                             false /* not eod */, merged.reports);
        haig_merge_do_report(dfas, per_dfa_adj, source_nfa_states,
                             true /* eod */, merged.reports_eod);
    }

    haig_merge_note_starts(dfas, per_dfa_adj, &rdfa->new_som_nfa_states);
    rdfa->trigger_nfa_state = NODE_START;

    DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n",
                 rdfa->start_anchored, rdfa->start_floating);
    rdfa->stream_som_loc_width = dfas[0]->stream_som_loc_width;

    return rdfa;
}
|
||||
|
||||
} // namespace ue2
|
||||
68
src/nfagraph/ng_haig.h
Normal file
68
src/nfagraph/ng_haig.h
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Build code for Haig SOM DFA.
|
||||
*/
|
||||
|
||||
#ifndef NG_HAIG_H
|
||||
#define NG_HAIG_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "som/som.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class CharReach;
|
||||
class NGHolder;
|
||||
struct Grey;
|
||||
struct raw_som_dfa;
|
||||
|
||||
#define HAIG_FINAL_DFA_STATE_LIMIT 16383
|
||||
#define HAIG_HARD_DFA_STATE_LIMIT 8192
|
||||
|
||||
/* unordered_som_triggers being true indicates that a live haig may be subjected
|
||||
* to later tops arriving with earlier soms (without the haig going dead in
|
||||
* between)
|
||||
*/
|
||||
|
||||
std::unique_ptr<raw_som_dfa> attemptToBuildHaig(NGHolder &g, som_type som,
|
||||
u32 somPrecision,
|
||||
const std::vector<std::vector<CharReach> > &triggers,
|
||||
const Grey &grey, bool unordered_som_triggers = false);
|
||||
|
||||
std::unique_ptr<raw_som_dfa>
|
||||
attemptToMergeHaig(const std::vector<const raw_som_dfa *> &dfas,
|
||||
u32 limit = HAIG_HARD_DFA_STATE_LIMIT);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
230
src/nfagraph/ng_holder.cpp
Normal file
230
src/nfagraph/ng_holder.cpp
Normal file
@@ -0,0 +1,230 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "ng_holder.h"
|
||||
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
// internal use only
|
||||
static NFAVertex addSpecialVertex(NFAGraph &g, SpecialNodes id) {
|
||||
NFAVertex v = add_vertex(g);
|
||||
g[v].index = id;
|
||||
return v;
|
||||
}
|
||||
|
||||
/**
 * Default constructor: builds a holder of kind NFA_OUTFIX (the value of the
 * in-class default for \c kind).
 *
 * Delegates to the nfa_kind constructor, which creates the four special
 * vertices, wires up the stylised edges and sets the start vertices' reach,
 * so the two constructors cannot drift apart.
 */
NGHolder::NGHolder(void) : NGHolder(NFA_OUTFIX) {}
|
||||
|
||||
/**
 * Builds a holder of kind \p k containing only the four special vertices
 * (start, startDs, accept, acceptEod), the stylised edges between them, and
 * full character reach on both start vertices.
 */
NGHolder::NGHolder(nfa_kind k)
    : kind(k), g(),
      // special vertices, created in index order
      start(addSpecialVertex(g, NODE_START)),
      startDs(addSpecialVertex(g, NODE_START_DOTSTAR)),
      accept(addSpecialVertex(g, NODE_ACCEPT)),
      acceptEod(addSpecialVertex(g, NODE_ACCEPT_EOD)),
      // cached sizes
      numVertices(N_SPECIALS),
      numEdges(0),
      isValidNumEdges(true),
      isValidNumVertices(true) {
    g[start].char_reach.setall();
    g[startDs].char_reach.setall();

    // fake edges for the stylized bits of the NFA
    add_edge(start, startDs, *this);
    add_edge(startDs, startDs, *this);
    add_edge(accept, acceptEod, *this);
}
|
||||
|
||||
/** Destructor; only emits a debug trace with the holder's address. */
NGHolder::~NGHolder() {
    DEBUG_PRINTF("destroying holder @ %p\n", this);
}
|
||||
|
||||
/** Edge count of \p h; recounts the underlying graph and refreshes the cache
 * when the cached value is marked invalid. */
size_t num_edges(NGHolder &h) {
    if (h.isValidNumEdges) {
        return h.numEdges;
    }

    h.numEdges = num_edges(h.g);
    h.isValidNumEdges = true;
    return h.numEdges;
}
|
||||
|
||||
size_t num_edges(const NGHolder &h) {
|
||||
if (!h.isValidNumEdges) {
|
||||
return num_edges(h.g);
|
||||
}
|
||||
return h.numEdges;
|
||||
}
|
||||
|
||||
/** Vertex count of \p h; recounts the underlying graph and refreshes the
 * cache when the cached value is marked invalid. */
size_t num_vertices(NGHolder &h) {
    if (h.isValidNumVertices) {
        return h.numVertices;
    }

    h.numVertices = num_vertices(h.g);
    h.isValidNumVertices = true;
    return h.numVertices;
}
|
||||
|
||||
size_t num_vertices(const NGHolder &h) {
|
||||
if (!h.isValidNumVertices) {
|
||||
return num_vertices(h.g);
|
||||
}
|
||||
return h.numVertices;
|
||||
}
|
||||
|
||||
/** Removes edge \p e from the graph held by \p h and decrements the cached
 * edge counter. */
void remove_edge(const NFAEdge &e, NGHolder &h) {
    remove_edge(e, h.g);

    // a valid cached count must be non-zero before we decrement it
    assert(h.numEdges > 0 || !h.isValidNumEdges);
    --h.numEdges;
}
|
||||
|
||||
/** Removes the edge (\p u, \p v) from the graph held by \p h and decrements
 * the cached edge counter. */
void remove_edge(NFAVertex u, NFAVertex v, NGHolder &h) {
    remove_edge(u, v, h.g);

    // a valid cached count must be non-zero before we decrement it
    assert(h.numEdges > 0 || !h.isValidNumEdges);
    --h.numEdges;
}
|
||||
|
||||
/** Removes vertex \p v from the graph held by \p h and decrements the cached
 * vertex counter. */
void remove_vertex(NFAVertex v, NGHolder &h) {
    remove_vertex(v, h.g);

    // a valid cached count must be non-zero before we decrement it
    assert(h.numVertices > 0 || !h.isValidNumVertices);
    --h.numVertices;
}
|
||||
|
||||
/** Clears vertex \p v via clear_vertex_faster() on the underlying graph,
 * first marking the cached edge count invalid. */
void clear_vertex(NFAVertex v, NGHolder &h) {
    h.isValidNumEdges = false; // cached count no longer trustworthy
    NFAGraph &graph = h.g;
    clear_vertex_faster(v, graph);
}
|
||||
|
||||
/** Clears the in-edges of \p v in the underlying graph, first marking the
 * cached edge count invalid. */
void clear_in_edges(NFAVertex v, NGHolder &h) {
    h.isValidNumEdges = false; // cached count no longer trustworthy
    NFAGraph &graph = h.g;
    clear_in_edges(v, graph);
}
|
||||
|
||||
/** Clears the out-edges of \p v in the underlying graph, first marking the
 * cached edge count invalid. */
void clear_out_edges(NFAVertex v, NGHolder &h) {
    h.isValidNumEdges = false; // cached count no longer trustworthy
    NFAGraph &graph = h.g;
    clear_out_edges(v, graph);
}
|
||||
|
||||
/**
 * Resets \p h to the shape of a freshly constructed holder: every vertex is
 * cleared, every non-special vertex is removed, and the three stylised
 * special edges are recreated.
 *
 * The cached edge counter is reset before the special edges are re-added.
 * clear_vertex() only marks the cache invalid without lowering the counter,
 * so without the reset the new edges would be numbered from the stale
 * pre-clear value and the cache would stay invalid (forcing a recount on
 * every const num_edges() call) until someone called renumberEdges().
 */
void clear_graph(NGHolder &h) {
    NFAGraph::vertex_iterator vi, ve;
    for (tie(vi, ve) = vertices(h); vi != ve;) {
        NFAVertex v = *vi;
        ++vi; // step past v before it is (possibly) removed

        clear_vertex(v, h);
        if (!is_special(v, h)) {
            remove_vertex(v, h);
        }
    }

    assert(num_vertices(h) == N_SPECIALS);

    // No edges remain: bring the edge counter back to zero and mark it
    // valid, so the stylised edges below get indices 0..2 as they do in the
    // constructor.
    h.renumberEdges();

    // Recreate special stylised edges.
    add_edge(h.start, h.startDs, h);
    add_edge(h.startDs, h.startDs, h);
    add_edge(h.accept, h.acceptEod, h);
}
|
||||
|
||||
/**
 * Adds the edge (\p u, \p v) to \p h, which must not already contain it
 * (asserted). The new edge takes the holder's next edge index and top 0.
 *
 * \return the underlying graph's add_edge() result.
 */
std::pair<NFAEdge, bool> add_edge(NFAVertex u, NFAVertex v, NGHolder &h) {
    assert(!edge(u, v, h.g).second);

    pair<NFAEdge, bool> added = add_edge(u, v, h.g);
    const NFAEdge &new_edge = added.first;

    h.g[new_edge].index = h.numEdges++;
    assert(h.numEdges > 0 || !h.isValidNumEdges); // no wrapping
    h.g[new_edge].top = 0;

    return added;
}
|
||||
|
||||
/**
 * Adds the edge (\p u, \p v) with properties \p ep to \p h, which must not
 * already contain it (asserted). The index stored in the new edge's
 * properties is overwritten with the holder's next edge index.
 *
 * \return the underlying graph's add_edge() result.
 */
std::pair<NFAEdge, bool> add_edge(NFAVertex u, NFAVertex v,
                                  const NFAGraph::edge_property_type &ep,
                                  NGHolder &h) {
    assert(!edge(u, v, h.g).second);

    pair<NFAEdge, bool> added = add_edge(u, v, ep, h.g);

    h.g[added.first].index = h.numEdges++;
    assert(h.numEdges > 0 || !h.isValidNumEdges); // no wrapping

    return added;
}
|
||||
|
||||
/** Adds a vertex to \p h, gives it the holder's next vertex index and
 * returns it. */
NFAVertex add_vertex(NGHolder &h) {
    const NFAVertex created = add_vertex(h.g);
    h[created].index = h.numVertices++;
    assert(h.numVertices > 0); // no wrapping
    return created;
}
|
||||
|
||||
/** Adds a vertex to \p h carrying a copy of the properties \p vp, except
 * that the index freshly assigned by the holder is kept. */
NFAVertex add_vertex(const NFAGraph::vertex_property_type &vp, NGHolder &h) {
    const NFAVertex created = add_vertex(h);

    const u32 assigned_index = h.g[created].index; /* preserve index */
    h.g[created] = vp;
    h.g[created].index = assigned_index;

    return created;
}
|
||||
|
||||
/** Renumbers the graph's edges via renumberGraphEdges(), stores the value it
 * returns as the cached edge count and marks that count valid. */
void NGHolder::renumberEdges() {
    const auto renumbered = renumberGraphEdges(g);
    numEdges = renumbered;
    isValidNumEdges = true;
}
|
||||
|
||||
/** Renumbers the graph's vertices via renumberGraphVertices(), stores the
 * value it returns as the cached vertex count and marks that count valid. */
void NGHolder::renumberVertices() {
    const auto renumbered = renumberGraphVertices(g);
    numVertices = renumbered;
    isValidNumVertices = true;
}
|
||||
|
||||
/**
 * Maps a special node id to the matching special vertex of this holder.
 *
 * \param id one of NODE_START, NODE_START_DOTSTAR, NODE_ACCEPT or
 *           NODE_ACCEPT_EOD.
 * \return the corresponding vertex, or null_vertex() for any other id.
 */
NFAVertex NGHolder::getSpecialVertex(u32 id) const {
    switch (id) {
    case NODE_START:
        return start;
    case NODE_START_DOTSTAR:
        return startDs;
    case NODE_ACCEPT:
        return accept;
    case NODE_ACCEPT_EOD:
        return acceptEod;
    default:
        // Use the graph's own null descriptor rather than a literal nullptr,
        // which only works while the vertex descriptor is a raw pointer.
        return null_vertex();
    }
}
|
||||
|
||||
}
|
||||
329
src/nfagraph/ng_holder.h
Normal file
329
src/nfagraph/ng_holder.h
Normal file
@@ -0,0 +1,329 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef NG_HOLDER_H
|
||||
#define NG_HOLDER_H
|
||||
|
||||
#include "ng_graph.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/nfa_kind.h"
|
||||
|
||||
#include <boost/graph/adjacency_iterator.hpp>
|
||||
#include <boost/graph/adjacency_list.hpp>
|
||||
#include <boost/graph/graph_traits.hpp>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Encapsulates an NFAGraph, stores special vertices and other
|
||||
* metadata.
|
||||
*
|
||||
* When constructed, the graph will have the following stylised "special"
|
||||
* edges:
|
||||
*
|
||||
* - (start, startDs)
|
||||
* - (startDs, startDs) (self-loop)
|
||||
* - (accept, acceptEod)
|
||||
*/
|
||||
class NGHolder : boost::noncopyable {
|
||||
public:
|
||||
NGHolder(void);
|
||||
explicit NGHolder(nfa_kind kind);
|
||||
virtual ~NGHolder(void);
|
||||
|
||||
// Pack edge and vertex indices.
|
||||
// Note: maintaining edge index order can be expensive due to the frequency
|
||||
// of edge removal/addition, so only renumberEdges() when required by
|
||||
// operations on edge lists.
|
||||
void renumberEdges();
|
||||
void renumberVertices();
|
||||
|
||||
NFAVertex getSpecialVertex(u32 id) const;
|
||||
|
||||
nfa_kind kind = NFA_OUTFIX; /* Role that this plays in Rose */
|
||||
|
||||
/** \brief Underlying graph object */
|
||||
NFAGraph g;
|
||||
|
||||
const NFAVertex start; //!< Anchored start vertex.
|
||||
const NFAVertex startDs; //!< Unanchored start-dotstar vertex.
|
||||
const NFAVertex accept; //!< Accept vertex.
|
||||
const NFAVertex acceptEod; //!< Accept at EOD vertex.
|
||||
|
||||
using directed_category = NFAGraph::directed_category;
|
||||
using edge_parallel_category = NFAGraph::edge_parallel_category;
|
||||
using traversal_category = NFAGraph::traversal_category;
|
||||
|
||||
using vertex_descriptor = NFAGraph::vertex_descriptor;
|
||||
using edge_descriptor = NFAGraph::edge_descriptor;
|
||||
using adjacency_iterator = NFAGraph::adjacency_iterator;
|
||||
using edge_iterator = NFAGraph::edge_iterator;
|
||||
using in_edge_iterator = NFAGraph::in_edge_iterator;
|
||||
using inv_adjacency_iterator = NFAGraph::inv_adjacency_iterator;
|
||||
using out_edge_iterator = NFAGraph::out_edge_iterator;
|
||||
using vertex_iterator = NFAGraph::vertex_iterator;
|
||||
using edge_property_type = NFAGraph::edge_property_type;
|
||||
using vertex_property_type = NFAGraph::vertex_property_type;
|
||||
|
||||
// These free functions, which follow the BGL model, are the interface to
|
||||
// the graph held by this class.
|
||||
friend size_t num_vertices(NGHolder &h);
|
||||
friend size_t num_vertices(const NGHolder &h);
|
||||
friend size_t num_edges(NGHolder &h);
|
||||
friend size_t num_edges(const NGHolder &h);
|
||||
friend void remove_vertex(NFAVertex v, NGHolder &h);
|
||||
friend void clear_vertex(NFAVertex v, NGHolder &h);
|
||||
friend void clear_in_edges(NFAVertex v, NGHolder &h);
|
||||
friend void clear_out_edges(NFAVertex v, NGHolder &h);
|
||||
friend void remove_edge(const NFAEdge &e, NGHolder &h);
|
||||
friend void remove_edge(NFAVertex u, NFAVertex v, NGHolder &h);
|
||||
|
||||
template<class Predicate>
|
||||
friend void remove_out_edge_if(NFAVertex v, Predicate pred, NGHolder &h) {
|
||||
boost::remove_out_edge_if(v, pred, h.g);
|
||||
h.isValidNumEdges = false;
|
||||
}
|
||||
|
||||
template<class Predicate>
|
||||
friend void remove_in_edge_if(NFAVertex v, Predicate pred, NGHolder &h) {
|
||||
boost::remove_in_edge_if(v, pred, h.g);
|
||||
h.isValidNumEdges = false;
|
||||
}
|
||||
|
||||
template<class Predicate>
|
||||
friend void remove_edge_if(Predicate pred, NGHolder &h) {
|
||||
boost::remove_edge_if(pred, h.g);
|
||||
h.isValidNumEdges = false;
|
||||
}
|
||||
|
||||
friend std::pair<NFAEdge, bool> add_edge(NFAVertex u, NFAVertex v,
|
||||
NGHolder &h);
|
||||
friend std::pair<NFAEdge, bool> add_edge(NFAVertex u, NFAVertex v,
|
||||
const edge_property_type &ep,
|
||||
NGHolder &h);
|
||||
friend NFAVertex add_vertex(NGHolder &h);
|
||||
friend NFAVertex add_vertex(const vertex_property_type &vp, NGHolder &h);
|
||||
|
||||
static NFAVertex null_vertex(void) { return NFAGraph::null_vertex(); }
|
||||
|
||||
// Subscript operators for BGL bundled properties.
|
||||
using graph_bundled = NFAGraph::graph_bundled;
|
||||
using vertex_bundled = NFAGraph::vertex_bundled;
|
||||
using edge_bundled = NFAGraph::edge_bundled;
|
||||
|
||||
/** \brief Mutable access to the bundled properties of vertex \p v. */
vertex_bundled &operator[](NFAVertex v) {
    auto bundle_map = get(boost::vertex_bundle, g);
    return bundle_map[v];
}
|
||||
/** \brief Read-only access to the bundled properties of vertex \p v. */
const vertex_bundled &operator[](NFAVertex v) const {
    auto bundle_map = get(boost::vertex_bundle, g);
    return bundle_map[v];
}
|
||||
/** \brief Mutable access to the bundled properties of edge \p e. */
edge_bundled &operator[](const NFAEdge &e) {
    auto bundle_map = get(boost::edge_bundle, g);
    return bundle_map[e];
}
|
||||
/** \brief Read-only access to the bundled properties of edge \p e. */
const edge_bundled &operator[](const NFAEdge &e) const {
    auto bundle_map = get(boost::edge_bundle, g);
    return bundle_map[e];
}
|
||||
|
||||
protected:
|
||||
|
||||
/* Since the NFAGraph vertex/edge list selectors are std::lists, computing
|
||||
* num_vertices and num_edges is O(N). We use these members to store a
|
||||
* cached copy of the size.
|
||||
*
|
||||
* In the future, with C++11's constant-time std::list::size, these may
|
||||
* become obsolete. */
|
||||
|
||||
u32 numVertices;
|
||||
u32 numEdges;
|
||||
bool isValidNumEdges;
|
||||
bool isValidNumVertices;
|
||||
};
|
||||
|
||||
/** \brief True if the vertex \p v is one of our special vertices. */
|
||||
template <typename GraphT>
|
||||
static really_inline
|
||||
bool is_special(const NFAVertex v, const GraphT &g) {
|
||||
return g[v].index < N_SPECIALS;
|
||||
}
|
||||
|
||||
/** \brief adjacent_vertices() for an NGHolder: forwards to the overload for
 * the underlying graph. */
static really_inline
std::pair<NFAGraph::adjacency_iterator, NFAGraph::adjacency_iterator>
adjacent_vertices(NFAVertex v, const NGHolder &h) {
    const auto &graph = h.g;
    return adjacent_vertices(v, graph);
}
|
||||
|
||||
/** \brief edge() for an NGHolder: forwards to boost::edge on the underlying
 * graph and returns its (edge, bool) result unchanged. */
static really_inline
std::pair<NFAEdge, bool> edge(NFAVertex u, NFAVertex v, const NGHolder &h) {
    const auto &graph = h.g;
    return boost::edge(u, v, graph);
}
|
||||
|
||||
/** \brief edges() for an NGHolder: forwards to the overload for the
 * underlying graph. */
static really_inline
std::pair<NFAGraph::edge_iterator, NFAGraph::edge_iterator>
edges(const NGHolder &h) {
    const auto &graph = h.g;
    return edges(graph);
}
|
||||
|
||||
/** \brief in_degree() for an NGHolder: forwards to the overload for the
 * underlying graph. */
static really_inline
size_t in_degree(NFAVertex v, const NGHolder &h) {
    const auto &graph = h.g;
    return in_degree(v, graph);
}
|
||||
|
||||
/** \brief in_edges() for an NGHolder: forwards to the overload for the
 * underlying graph. */
static really_inline
std::pair<NFAGraph::in_edge_iterator, NFAGraph::in_edge_iterator>
in_edges(NFAVertex v, const NGHolder &h) {
    const auto &graph = h.g;
    return in_edges(v, graph);
}
|
||||
|
||||
/** \brief inv_adjacent_vertices() for an NGHolder: forwards to the overload
 * for the underlying graph. */
static really_inline
std::pair<NFAGraph::inv_adjacency_iterator, NFAGraph::inv_adjacency_iterator>
inv_adjacent_vertices(NFAVertex v, const NGHolder &h) {
    const auto &graph = h.g;
    return inv_adjacent_vertices(v, graph);
}
|
||||
|
||||
/** \brief out_degree() for an NGHolder: forwards to the overload for the
 * underlying graph. */
static really_inline
size_t out_degree(NFAVertex v, const NGHolder &h) {
    const auto &graph = h.g;
    return out_degree(v, graph);
}
|
||||
|
||||
/** \brief out_edges() for an NGHolder: forwards to the overload for the
 * underlying graph. */
static really_inline
std::pair<NFAGraph::out_edge_iterator, NFAGraph::out_edge_iterator>
out_edges(NFAVertex v, const NGHolder &h) {
    const auto &graph = h.g;
    return out_edges(v, graph);
}
|
||||
|
||||
/** \brief source() for an NGHolder: forwards to the overload for the
 * underlying graph. */
static really_inline
NFAVertex source(const NFAEdge &e, const NGHolder &h) {
    const auto &graph = h.g;
    return source(e, graph);
}
|
||||
|
||||
/** \brief target() for an NGHolder: forwards to the overload for the
 * underlying graph. */
static really_inline
NFAVertex target(const NFAEdge &e, const NGHolder &h) {
    const auto &graph = h.g;
    return target(e, graph);
}
|
||||
|
||||
/** \brief vertices() for an NGHolder: forwards to the overload for the
 * underlying graph. */
static really_inline
std::pair<NFAGraph::vertex_iterator, NFAGraph::vertex_iterator>
vertices(const NGHolder &h) {
    const auto &graph = h.g;
    return vertices(graph);
}
|
||||
|
||||
/**
|
||||
* \brief Clears all non-special vertices and edges from the graph.
|
||||
*
|
||||
* Note: not the same as the BGL's clear() function, which removes all vertices
|
||||
* and edges.
|
||||
*/
|
||||
void clear_graph(NGHolder &h);
|
||||
|
||||
/*
|
||||
* \brief Clear and remove all of the vertices pointed to by the given iterator
|
||||
* range.
|
||||
*
|
||||
* If renumber is false, no renumbering of vertex indices is done.
|
||||
*
|
||||
* Note: should not be called with iterators that will be invalidated by vertex
|
||||
* removal (such as NFAGraph::vertex_iterator).
|
||||
*/
|
||||
template <class Iter>
|
||||
void remove_vertices(Iter begin, Iter end, NGHolder &h, bool renumber = true) {
|
||||
if (begin == end) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (Iter it = begin; it != end; ++it) {
|
||||
NFAVertex v = *it;
|
||||
if (!is_special(v, h)) {
|
||||
clear_vertex(v, h);
|
||||
remove_vertex(v, h);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
if (renumber) {
|
||||
h.renumberEdges();
|
||||
h.renumberVertices();
|
||||
}
|
||||
}
|
||||
|
||||
/** \brief Clear and remove all of the vertices pointed to by the vertex
|
||||
* descriptors in the given container.
|
||||
*
|
||||
* This is a convenience wrapper around the iterator variant above.
|
||||
*/
|
||||
template <class Container>
|
||||
void remove_vertices(const Container &c, NGHolder &h, bool renumber = true) {
|
||||
remove_vertices(c.begin(), c.end(), h, renumber);
|
||||
}
|
||||
|
||||
/*
|
||||
* \brief Clear and remove all of the edges pointed to by the given iterator
|
||||
* range.
|
||||
*
|
||||
* If renumber is false, no renumbering of vertex indices is done.
|
||||
*
|
||||
* Note: should not be called with iterators that will be invalidated by vertex
|
||||
* removal (such as NFAGraph::edge_iterator).
|
||||
*/
|
||||
template <class Iter>
|
||||
void remove_edges(Iter begin, Iter end, NGHolder &h, bool renumber = true) {
|
||||
if (begin == end) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (Iter it = begin; it != end; ++it) {
|
||||
const NFAEdge &e = *it;
|
||||
remove_edge(e, h);
|
||||
}
|
||||
|
||||
if (renumber) {
|
||||
h.renumberEdges();
|
||||
}
|
||||
}
|
||||
|
||||
/** \brief Clear and remove all of the edges pointed to by the edge descriptors
|
||||
* in the given container.
|
||||
*
|
||||
* This is a convenience wrapper around the iterator variant above.
|
||||
*/
|
||||
template <class Container>
|
||||
void remove_edges(const Container &c, NGHolder &h, bool renumber = true) {
|
||||
remove_edges(c.begin(), c.end(), h, renumber);
|
||||
}
|
||||
|
||||
/** \brief Applies the kind-based is_triggered() test to the holder's kind. */
static UNUSED
bool is_triggered(const NGHolder &g) {
    const auto &holder_kind = g.kind;
    return is_triggered(holder_kind);
}
|
||||
|
||||
/** \brief Applies the kind-based generates_callbacks() test to the holder's
 * kind. */
static UNUSED
bool generates_callbacks(const NGHolder &g) {
    const auto &holder_kind = g.kind;
    return generates_callbacks(holder_kind);
}
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
215
src/nfagraph/ng_is_equal.cpp
Normal file
215
src/nfagraph/ng_is_equal.cpp
Normal file
@@ -0,0 +1,215 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Loose equality testing for NGHolder graphs.
|
||||
*
|
||||
* Loose equality check for holders' graph structure and vertex_index,
|
||||
* vertex_char_reach and (optionally) reports.
|
||||
*/
|
||||
#include "ng_is_equal.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <set>
|
||||
|
||||
#include <boost/functional/hash/hash.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
struct check_report {
|
||||
virtual ~check_report() {}
|
||||
virtual bool operator()(const flat_set<ReportID> &reports_a,
|
||||
const flat_set<ReportID> &reports_b) const = 0;
|
||||
};
|
||||
|
||||
struct full_check_report : public check_report {
|
||||
bool operator()(const flat_set<ReportID> &reports_a,
|
||||
const flat_set<ReportID> &reports_b) const override {
|
||||
return reports_a == reports_b;
|
||||
}
|
||||
};
|
||||
|
||||
struct equiv_check_report : public check_report {
|
||||
equiv_check_report(ReportID a_in, ReportID b_in)
|
||||
: a_rep(a_in), b_rep(b_in) {}
|
||||
|
||||
bool operator()(const flat_set<ReportID> &reports_a,
|
||||
const flat_set<ReportID> &reports_b) const override {
|
||||
return contains(reports_a, a_rep) == contains(reports_b, b_rep);
|
||||
}
|
||||
private:
|
||||
ReportID a_rep;
|
||||
ReportID b_rep;
|
||||
};
|
||||
}
|
||||
|
||||
static
|
||||
bool is_equal_i(const NGHolder &a, const NGHolder &b,
|
||||
const check_report &check_rep) {
|
||||
assert(hasCorrectlyNumberedVertices(a));
|
||||
assert(hasCorrectlyNumberedVertices(b));
|
||||
|
||||
size_t num_verts = num_vertices(a);
|
||||
if (num_verts != num_vertices(b)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<NFAVertex> vert_a;
|
||||
vector<NFAVertex> vert_b;
|
||||
vector<NFAVertex> adj_a;
|
||||
vector<NFAVertex> adj_b;
|
||||
|
||||
vert_a.reserve(num_verts);
|
||||
vert_b.reserve(num_verts);
|
||||
adj_a.reserve(num_verts);
|
||||
adj_b.reserve(num_verts);
|
||||
|
||||
insert(&vert_a, vert_a.end(), vertices(a));
|
||||
insert(&vert_b, vert_b.end(), vertices(b));
|
||||
|
||||
sort(vert_a.begin(), vert_a.end(), make_index_ordering(a));
|
||||
sort(vert_b.begin(), vert_b.end(), make_index_ordering(b));
|
||||
|
||||
for (size_t i = 0; i < vert_a.size(); i++) {
|
||||
NFAVertex va = vert_a[i];
|
||||
NFAVertex vb = vert_b[i];
|
||||
DEBUG_PRINTF("vertex %u\n", a[va].index);
|
||||
|
||||
// Vertex index must be the same.
|
||||
if (a[va].index != b[vb].index) {
|
||||
DEBUG_PRINTF("bad index\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reach must be the same.
|
||||
if (a[va].char_reach != b[vb].char_reach) {
|
||||
DEBUG_PRINTF("bad reach\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!check_rep(a[va].reports, b[vb].reports)) {
|
||||
DEBUG_PRINTF("bad reports\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Other vertex properties may vary.
|
||||
|
||||
/* Check successors */
|
||||
adj_a.clear();
|
||||
adj_b.clear();
|
||||
insert(&adj_a, adj_a.end(), adjacent_vertices(va, a));
|
||||
insert(&adj_b, adj_b.end(), adjacent_vertices(vb, b));
|
||||
|
||||
if (adj_a.size() != adj_b.size()) {
|
||||
DEBUG_PRINTF("bad adj\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
sort(adj_a.begin(), adj_a.end(), make_index_ordering(a));
|
||||
sort(adj_b.begin(), adj_b.end(), make_index_ordering(b));
|
||||
|
||||
for (size_t j = 0; j < adj_a.size(); j++) {
|
||||
if (a[adj_a[j]].index != b[adj_b[j]].index) {
|
||||
DEBUG_PRINTF("bad adj\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* check top for edges out of start */
|
||||
vector<pair<u32, u32>> top_a;
|
||||
vector<pair<u32, u32>> top_b;
|
||||
|
||||
for (const auto &e : out_edges_range(a.start, a)) {
|
||||
top_a.emplace_back(a[target(e, a)].index, a[e].top);
|
||||
}
|
||||
for (const auto &e : out_edges_range(b.start, b)) {
|
||||
top_b.emplace_back(b[target(e, b)].index, b[e].top);
|
||||
}
|
||||
|
||||
sort(top_a.begin(), top_a.end());
|
||||
sort(top_b.begin(), top_b.end());
|
||||
|
||||
if (top_a != top_b) {
|
||||
DEBUG_PRINTF("bad top\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("good\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
/** \brief loose hash of an NGHolder; equal if is_equal would return true. */
|
||||
u64a hash_holder(const NGHolder &g) {
|
||||
size_t rv = 0;
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
boost::hash_combine(rv, g[v].index);
|
||||
boost::hash_combine(rv, g[v].char_reach);
|
||||
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
boost::hash_combine(rv, g[w].index);
|
||||
}
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
bool is_equal(const NGHolder &a, const NGHolder &b) {
|
||||
DEBUG_PRINTF("testing %p %p\n", &a, &b);
|
||||
|
||||
if (&a == &b) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return is_equal_i(a, b, full_check_report());
|
||||
}
|
||||
|
||||
bool is_equal(const NGHolder &a, ReportID a_rep,
|
||||
const NGHolder &b, ReportID b_rep) {
|
||||
DEBUG_PRINTF("testing %p %p\n", &a, &b);
|
||||
|
||||
if (&a == &b && a_rep == b_rep) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return is_equal_i(a, b, equiv_check_report(a_rep, b_rep));
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
69
src/nfagraph/ng_is_equal.h
Normal file
69
src/nfagraph/ng_is_equal.h
Normal file
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Loose equality testing for NGHolder graphs.
|
||||
*
|
||||
* Loose equality check for holders' graph structure and vertex_index,
|
||||
* vertex_char_reach and (optionally) reports.
|
||||
*/
|
||||
|
||||
#ifndef NG_IS_EQUAL_H
|
||||
#define NG_IS_EQUAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <memory>
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
|
||||
bool is_equal(const NGHolder &a, const NGHolder &b);
|
||||
bool is_equal(const NGHolder &a, ReportID a_r, const NGHolder &b, ReportID b_r);
|
||||
|
||||
u64a hash_holder(const NGHolder &g);
|
||||
|
||||
// Util Functors
|
||||
struct NGHolderHasher {
|
||||
size_t operator()(const std::shared_ptr<const NGHolder> &h) const {
|
||||
return hash_holder(*h);
|
||||
}
|
||||
};
|
||||
|
||||
struct NGHolderEqual {
|
||||
bool operator()(const std::shared_ptr<const NGHolder> &a,
|
||||
const std::shared_ptr<const NGHolder> &b) const {
|
||||
return is_equal(*a, *b);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_IS_EQUAL_H
|
||||
363
src/nfagraph/ng_lbr.cpp
Normal file
363
src/nfagraph/ng_lbr.cpp
Normal file
@@ -0,0 +1,363 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Large Bounded Repeat (LBR) engine build code.
|
||||
*/
|
||||
|
||||
#include "ng_lbr.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_repeat.h"
|
||||
#include "ng_reports.h"
|
||||
#include "nfa/shufticompile.h"
|
||||
#include "nfa/trufflecompile.h"
|
||||
#include "nfa/lbr_internal.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "nfa/repeatcompile.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h" // for lg2
|
||||
#include "util/compile_context.h"
|
||||
#include "util/container.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
u32 depth_to_u32(const depth &d) {
|
||||
assert(d.is_reachable());
|
||||
if (d.is_infinite()) {
|
||||
return REPEAT_INF;
|
||||
}
|
||||
|
||||
u32 d_val = d;
|
||||
assert(d_val < REPEAT_INF);
|
||||
return d_val;
|
||||
}
|
||||
|
||||
template<class LbrStruct> static
|
||||
u64a* getTable(NFA *nfa) {
|
||||
char *ptr = (char *)nfa + sizeof(struct NFA) + sizeof(LbrStruct) +
|
||||
sizeof(RepeatInfo);
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(u64a));
|
||||
return (u64a *)ptr;
|
||||
}
|
||||
|
||||
template <class LbrStruct> static
|
||||
void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin,
|
||||
const depth &repeatMax, u32 minPeriod, enum RepeatType rtype) {
|
||||
assert(nfa);
|
||||
|
||||
RepeatStateInfo rsi(rtype, repeatMin, repeatMax, minPeriod);
|
||||
|
||||
DEBUG_PRINTF("selected %s model for {%s,%s} repeat\n",
|
||||
repeatTypeName(rtype), repeatMin.str().c_str(),
|
||||
repeatMax.str().c_str());
|
||||
|
||||
// Fill the lbr_common structure first. Note that the RepeatInfo structure
|
||||
// directly follows the LbrStruct.
|
||||
const u32 info_offset = sizeof(LbrStruct);
|
||||
c->repeatInfoOffset = info_offset;
|
||||
c->report = report;
|
||||
|
||||
RepeatInfo *info = (RepeatInfo *)((char *)c + info_offset);
|
||||
info->type = verify_u8(rtype);
|
||||
info->repeatMin = depth_to_u32(repeatMin);
|
||||
info->repeatMax = depth_to_u32(repeatMax);
|
||||
info->stateSize = rsi.stateSize;
|
||||
info->packedCtrlSize = rsi.packedCtrlSize;
|
||||
info->horizon = rsi.horizon;
|
||||
info->minPeriod = minPeriod;
|
||||
memcpy(&info->packedFieldSizes, rsi.packedFieldSizes.data(),
|
||||
byte_length(rsi.packedFieldSizes));
|
||||
info->patchCount = rsi.patchCount;
|
||||
info->patchSize = rsi.patchSize;
|
||||
info->encodingSize = rsi.encodingSize;
|
||||
info->patchesOffset = rsi.patchesOffset;
|
||||
|
||||
// Fill the NFA structure.
|
||||
nfa->nPositions = repeatMin;
|
||||
nfa->streamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize);
|
||||
nfa->scratchStateSize = (u32)sizeof(lbr_state);
|
||||
nfa->minWidth = verify_u32(repeatMin);
|
||||
nfa->maxWidth = repeatMax.is_finite() ? verify_u32(repeatMax) : 0;
|
||||
|
||||
// Fill the lbr table for sparse lbr model.
|
||||
if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
|
||||
u64a *table = getTable<LbrStruct>(nfa);
|
||||
// Adjust table length according to the optimal patch length.
|
||||
size_t len = nfa->length;
|
||||
assert((u32)repeatMax >= rsi.patchSize);
|
||||
len -= sizeof(u64a) * ((u32)repeatMax - rsi.patchSize);
|
||||
nfa->length = verify_u32(len);
|
||||
info->length = verify_u32(sizeof(RepeatInfo)
|
||||
+ sizeof(u64a) * (rsi.patchSize + 1));
|
||||
memcpy(table, rsi.table.data(), byte_length(rsi.table));
|
||||
}
|
||||
}
|
||||
|
||||
template <class LbrStruct> static
|
||||
aligned_unique_ptr<NFA> makeLbrNfa(NFAEngineType nfa_type,
|
||||
enum RepeatType rtype,
|
||||
const depth &repeatMax) {
|
||||
size_t tableLen = 0;
|
||||
if (rtype == REPEAT_SPARSE_OPTIMAL_P) {
|
||||
tableLen = sizeof(u64a) * (repeatMax + 1);
|
||||
}
|
||||
size_t len = sizeof(NFA) + sizeof(LbrStruct) + sizeof(RepeatInfo) +
|
||||
tableLen + sizeof(u64a);
|
||||
aligned_unique_ptr<NFA> nfa = aligned_zmalloc_unique<NFA>(len);
|
||||
nfa->type = verify_u8(nfa_type);
|
||||
nfa->length = verify_u32(len);
|
||||
return nfa;
|
||||
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA> buildLbrDot(const CharReach &cr, const depth &repeatMin,
|
||||
const depth &repeatMax, u32 minPeriod,
|
||||
bool is_reset, ReportID report) {
|
||||
if (!cr.all()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
||||
is_reset);
|
||||
aligned_unique_ptr<NFA> nfa
|
||||
= makeLbrNfa<lbr_dot>(LBR_NFA_Dot, rtype, repeatMax);
|
||||
struct lbr_dot *ld = (struct lbr_dot *)getMutableImplNfa(nfa.get());
|
||||
|
||||
fillNfa<lbr_dot>(nfa.get(), &ld->common, report, repeatMin, repeatMax,
|
||||
minPeriod, rtype);
|
||||
|
||||
DEBUG_PRINTF("built dot lbr\n");
|
||||
return nfa;
|
||||
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA> buildLbrVerm(const CharReach &cr,
|
||||
const depth &repeatMin,
|
||||
const depth &repeatMax, u32 minPeriod,
|
||||
bool is_reset, ReportID report) {
|
||||
const CharReach escapes(~cr);
|
||||
|
||||
if (escapes.count() != 1) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
||||
is_reset);
|
||||
aligned_unique_ptr<NFA> nfa
|
||||
= makeLbrNfa<lbr_verm>(LBR_NFA_Verm, rtype, repeatMax);
|
||||
struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get());
|
||||
lv->c = escapes.find_first();
|
||||
|
||||
fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
|
||||
minPeriod, rtype);
|
||||
|
||||
DEBUG_PRINTF("built verm lbr\n");
|
||||
return nfa;
|
||||
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA> buildLbrNVerm(const CharReach &cr,
|
||||
const depth &repeatMin,
|
||||
const depth &repeatMax, u32 minPeriod,
|
||||
bool is_reset, ReportID report) {
|
||||
const CharReach escapes(cr);
|
||||
|
||||
if (escapes.count() != 1) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
||||
is_reset);
|
||||
aligned_unique_ptr<NFA> nfa
|
||||
= makeLbrNfa<lbr_verm>(LBR_NFA_NVerm, rtype, repeatMax);
|
||||
struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get());
|
||||
lv->c = escapes.find_first();
|
||||
|
||||
fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax,
|
||||
minPeriod, rtype);
|
||||
|
||||
DEBUG_PRINTF("built negated verm lbr\n");
|
||||
return nfa;
|
||||
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA> buildLbrShuf(const CharReach &cr,
|
||||
const depth &repeatMin,
|
||||
const depth &repeatMax, u32 minPeriod,
|
||||
bool is_reset, ReportID report) {
|
||||
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
||||
is_reset);
|
||||
aligned_unique_ptr<NFA> nfa
|
||||
= makeLbrNfa<lbr_shuf>(LBR_NFA_Shuf, rtype, repeatMax);
|
||||
struct lbr_shuf *ls = (struct lbr_shuf *)getMutableImplNfa(nfa.get());
|
||||
|
||||
fillNfa<lbr_shuf>(nfa.get(), &ls->common, report, repeatMin, repeatMax,
|
||||
minPeriod, rtype);
|
||||
|
||||
if (shuftiBuildMasks(~cr, &ls->mask_lo, &ls->mask_hi) == -1) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("built shuf lbr\n");
|
||||
return nfa;
|
||||
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA> buildLbrTruf(const CharReach &cr,
|
||||
const depth &repeatMin,
|
||||
const depth &repeatMax, u32 minPeriod,
|
||||
bool is_reset, ReportID report) {
|
||||
enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod,
|
||||
is_reset);
|
||||
aligned_unique_ptr<NFA> nfa
|
||||
= makeLbrNfa<lbr_truf>(LBR_NFA_Truf, rtype, repeatMax);
|
||||
struct lbr_truf *lc = (struct lbr_truf *)getMutableImplNfa(nfa.get());
|
||||
|
||||
fillNfa<lbr_truf>(nfa.get(), &lc->common, report, repeatMin, repeatMax,
|
||||
minPeriod, rtype);
|
||||
|
||||
truffleBuildMasks(~cr, &lc->mask1, &lc->mask2);
|
||||
|
||||
DEBUG_PRINTF("built truffle lbr\n");
|
||||
return nfa;
|
||||
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA> constructLBR(const CharReach &cr,
|
||||
const depth &repeatMin,
|
||||
const depth &repeatMax, u32 minPeriod,
|
||||
bool is_reset, ReportID report) {
|
||||
DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n",
|
||||
repeatMin.str().c_str(), repeatMax.str().c_str(),
|
||||
describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(),
|
||||
report);
|
||||
assert(repeatMin <= repeatMax);
|
||||
assert(repeatMax.is_reachable());
|
||||
|
||||
aligned_unique_ptr<NFA> nfa
|
||||
= buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report);
|
||||
|
||||
if (!nfa) {
|
||||
nfa = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
|
||||
report);
|
||||
}
|
||||
if (!nfa) {
|
||||
nfa = buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset,
|
||||
report);
|
||||
}
|
||||
if (!nfa) {
|
||||
nfa = buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset,
|
||||
report);
|
||||
}
|
||||
if (!nfa) {
|
||||
nfa = buildLbrTruf(cr, repeatMin, repeatMax, minPeriod, is_reset,
|
||||
report);
|
||||
}
|
||||
|
||||
if (!nfa) {
|
||||
assert(0);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return nfa;
|
||||
}
|
||||
|
||||
/**
 * \brief Construct an LBR engine from the given PureRepeat.
 *
 * Returns nullptr if LBR construction is disabled in the grey box or if the
 * repeat does not carry exactly one report. The minimum period and reset
 * behaviour are derived from \p triggers; if the minimum period exceeds the
 * repeat's maximum bound, the repeat is treated as a reset.
 */
aligned_unique_ptr<NFA> constructLBR(const PureRepeat &repeat,
                                     const vector<vector<CharReach>> &triggers,
                                     const CompileContext &cc) {
    if (!cc.grey.allowLbr) {
        return nullptr;
    }

    assert(!repeat.reach.none());

    // Taken for any report count other than one, despite the message.
    if (repeat.reports.size() != 1) {
        DEBUG_PRINTF("too many reports\n");
        return nullptr;
    }

    bool is_reset;
    const u32 min_period = minPeriod(triggers, repeat.reach, &is_reset);

    const auto &bounds = repeat.bounds;
    if (depth(min_period) > bounds.max) {
        DEBUG_PRINTF("trigger is longer than repeat; only need one offset\n");
        is_reset = true;
    }

    const ReportID report = *repeat.reports.begin();

    DEBUG_PRINTF("building LBR %s\n", repeat.bounds.str().c_str());
    return constructLBR(repeat.reach, bounds.min, bounds.max, min_period,
                        is_reset, report);
}
|
||||
|
||||
/** \brief Construct an LBR engine from the given graph \p g. */
|
||||
aligned_unique_ptr<NFA> constructLBR(const NGHolder &g,
|
||||
const vector<vector<CharReach>> &triggers,
|
||||
const CompileContext &cc) {
|
||||
if (!cc.grey.allowLbr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
PureRepeat repeat;
|
||||
if (!isPureRepeat(g, repeat)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return constructLBR(repeat, triggers, cc);
|
||||
}
|
||||
|
||||
/** \brief True if graph \p g could be turned into an LBR engine. */
|
||||
bool isLBR(const NGHolder &g, const Grey &grey) {
|
||||
if (!grey.allowLbr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
PureRepeat repeat;
|
||||
if (!isPureRepeat(g, repeat)) {
|
||||
DEBUG_PRINTF("not pure bounded repeat\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (repeat.reports.size() != 1) {
|
||||
DEBUG_PRINTF("too many reports\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
71
src/nfagraph/ng_lbr.h
Normal file
71
src/nfagraph/ng_lbr.h
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Large Bounded Repeat (LBR) engine build code.
|
||||
*/
|
||||
|
||||
#ifndef NG_LBR_H
|
||||
#define NG_LBR_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
struct NFA;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class CharReach;
|
||||
class NGHolder;
|
||||
class ReportManager;
|
||||
struct CompileContext;
|
||||
struct DepthMinMax;
|
||||
struct Grey;
|
||||
struct PureRepeat;
|
||||
|
||||
/** \brief Construct an LBR engine from the given graph \p g. */
|
||||
aligned_unique_ptr<NFA>
|
||||
constructLBR(const NGHolder &g,
|
||||
const std::vector<std::vector<CharReach>> &triggers,
|
||||
const CompileContext &cc);
|
||||
|
||||
/** \brief Construct an LBR engine from the given PureRepeat. */
|
||||
aligned_unique_ptr<NFA>
|
||||
constructLBR(const PureRepeat &repeat,
|
||||
const std::vector<std::vector<CharReach>> &triggers,
|
||||
const CompileContext &cc);
|
||||
|
||||
/** \brief True if graph \p g could be turned into an LBR engine. */
|
||||
bool isLBR(const NGHolder &g, const Grey &grey);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_LBR_H
|
||||
571
src/nfagraph/ng_limex.cpp
Normal file
571
src/nfagraph/ng_limex.cpp
Normal file
@@ -0,0 +1,571 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Limex NFA construction code.
|
||||
*/
|
||||
#include "ng_limex.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "ng_equivalence.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_misc_opt.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_redundancy.h"
|
||||
#include "ng_repeat.h"
|
||||
#include "ng_reports.h"
|
||||
#include "ng_restructuring.h"
|
||||
#include "ng_squash.h"
|
||||
#include "ng_util.h"
|
||||
#include "ng_width.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/limex_compile.h"
|
||||
#include "nfa/limex_limits.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/verify_types.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
#ifndef NDEBUG
|
||||
// Some sanity checking for the graph; returns false if something is wrong.
|
||||
// Only used in assertions.
|
||||
static
|
||||
bool sanityCheckGraph(const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids) {
|
||||
ue2::unordered_set<u32> seen_states;
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
// Non-specials should have non-empty reachability.
|
||||
if (!is_special(v, g)) {
|
||||
if (g[v].char_reach.none()) {
|
||||
DEBUG_PRINTF("vertex %u has empty reach\n",
|
||||
g[v].index);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Vertices with edges to accept or acceptEod must have reports.
|
||||
if (is_match_vertex(v, g) && v != g.accept) {
|
||||
if (g[v].reports.empty()) {
|
||||
DEBUG_PRINTF("vertex %u has no reports\n",
|
||||
g[v].index);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Participant vertices should have distinct state indices.
|
||||
if (!contains(state_ids, v)) {
|
||||
DEBUG_PRINTF("vertex %u has no state index!\n",
|
||||
g[v].index);
|
||||
return false;
|
||||
}
|
||||
u32 s = state_ids.at(v);
|
||||
if (s != NO_STATE && !seen_states.insert(s).second) {
|
||||
DEBUG_PRINTF("vertex %u has dupe state %u\n",
|
||||
g[v].index, s);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
static
|
||||
void findSquashStates(const NGHolder &g,
|
||||
const vector<BoundedRepeatData> &repeats,
|
||||
map<NFAVertex, NFAStateSet> &squashMap) {
|
||||
squashMap = findSquashers(g);
|
||||
filterSquashers(g, squashMap);
|
||||
|
||||
/* We also filter out the cyclic states representing bounded repeats, as
|
||||
* they are not really cyclic. */
|
||||
for (const auto &br : repeats) {
|
||||
squashMap.erase(br.cyclic);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Drop edges from start to vertices that also have an edge from
|
||||
* startDs.
|
||||
*
|
||||
* Note that this also includes the (start, startDs) edge, which is not
|
||||
* necessary for actual NFA implementation (and is actually something we don't
|
||||
* want to affect state numbering, etc).
|
||||
*/
|
||||
static
|
||||
void dropRedundantStartEdges(NGHolder &g) {
|
||||
remove_out_edge_if(g.start, [&](const NFAEdge &e) {
|
||||
return edge(g.startDs, target(e, g), g).second;
|
||||
}, g);
|
||||
|
||||
// Ensure that we always remove (start, startDs), even if startDs has had
|
||||
// its self-loop removed as an optimization.
|
||||
remove_edge(g.start, g.startDs, g);
|
||||
}
|
||||
|
||||
static
|
||||
void makeTopStates(NGHolder &g, map<u32, NFAVertex> &tops,
|
||||
const map<u32, CharReach> &top_reach) {
|
||||
map<u32, vector<NFAVertex>> top_succs;
|
||||
for (const auto &e : out_edges_range(g.start, g)) {
|
||||
NFAVertex v = target(e, g);
|
||||
if (v == g.startDs) {
|
||||
continue;
|
||||
}
|
||||
u32 t = g[e].top;
|
||||
top_succs[t].push_back(v);
|
||||
}
|
||||
|
||||
for (const auto &top : top_succs) {
|
||||
u32 t = top.first;
|
||||
|
||||
CharReach top_cr;
|
||||
if (contains(top_reach, t)) {
|
||||
top_cr = top_reach.at(t);
|
||||
} else {
|
||||
top_cr = CharReach::dot();
|
||||
}
|
||||
|
||||
assert(!contains(tops, t));
|
||||
|
||||
NFAVertex s = NFAGraph::null_vertex();
|
||||
flat_set<NFAVertex> succs;
|
||||
insert(&succs, top.second);
|
||||
|
||||
for (auto v : top.second) {
|
||||
if (!top_cr.isSubsetOf(g[v].char_reach)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
flat_set<NFAVertex> vsuccs;
|
||||
insert(&vsuccs, adjacent_vertices(v, g));
|
||||
|
||||
if (succs != vsuccs) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (g[v].reports != g[g.start].reports) {
|
||||
continue;
|
||||
}
|
||||
s = v;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!s) {
|
||||
s = add_vertex(g[g.start], g);
|
||||
g[s].char_reach = top_cr;
|
||||
for (auto v : top.second) {
|
||||
add_edge(s, v, g);
|
||||
}
|
||||
}
|
||||
tops[t] = s;
|
||||
}
|
||||
|
||||
// We are completely replacing the start vertex, so clear its reports.
|
||||
clear_out_edges(g.start, g);
|
||||
add_edge(g.start, g.startDs, g);
|
||||
g[g.start].reports.clear();
|
||||
|
||||
// Only retain reports (which we copied on add_vertex above) for new top
|
||||
// vertices connected to accepts.
|
||||
for (const auto &m : tops) {
|
||||
NFAVertex v = m.second;
|
||||
if (!edge(v, g.accept, g).second && !edge(v, g.acceptEod, g).second) {
|
||||
g[v].reports.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
set<NFAVertex> findZombies(const NGHolder &h,
|
||||
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
const CompileContext &cc) {
|
||||
set<NFAVertex> zombies;
|
||||
if (!cc.grey.allowZombies) {
|
||||
return zombies;
|
||||
}
|
||||
|
||||
// We only use zombie masks in streaming mode.
|
||||
if (!cc.streaming) {
|
||||
return zombies;
|
||||
}
|
||||
|
||||
if (in_degree(h.acceptEod, h) != 1 || all_reports(h).size() != 1) {
|
||||
DEBUG_PRINTF("can be made undead - bad reports\n");
|
||||
return zombies;
|
||||
}
|
||||
|
||||
for (auto u : inv_adjacent_vertices_range(h.accept, h)) {
|
||||
assert(h[u].reports.size() == 1);
|
||||
for (auto v : adjacent_vertices_range(u, h)) {
|
||||
if (edge(v, h.accept, h).second
|
||||
&& h[v].char_reach.all()) {
|
||||
if (!contains(br_cyclic, v)) {
|
||||
goto ok;
|
||||
}
|
||||
|
||||
const BoundedRepeatSummary &sum = br_cyclic.at(v);
|
||||
|
||||
if (u == v && sum.repeatMax.is_infinite()) {
|
||||
goto ok;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
DEBUG_PRINTF("does not go to dot accept\n");
|
||||
return zombies;
|
||||
ok:;
|
||||
}
|
||||
|
||||
for (const auto &v : inv_adjacent_vertices_range(h.accept, h)) {
|
||||
if (state_ids.at(v) != NO_STATE) {
|
||||
zombies.insert(v);
|
||||
}
|
||||
}
|
||||
return zombies;
|
||||
}
|
||||
|
||||
static
|
||||
void reverseStateOrdering(ue2::unordered_map<NFAVertex, u32> &state_ids) {
|
||||
vector<NFAVertex> ordering;
|
||||
for (auto &e : state_ids) {
|
||||
if (e.second == NO_STATE) {
|
||||
continue;
|
||||
}
|
||||
ordering.push_back(e.first);
|
||||
}
|
||||
|
||||
// Sort in reverse order by state ID.
|
||||
sort(ordering.begin(), ordering.end(),
|
||||
[&state_ids](NFAVertex a, NFAVertex b) {
|
||||
return state_ids.at(a) > state_ids.at(b);
|
||||
});
|
||||
|
||||
u32 stateNum = 0;
|
||||
|
||||
for (const auto &v : ordering) {
|
||||
DEBUG_PRINTF("renumber, %u -> %u\n", state_ids.at(v), stateNum);
|
||||
state_ids[v] = stateNum++;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
map<u32, CharReach>
|
||||
findTopReach(const map<u32, vector<vector<CharReach>>> &triggers) {
|
||||
map<u32, CharReach> top_reach;
|
||||
|
||||
for (const auto &m : triggers) {
|
||||
const auto top = m.first;
|
||||
CharReach cr;
|
||||
for (const auto &trigger : m.second) {
|
||||
if (trigger.empty()) {
|
||||
// We don't know anything about this trigger. Assume it can
|
||||
// have any reach.
|
||||
cr.setall();
|
||||
break;
|
||||
}
|
||||
cr |= *trigger.rbegin();
|
||||
}
|
||||
|
||||
top_reach.emplace(top, cr);
|
||||
}
|
||||
|
||||
return top_reach;
|
||||
}
|
||||
|
||||
static
|
||||
unique_ptr<NGHolder>
|
||||
prepareGraph(const NGHolder &h_in, const ReportManager *rm,
|
||||
const map<u32, u32> &fixed_depth_tops,
|
||||
const map<u32, vector<vector<CharReach>>> &triggers,
|
||||
bool impl_test_only, const CompileContext &cc,
|
||||
ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
vector<BoundedRepeatData> &repeats, map<u32, NFAVertex> &tops) {
|
||||
assert(is_triggered(h_in) || fixed_depth_tops.empty());
|
||||
|
||||
unique_ptr<NGHolder> h = cloneHolder(h_in);
|
||||
|
||||
// Bounded repeat handling.
|
||||
analyseRepeats(*h, rm, fixed_depth_tops, triggers, &repeats, cc.streaming,
|
||||
impl_test_only, cc.grey);
|
||||
|
||||
// If we're building a rose/suffix, do the top dance.
|
||||
if (is_triggered(*h)) {
|
||||
makeTopStates(*h, tops, findTopReach(triggers));
|
||||
}
|
||||
|
||||
dropRedundantStartEdges(*h);
|
||||
|
||||
// Do state numbering
|
||||
state_ids = numberStates(*h, tops);
|
||||
dropUnusedStarts(*h, state_ids);
|
||||
|
||||
// In debugging, we sometimes like to reverse the state numbering to stress
|
||||
// the NFA construction code.
|
||||
if (cc.grey.numberNFAStatesWrong) {
|
||||
reverseStateOrdering(state_ids);
|
||||
}
|
||||
|
||||
assert(sanityCheckGraph(*h, state_ids));
|
||||
return h;
|
||||
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA>
|
||||
constructNFA(const NGHolder &h_in, const ReportManager *rm,
|
||||
const map<u32, u32> &fixed_depth_tops,
|
||||
const map<u32, vector<vector<CharReach>>> &triggers,
|
||||
bool compress_state, bool do_accel, bool impl_test_only, u32 hint,
|
||||
const CompileContext &cc) {
|
||||
if (!generates_callbacks(h_in)) {
|
||||
rm = nullptr;
|
||||
} else {
|
||||
assert(rm);
|
||||
}
|
||||
|
||||
ue2::unordered_map<NFAVertex, u32> state_ids;
|
||||
vector<BoundedRepeatData> repeats;
|
||||
map<u32, NFAVertex> tops;
|
||||
unique_ptr<NGHolder> h
|
||||
= prepareGraph(h_in, rm, fixed_depth_tops, triggers, impl_test_only, cc,
|
||||
state_ids, repeats, tops);
|
||||
|
||||
// Quick exit: if we've got an embarrassment of riches, i.e. more states
|
||||
// than we can implement in our largest NFA model, bail here.
|
||||
u32 numStates = countStates(*h, state_ids, false);
|
||||
if (numStates > NFA_MAX_STATES) {
|
||||
DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
map<NFAVertex, BoundedRepeatSummary> br_cyclic;
|
||||
for (const auto &br : repeats) {
|
||||
br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax);
|
||||
}
|
||||
|
||||
map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
map<NFAVertex, NFAStateSet> squashMap;
|
||||
|
||||
// build map of squashed and squashers
|
||||
if (cc.grey.squashNFA) {
|
||||
findSquashStates(*h, repeats, squashMap);
|
||||
|
||||
if (rm && cc.grey.highlanderSquash) {
|
||||
reportSquashMap = findHighlanderSquashers(*h, *rm);
|
||||
}
|
||||
}
|
||||
|
||||
set<NFAVertex> zombies = findZombies(*h, br_cyclic, state_ids, cc);
|
||||
|
||||
if (!cc.streaming || !cc.grey.compressNFAState) {
|
||||
compress_state = false;
|
||||
}
|
||||
|
||||
return generate(*h, state_ids, repeats, reportSquashMap, squashMap, tops,
|
||||
zombies, do_accel, compress_state, hint, cc);
|
||||
}
|
||||
|
||||
/**
 * \brief Construct an NFA from \p h_in without a model hint.
 *
 * Forwards to the worker with acceleration taken from the grey box, no
 * implementation-test-only mode and INVALID_NFA as the hint.
 */
aligned_unique_ptr<NFA>
constructNFA(const NGHolder &h_in, const ReportManager *rm,
             const map<u32, u32> &fixed_depth_tops,
             const map<u32, vector<vector<CharReach>>> &triggers,
             bool compress_state, const CompileContext &cc) {
    const bool impl_test_only = false;
    const bool do_accel = cc.grey.accelerateNFA;
    const u32 no_hint = INVALID_NFA;
    return constructNFA(h_in, rm, fixed_depth_tops, triggers, compress_state,
                        do_accel, impl_test_only, no_hint, cc);
}
|
||||
|
||||
#ifndef RELEASE_BUILD
|
||||
// Variant that allows a hint to be specified.
|
||||
aligned_unique_ptr<NFA>
|
||||
constructNFA(const NGHolder &h_in, const ReportManager *rm,
|
||||
const map<u32, u32> &fixed_depth_tops,
|
||||
const map<u32, vector<vector<CharReach>>> &triggers,
|
||||
bool compress_state, u32 hint, const CompileContext &cc) {
|
||||
const bool do_accel = cc.grey.accelerateNFA;
|
||||
const bool impl_test_only = false;
|
||||
return constructNFA(h_in, rm, fixed_depth_tops, triggers,
|
||||
compress_state, do_accel, impl_test_only, hint, cc);
|
||||
}
|
||||
#endif // RELEASE_BUILD
|
||||
|
||||
static
|
||||
aligned_unique_ptr<NFA> constructReversedNFA_i(const NGHolder &h_in, u32 hint,
|
||||
const CompileContext &cc) {
|
||||
// Make a mutable copy of the graph that we can renumber etc.
|
||||
NGHolder h;
|
||||
cloneHolder(h, h_in);
|
||||
assert(h.kind == NFA_REV_PREFIX); /* triggered, raises internal callbacks */
|
||||
|
||||
// Do state numbering.
|
||||
auto state_ids = numberStates(h);
|
||||
|
||||
dropUnusedStarts(h, state_ids);
|
||||
|
||||
// Quick exit: if we've got an embarrassment of riches, i.e. more states
|
||||
// than we can implement in our largest NFA model, bail here.
|
||||
u32 numStates = countStates(h, state_ids, false);
|
||||
if (numStates > NFA_MAX_STATES) {
|
||||
DEBUG_PRINTF("Can't build an NFA with %u states\n", numStates);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
assert(sanityCheckGraph(h, state_ids));
|
||||
|
||||
map<u32, NFAVertex> tops; /* only the standards tops for nfas */
|
||||
set<NFAVertex> zombies;
|
||||
vector<BoundedRepeatData> repeats;
|
||||
map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
map<NFAVertex, NFAStateSet> squashMap;
|
||||
|
||||
return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops,
|
||||
zombies, false, false, hint, cc);
|
||||
}
|
||||
|
||||
/** \brief Build a reverse NFA from \p h_in with no model hint
 * (INVALID_NFA). */
aligned_unique_ptr<NFA> constructReversedNFA(const NGHolder &h_in,
                                             const CompileContext &cc) {
    const u32 no_hint = INVALID_NFA;
    return constructReversedNFA_i(h_in, no_hint, cc);
}
|
||||
|
||||
#ifndef RELEASE_BUILD
|
||||
// Variant that allows a hint to be specified.
|
||||
aligned_unique_ptr<NFA> constructReversedNFA(const NGHolder &h_in, u32 hint,
|
||||
const CompileContext &cc) {
|
||||
return constructReversedNFA_i(h_in, hint, cc);
|
||||
}
|
||||
#endif // RELEASE_BUILD
|
||||
|
||||
/**
 * \brief Check whether \p g can be implemented as an NFA.
 *
 * Graphs that are small enough even after allowing for NFA_MAX_TOP_MASKS
 * extra states are accepted immediately. Otherwise the first part of the
 * construction process is run (in implementation-test-only mode, with no
 * fixed-depth tops or triggers) and the resulting state count is compared
 * against NFA_MAX_STATES.
 *
 * \return zero if the graph is not implementable; non-zero otherwise.
 *
 * NOTE(review): the quick-accept path returns \c true (i.e. 1) rather than a
 * state count, so only the slow path's return value is a real count --
 * confirm that callers use the result purely as a boolean.
 */
u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
                       const CompileContext &cc) {
    // Quick check: we can always implement an NFA with less than
    // NFA_MAX_STATES states. Note that top masks can generate extra states,
    // so we account for those here too.
    if (num_vertices(g) + NFA_MAX_TOP_MASKS < NFA_MAX_STATES) {
        return true;
    }

    if (generates_callbacks(g)) {
        assert(rm);
    } else {
        rm = nullptr;
    }

    // The BEST way to tell if an NFA is implementable is to implement it!
    // Perform the first part of the construction process and see if the
    // resultant NGHolder has <= NFA_MAX_STATES. If it does, we know we can
    // implement it as an NFA.
    const bool impl_test_only = true;
    const map<u32, u32> fixed_depth_tops; // empty
    const map<u32, vector<vector<CharReach>>> triggers; // empty

    ue2::unordered_map<NFAVertex, u32> state_ids;
    vector<BoundedRepeatData> repeats;
    map<u32, NFAVertex> tops;
    unique_ptr<NGHolder> h
        = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc,
                       state_ids, repeats, tops);
    assert(h);

    const u32 numStates = countStates(*h, state_ids, false);
    return numStates <= NFA_MAX_STATES ? numStates : 0;
}
|
||||
|
||||
/**
 * \brief Apply late-stage reductions to \p g, undoing them if the reduced
 * graph is no longer implementable as an NFA.
 *
 * Runs reduceGraphEquivalences() and removeRedundancy(), plus
 * pruneHighlanderDominated() when a report manager is supplied and the graph
 * generates callbacks. If isImplementableNFA() then rejects the result, \p g
 * is restored from a copy taken on entry.
 */
void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm,
                              const CompileContext &cc) {
    // Snapshot of the untouched graph, used for rollback.
    NGHolder g_pristine;
    cloneHolder(g_pristine, g);

    reduceGraphEquivalences(g, cc);
    removeRedundancy(g, som);

    if (rm && generates_callbacks(g)) {
        pruneHighlanderDominated(g, *rm);
    }

    if (isImplementableNFA(g, rm, cc)) {
        return; // reductions are fine, keep them
    }

    DEBUG_PRINTF("reductions made graph unimplementable, roll back\n");
    clear_graph(g);
    cloneHolder(g, g_pristine);
}
|
||||
|
||||
/**
 * \brief Count the accel states an implementation of \p g would have.
 *
 * Prepares a copy of the graph in implementation-test-only mode (no
 * fixed-depth tops or triggers) and delegates to the lower-level
 * countAccelStates() with empty zombie and squash information.
 *
 * \return the accel state count, or NFA_MAX_ACCEL_STATES + 1 if the prepared
 *         graph is missing or has more than NFA_MAX_STATES states.
 */
u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
                     const CompileContext &cc) {
    if (generates_callbacks(g)) {
        assert(rm);
    } else {
        rm = nullptr;
    }

    const bool impl_test_only = true;
    const map<u32, u32> fixed_depth_tops; // empty
    const map<u32, vector<vector<CharReach>>> triggers; // empty

    ue2::unordered_map<NFAVertex, u32> state_ids;
    vector<BoundedRepeatData> repeats;
    map<u32, NFAVertex> tops;
    unique_ptr<NGHolder> h
        = prepareGraph(g, rm, fixed_depth_tops, triggers, impl_test_only, cc,
                       state_ids, repeats, tops);

    const bool constructible
        = h && countStates(*h, state_ids, false) <= NFA_MAX_STATES;
    if (!constructible) {
        DEBUG_PRINTF("not constructible\n");
        return NFA_MAX_ACCEL_STATES + 1;
    }

    assert(h->kind == g.kind);

    // Should have no bearing on accel calculation, so we leave these empty.
    const set<NFAVertex> zombies;
    const map<NFAVertex, NFAStateSet> reportSquashMap;
    const map<NFAVertex, NFAStateSet> squashMap;

    return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap,
                            tops, zombies, cc);
}
|
||||
|
||||
} // namespace ue2
|
||||
138
src/nfagraph/ng_limex.h
Normal file
138
src/nfagraph/ng_limex.h
Normal file
@@ -0,0 +1,138 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Limex NFA construction code.
|
||||
*/
|
||||
|
||||
#ifndef NG_LIMEX_H
|
||||
#define NG_LIMEX_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "som/som.h"
|
||||
#include "util/alloc.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
struct NFA;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class CharReach;
|
||||
class NG;
|
||||
class NGHolder;
|
||||
class ReportManager;
|
||||
struct CompileContext;
|
||||
|
||||
/** \brief Determine if the given graph is implementable as an NFA.
|
||||
*
|
||||
* Returns zero if the NFA is not implementable (usually because it has too
|
||||
* many states for any of our models). Otherwise returns the number of states.
|
||||
*
|
||||
* ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and
|
||||
* NFA_INFIX use unmanaged rose-local reports.
|
||||
*/
|
||||
u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
|
||||
const CompileContext &cc);
|
||||
|
||||
/** \brief Late-stage graph reductions.
|
||||
*
|
||||
* This will call \ref removeRedundancy and apply its changes to the given
|
||||
* holder only if it is implementable afterwards. */
|
||||
void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm,
|
||||
const CompileContext &cc);
|
||||
|
||||
/**
|
||||
* \brief For a given graph, count the number of accel states it will have in
|
||||
* an implementation.
|
||||
*
|
||||
* \return the number of accel states, or NFA_MAX_ACCEL_STATES + 1 if an
|
||||
* implementation would not be constructible.
|
||||
*/
|
||||
u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
|
||||
const CompileContext &cc);
|
||||
|
||||
/** \brief Construct an NFA from the given NFAGraph.
|
||||
*
|
||||
* Returns a null pointer if the NFA is not implementable (usually because it
|
||||
* has too many states for any of our models). Otherwise returns the built NFA.
|
||||
*
|
||||
* ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and
|
||||
* NFA_INFIX use unmanaged rose-local reports.
|
||||
*
|
||||
* Note: this variant selects the NFA model automatically; a variant taking a
|
||||
* \a hint parameter is declared below for non-release builds.
|
||||
*/
|
||||
aligned_unique_ptr<NFA>
|
||||
constructNFA(const NGHolder &g, const ReportManager *rm,
|
||||
const std::map<u32, u32> &fixed_depth_tops,
|
||||
const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
|
||||
bool compress_state, const CompileContext &cc);
|
||||
|
||||
/** \brief Build a reverse NFA from the graph given, which should have already
|
||||
* been reversed.
|
||||
*
|
||||
* Used for reverse NFAs used in SOM mode.
|
||||
*/
|
||||
aligned_unique_ptr<NFA> constructReversedNFA(const NGHolder &h,
|
||||
const CompileContext &cc);
|
||||
|
||||
#ifndef RELEASE_BUILD
|
||||
|
||||
/** \brief Construct an NFA (with model type hint) from the given NFAGraph.
|
||||
*
|
||||
* Returns a null pointer if the NFA is not implementable (usually because it
|
||||
* has too many states for any of our models). Otherwise returns the built NFA.
|
||||
*
|
||||
* ReportManager is used by NFA_SUFFIX and NFA_OUTFIX only. NFA_PREFIX and
|
||||
* NFA_INFIX use unmanaged rose-local reports.
|
||||
*
|
||||
* Note: this variant of the function allows a model to be specified with the
|
||||
* \a hint parameter.
|
||||
*/
|
||||
aligned_unique_ptr<NFA>
|
||||
constructNFA(const NGHolder &g, const ReportManager *rm,
|
||||
const std::map<u32, u32> &fixed_depth_tops,
|
||||
const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
|
||||
bool compress_state, u32 hint, const CompileContext &cc);
|
||||
|
||||
/** \brief Build a reverse NFA (with model type hint) from the graph given,
|
||||
* which should have already been reversed.
|
||||
*
|
||||
* Used for reverse NFAs used in SOM mode.
|
||||
*/
|
||||
aligned_unique_ptr<NFA> constructReversedNFA(const NGHolder &h, u32 hint,
|
||||
const CompileContext &cc);
|
||||
|
||||
#endif // RELEASE_BUILD
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_LIMEX_H
|
||||
778
src/nfagraph/ng_limex_accel.cpp
Normal file
778
src/nfagraph/ng_limex_accel.cpp
Normal file
@@ -0,0 +1,778 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief NFA acceleration analysis code.
|
||||
*/
|
||||
#include "ng_limex_accel.h"
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ng_misc_opt.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include "nfa/accel.h"
|
||||
|
||||
#include "util/bitutils.h" // for CASE_CLEAR
|
||||
#include "util/charreach.h"
|
||||
#include "util/container.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
#define WIDE_FRIEND_MIN 200
|
||||
|
||||
static
|
||||
void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
|
||||
const flat_set<NFAVertex> &cands,
|
||||
const flat_set<NFAVertex> &preds,
|
||||
flat_set<NFAVertex> *next_cands,
|
||||
flat_set<NFAVertex> *next_preds,
|
||||
flat_set<NFAVertex> *friends) {
|
||||
for (auto v : cands) {
|
||||
if (contains(preds, v)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const CharReach &acr = g[v].char_reach;
|
||||
DEBUG_PRINTF("checking %u\n", g[v].index);
|
||||
|
||||
if (acr.count() < WIDE_FRIEND_MIN || !acr.isSubsetOf(cr)) {
|
||||
DEBUG_PRINTF("bad reach %zu\n", acr.count());
|
||||
continue;
|
||||
}
|
||||
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (!contains(preds, u)) {
|
||||
DEBUG_PRINTF("bad pred\n");
|
||||
goto next_cand;
|
||||
}
|
||||
}
|
||||
|
||||
next_preds->insert(v);
|
||||
insert(next_cands, adjacent_vertices(v, g));
|
||||
|
||||
DEBUG_PRINTF("%u is a friend indeed\n", g[v].index);
|
||||
friends->insert(v);
|
||||
next_cand:;
|
||||
}
|
||||
}
|
||||
|
||||
void findAccelFriends(const NGHolder &g, NFAVertex v,
|
||||
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
|
||||
u32 offset, flat_set<NFAVertex> *friends) {
|
||||
/* A friend of an accel state is a successor state which can only be on when
|
||||
* the accel is on. This requires that it has a subset of the accel state's
|
||||
* preds and a charreach which is a subset of the accel state.
|
||||
*
|
||||
* A friend can be safely ignored when accelerating provided there is
|
||||
* sufficient back-off. A friend is useful if it has a wide reach.
|
||||
*/
|
||||
|
||||
/* BR cyclic states which may go stale cannot have friends as they may
|
||||
* suddenly turn off leading their so-called friends stranded and alone.
|
||||
* TODO: restrict to only stale going BR cyclics
|
||||
*/
|
||||
if (contains(br_cyclic, v) && !br_cyclic.at(v).unbounded()) {
|
||||
return;
|
||||
}
|
||||
|
||||
u32 friend_depth = offset + 1;
|
||||
|
||||
flat_set<NFAVertex> preds;
|
||||
insert(&preds, inv_adjacent_vertices(v, g));
|
||||
const CharReach &cr = g[v].char_reach;
|
||||
|
||||
flat_set<NFAVertex> cands;
|
||||
insert(&cands, adjacent_vertices(v, g));
|
||||
|
||||
flat_set<NFAVertex> next_preds;
|
||||
flat_set<NFAVertex> next_cands;
|
||||
for (u32 i = 0; i < friend_depth; i++) {
|
||||
findAccelFriendGeneration(g, cr, cands, preds, &next_cands, &next_preds,
|
||||
friends);
|
||||
preds.insert(next_preds.begin(), next_preds.end());
|
||||
next_preds.clear();
|
||||
cands.swap(next_cands);
|
||||
next_cands.clear();
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void buildTwoByteStops(flat_set<pair<u8, u8>> &twobyte, const CharReach &cr1,
|
||||
const CharReach &cr2) {
|
||||
for (size_t c1 = cr1.find_first(); c1 != cr1.npos; c1 = cr1.find_next(c1)) {
|
||||
for (size_t c2 = cr2.find_first(); c2 != cr2.npos;
|
||||
c2 = cr2.find_next(c2)) {
|
||||
twobyte.emplace((u8)c1, (u8)c2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void findStopLiteralsAtVertex(NFAVertex v, const NGHolder &g,
|
||||
DoubleAccelInfo &build) {
|
||||
DEBUG_PRINTF("state %u\n", g[v].index);
|
||||
|
||||
// double-byte accel is possible: calculate all single- and double-byte
|
||||
// accel literals.
|
||||
const CharReach &cr1 = g[v].char_reach;
|
||||
|
||||
if (edge(v, g.accept, g).second) {
|
||||
// If this first byte is an accept state, it must contribute a
|
||||
// single-byte escape. We can still go on and calculate additional
|
||||
// double-byte ones, though.
|
||||
/* TODO: fix for rose */
|
||||
build.stop1 |= cr1;
|
||||
}
|
||||
|
||||
flat_set<pair<u8, u8>> twobyte; // for just this starting state
|
||||
bool single = false;
|
||||
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (w == g.accept || w == g.acceptEod) {
|
||||
continue;
|
||||
}
|
||||
const CharReach &cr2 = g[w].char_reach;
|
||||
size_t count = cr1.count() * cr2.count() + build.stop2.size();
|
||||
if (count > 0 && count <= 8) { // can't do more than 8 two-byte
|
||||
buildTwoByteStops(twobyte, cr1, cr2);
|
||||
} else {
|
||||
// two many two-byte literals, add the first byte as single
|
||||
single = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (single || twobyte.empty()) {
|
||||
assert(!cr1.none());
|
||||
build.stop1 |= cr1;
|
||||
} else {
|
||||
assert(!twobyte.empty());
|
||||
build.stop2.insert(twobyte.begin(), twobyte.end());
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool is_bit5_insensitive(const flat_set<pair<u8, u8>> &stop) {
|
||||
if (stop.size() != 4) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const u8 a = stop.begin()->first & CASE_CLEAR;
|
||||
const u8 b = stop.begin()->second & CASE_CLEAR;
|
||||
|
||||
for (flat_set<pair<u8, u8>>::const_iterator it = stop.begin();
|
||||
it != stop.end(); ++it) {
|
||||
if ((it->first & CASE_CLEAR) != a || (it->second & CASE_CLEAR) != b) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool is_dverm(const DoubleAccelInfo &a) {
|
||||
if (a.stop1.any()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (a.stop2.size() == 1) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return is_bit5_insensitive(a.stop2);
|
||||
}
|
||||
|
||||
static
|
||||
bool is_double_better(const DoubleAccelInfo &a, const DoubleAccelInfo &b) {
|
||||
/* Note: this is not an operator< */
|
||||
|
||||
if (a.stop2.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (b.stop2.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (a.stop1.count() > b.stop1.count()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (a.stop1.count() < b.stop1.count()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool a_dvm = is_dverm(a);
|
||||
bool b_dvm = is_dverm(b);
|
||||
|
||||
if (b_dvm && !a_dvm) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!b_dvm && a_dvm) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (a.stop2.size() > b.stop2.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (a.stop2.size() < b.stop2.size()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return a.offset < b.offset;
|
||||
}
|
||||
|
||||
/** \brief Find the escape literals for a two byte accel at the given accel
|
||||
* offset */
|
||||
static
|
||||
void findDoubleAccel(const NGHolder &g, NFAVertex v, u32 accel_offset,
|
||||
DoubleAccelInfo &build) {
|
||||
DEBUG_PRINTF("find double accel +%u for vertex %u\n", accel_offset,
|
||||
g[v].index);
|
||||
build.offset = accel_offset;
|
||||
|
||||
// Our accel state contributes single-byte escapes
|
||||
build.stop1 |= ~g[v].char_reach;
|
||||
|
||||
flat_set<NFAVertex> searchStates; // states that contribute stop literals
|
||||
searchStates.insert(v); /* TODO: verify */
|
||||
|
||||
/* Note: We cannot search past an accepting state */
|
||||
/* TODO: remove restriction for non-callback generating */
|
||||
flat_set<NFAVertex> nextStates;
|
||||
|
||||
insert(&nextStates, adjacent_vertices(v, g));
|
||||
nextStates.erase(v);
|
||||
nextStates.erase(g.accept);
|
||||
nextStates.erase(g.acceptEod);
|
||||
|
||||
searchStates.swap(nextStates);
|
||||
nextStates.clear();
|
||||
|
||||
// subsequent iterations are simpler, just follow all edges
|
||||
for (u32 j = 1; j <= accel_offset; j++) {
|
||||
for (auto u : searchStates) {
|
||||
insert(&nextStates, adjacent_vertices(u, g));
|
||||
if (edge(u, g.accept, g).second) {
|
||||
nextStates.clear();
|
||||
break;
|
||||
}
|
||||
nextStates.erase(g.accept);
|
||||
nextStates.erase(g.acceptEod);
|
||||
}
|
||||
|
||||
searchStates.swap(nextStates);
|
||||
nextStates.clear();
|
||||
}
|
||||
|
||||
vector<NFAVertex> sorted;
|
||||
insert(&sorted, sorted.end(), searchStates);
|
||||
sort(sorted.begin(), sorted.end(), make_index_ordering(g));
|
||||
for (auto sv : sorted) {
|
||||
findStopLiteralsAtVertex(sv, g, build);
|
||||
}
|
||||
}
|
||||
|
||||
/** \brief Try every accel offset from 0 to MAX_ACCEL_DEPTH (inclusive) for
 * vertex \a v and return the double accel info judged best by
 * is_double_better(). */
DoubleAccelInfo findBestDoubleAccelInfo(const NGHolder &g, NFAVertex v) {
    DoubleAccelInfo best;

    u32 offset = 0;
    while (offset <= MAX_ACCEL_DEPTH) {
        DoubleAccelInfo candidate;
        findDoubleAccel(g, v, offset, candidate);
        if (is_double_better(candidate, best)) {
            best = candidate;
        }
        ++offset;
    }

    return best;
}
|
||||
|
||||
static
|
||||
void findPaths(const NGHolder &g, NFAVertex v,
|
||||
const vector<CharReach> &refined_cr,
|
||||
vector<vector<CharReach> > *paths,
|
||||
const flat_set<NFAVertex> &forbidden, u32 depth) {
|
||||
static const u32 MAGIC_TOO_WIDE_NUMBER = 16;
|
||||
if (!depth) {
|
||||
paths->push_back(vector<CharReach>());
|
||||
return;
|
||||
}
|
||||
if (v == g.accept || v == g.acceptEod) {
|
||||
paths->push_back(vector<CharReach>());
|
||||
if (!generates_callbacks(g) || v == g.acceptEod) {
|
||||
paths->back().push_back(CharReach()); /* red tape options */
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* for the escape 'literals' we want to use the minimal cr so we
|
||||
* can be more selective */
|
||||
const CharReach &cr = refined_cr[g[v].index];
|
||||
|
||||
if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER
|
||||
|| hasSelfLoop(v, g)) {
|
||||
/* give up on pushing past this point */
|
||||
paths->push_back(vector<CharReach>());
|
||||
vector<CharReach> &p = paths->back();
|
||||
p.push_back(cr);
|
||||
return;
|
||||
}
|
||||
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (contains(forbidden, w)) {
|
||||
/* path has looped back to one of the active+boring acceleration
|
||||
* states. We can ignore this path if we have sufficient back-
|
||||
* off. */
|
||||
paths->push_back(vector<CharReach>());
|
||||
paths->back().push_back(CharReach());
|
||||
continue;
|
||||
}
|
||||
|
||||
u32 new_depth = depth - 1;
|
||||
vector<vector<CharReach> > curr;
|
||||
do {
|
||||
curr.clear();
|
||||
findPaths(g, w, refined_cr, &curr, forbidden, new_depth);
|
||||
} while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER);
|
||||
|
||||
for (vector<vector<CharReach> >::iterator it = curr.begin();
|
||||
it != curr.end(); ++it) {
|
||||
paths->push_back(vector<CharReach>());
|
||||
vector<CharReach> &p = paths->back();
|
||||
p.swap(*it);
|
||||
p.push_back(cr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
AccelScheme merge(const AccelScheme &a, const AccelScheme &b) {
|
||||
return AccelScheme(a.cr | b.cr, MAX(a.offset, b.offset));
|
||||
}
|
||||
|
||||
static
|
||||
void findBest(vector<vector<CharReach> >::const_iterator pb,
|
||||
vector<vector<CharReach> >::const_iterator pe,
|
||||
const AccelScheme &curr, AccelScheme *best) {
|
||||
assert(curr.offset <= MAX_ACCEL_DEPTH);
|
||||
DEBUG_PRINTF("paths left %zu\n", pe - pb);
|
||||
if (pb == pe) {
|
||||
*best = curr;
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
|
||||
|
||||
vector<AccelScheme> priority_path;
|
||||
u32 i = 0;
|
||||
for (vector<CharReach>::const_iterator p = pb->begin(); p != pb->end();
|
||||
++p, i++) {
|
||||
priority_path.push_back(AccelScheme(*p & ~curr.cr, i));
|
||||
}
|
||||
|
||||
sort(priority_path.begin(), priority_path.end());
|
||||
for (vector<AccelScheme>::iterator it = priority_path.begin();
|
||||
it != priority_path.end(); ++it) {
|
||||
vector<AccelScheme>::iterator jt = it + 1;
|
||||
for (; jt != priority_path.end(); ++jt) {
|
||||
if (!it->cr.isSubsetOf(jt->cr)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
priority_path.erase(it + 1, jt);
|
||||
DEBUG_PRINTF("||%zu\n", it->cr.count());
|
||||
}
|
||||
DEBUG_PRINTF("---\n");
|
||||
|
||||
for (vector<AccelScheme>::const_iterator it = priority_path.begin();
|
||||
it != priority_path.end(); ++it) {
|
||||
DEBUG_PRINTF("%u:|| = %zu; p remaining len %zu\n", i, it->cr.count(),
|
||||
priority_path.end() - it);
|
||||
|
||||
AccelScheme in = merge(curr, *it);
|
||||
|
||||
if (in > *best) {
|
||||
DEBUG_PRINTF("worse\n");
|
||||
continue;
|
||||
}
|
||||
AccelScheme temp = *best;
|
||||
findBest(pb + 1, pe, in, &temp);
|
||||
if (temp < *best) {
|
||||
DEBUG_PRINTF("new best\n");
|
||||
*best = temp;
|
||||
if (curr.cr == best->cr) {
|
||||
return; /* could only get better by offset */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG

/** \brief Debug helper: print every path as a bracketed list of character
 * classes. */
static
void dumpPaths(const vector<vector<CharReach> > &paths) {
    for (const auto &path : paths) {
        DEBUG_PRINTF("path: [");
        for (const auto &cr : path) {
            printf(" [");
            describeClass(stdout, cr, 20, CC_OUT_TEXT);
            printf("]");
        }
        printf(" ]\n");
    }
}

#endif
|
||||
|
||||
static
|
||||
void blowoutPathsLessStrictSegment(vector<vector<CharReach> > *paths) {
|
||||
/* paths segments which are a superset of an earlier segment should never be
|
||||
* picked as an acceleration segment -> to improve processing just replace
|
||||
* with dot */
|
||||
for (vector<vector<CharReach> >::iterator p = paths->begin();
|
||||
p != paths->end(); ++p) {
|
||||
for (vector<CharReach>::iterator it = p->begin(); it != p->end();
|
||||
++it) {
|
||||
vector<CharReach>::iterator jt = it;
|
||||
for (++jt; jt != p->end(); ++jt) {
|
||||
if (it->isSubsetOf(*jt)) {
|
||||
*jt = CharReach::dot();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void unifyPathsLastSegment(vector<vector<CharReach> > *paths) {
|
||||
/* try to unify paths which only differ in the last segment */
|
||||
for (vector<vector<CharReach> >::iterator p = paths->begin();
|
||||
p != paths->end() && p + 1 != paths->end();) {
|
||||
vector<CharReach> &a = *p;
|
||||
vector<CharReach> &b = *(p + 1);
|
||||
|
||||
if (a.size() != b.size()) {
|
||||
++p;
|
||||
continue;
|
||||
}
|
||||
|
||||
u32 i = 0;
|
||||
for (; i < a.size() - 1; i++) {
|
||||
if (a[i] != b[i]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == a.size() - 1) {
|
||||
/* we can unify these paths */
|
||||
a[i] |= b[i];
|
||||
paths->erase(p + 1);
|
||||
} else {
|
||||
++p;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void improvePaths(vector<vector<CharReach> > *paths) {
|
||||
#ifdef DEBUG
|
||||
DEBUG_PRINTF("orig paths\n");
|
||||
dumpPaths(*paths);
|
||||
#endif
|
||||
blowoutPathsLessStrictSegment(paths);
|
||||
|
||||
sort(paths->begin(), paths->end());
|
||||
|
||||
unifyPathsLastSegment(paths);
|
||||
|
||||
#ifdef DEBUG
|
||||
DEBUG_PRINTF("opt paths\n");
|
||||
dumpPaths(*paths);
|
||||
#endif
|
||||
}
|
||||
|
||||
/** \brief Look for a single-byte acceleration scheme covering all of the
 * cyclic states in \a verts.
 *
 * \param refined_cr per-vertex reach (indexed by vertex index) used for the
 *                   escape paths
 * \param br_cyclic  bounded repeat cyclic states; entries into these are
 *                   never ignored when building paths
 * \param allow_wide use the larger ACCEL_MAX_FLOATING_STOP_CHAR limit on the
 *                   number of stop characters
 * \return the best scheme found, or a default-constructed (invalid) scheme if
 *         a vertex has no self-loop, there are too many stop characters, or
 *         there are too many paths to explore */
AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
                         const vector<CharReach> &refined_cr,
                         const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
                         bool allow_wide) {
    CharReach terminating;
    for (auto v : verts) {
        if (!hasSelfLoop(v, g)) {
            DEBUG_PRINTF("no self loop\n");
            return AccelScheme(); /* invalid scheme */
        }

        // any character a state is not reachable on terminates the
        // acceleration; we want this set to be small
        terminating |= ~g[v].char_reach;
    }

    DEBUG_PRINTF("set vertex has %zu stop chars\n", terminating.count());
    size_t limit = ACCEL_MAX_STOP_CHAR;
    if (allow_wide) {
        limit = ACCEL_MAX_FLOATING_STOP_CHAR;
    }
    if (terminating.count() > limit) {
        return AccelScheme(); /* invalid scheme */
    }

    /* Note: we can not in general (TODO: ignore when possible) ignore entries
     * into the bounded repeat cyclic states as that is when the magic happens
     */
    flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end());
    for (const auto &br : br_cyclic) {
        /* TODO: can allow if repeatMin <= 1 ? */
        ignore_vert_set.erase(br.first);
    }

    vector<vector<CharReach> > paths;
    for (auto v : verts) {
        for (auto w : adjacent_vertices_range(v, g)) {
            if (w == v) {
                continue; // skip the self-loop
            }
            findPaths(g, w, refined_cr, &paths, ignore_vert_set,
                      MAX_ACCEL_DEPTH);
        }
    }

    /* paths built wrong: reverse them */
    for (auto &path : paths) {
        reverse(path.begin(), path.end());
    }

    improvePaths(&paths);
    DEBUG_PRINTF("we have %zu paths\n", paths.size());
    if (paths.size() > 40) {
        return AccelScheme(); /* too many paths to explore */
    }

    /* if we were smart we would do something netflowy on the paths to find the
     * best cut. But we aren't, so we will just brute force it.
     */
    AccelScheme curr(terminating, 0U);
    AccelScheme best;
    findBest(paths.begin(), paths.end(), curr, &best);

    /* find best is a bit lazy in terms of minimising the offset, see if we can
     * make it better. need to find the min max offset that we need.*/
    u32 offset = 0;
    for (const auto &path : paths) {
        // position of the first segment covered by the chosen stop set
        u32 i = 0;
        while (i < path.size() && !path[i].isSubsetOf(best.cr)) {
            i++;
        }
        if (i > offset) {
            offset = i;
        }
    }
    assert(offset <= best.offset);
    best.offset = offset;
    return best;
}
|
||||
|
||||
/** \brief Return startDs, or a cyclic vertex which can stand in for it.
 *
 * startDs is returned if it has proper successors, or if start does not have
 * exactly one successor other than startDs. Otherwise the chain from that
 * successor is followed while each vertex has a single successor with full
 * reach; the first vertex with a self-loop is returned. If the chain ends
 * without reaching a self-loop, startDs is returned. */
NFAVertex get_sds_or_proxy(const NGHolder &g) {
    DEBUG_PRINTF("looking for sds proxy\n");
    if (proper_out_degree(g.startDs, g)) {
        return g.startDs;
    }

    // Find the one and only successor of start which is not startDs.
    NFAVertex v = NFAGraph::null_vertex();
    for (auto w : adjacent_vertices_range(g.start, g)) {
        if (w == g.startDs) {
            continue;
        }
        if (!v) {
            v = w;
            continue;
        }
        return g.startDs; // more than one candidate
    }

    if (!v) {
        return g.startDs;
    }

    // Walk the chain until we hit a self-loop or the chain breaks.
    while (!hasSelfLoop(v, g)) {
        if (out_degree(v, g) != 1) {
            return g.startDs;
        }
        NFAVertex u = getSoleDestVertex(g, v);
        if (!g[u].char_reach.all()) {
            return g.startDs;
        }
        v = u;
    }

    DEBUG_PRINTF("woot %u\n", g[v].index);
    return v;
}
|
||||
|
||||
/** \brief Check if vertex \a v is an accelerable state (for a limex NFA).
 *
 * \param g          graph containing \a v
 * \param v          candidate state; must have a self-loop to be accelerable
 * \param refined_cr per-vertex reach, indexed by vertex index, used for the
 *                   escape characters of the successors
 * \param br_cyclic  bounded repeat cyclic states, passed on to nfaFindAccel()
 * \param as         out: the scheme chosen. Only written on the paths that
 *                   reach a scheme; the early "return false" exits leave it
 *                   untouched.
 * \param allow_wide permit the larger ACCEL_MAX_FLOATING_STOP_CHAR limit
 * \return true if an acceleration scheme was selected */
bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
                   const vector<CharReach> &refined_cr,
                   const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
                   AccelScheme *as, bool allow_wide) {
    // For a state to be accelerable, our current criterion is that it be a
    // large character class with a self-loop and narrow set of possible other
    // successors (i.e. no special successors, union of successor reachability
    // is small).
    if (!hasSelfLoop(v, g)) {
        return false;
    }

    // check that this state is reachable on most characters
    /* we want to use the maximal reach here (in the graph) */
    CharReach terminating = g[v].char_reach;
    terminating.flip();

    DEBUG_PRINTF("vertex %u is cyclic and has %zu stop chars%s\n",
                 g[v].index, terminating.count(),
                 allow_wide ? " (w)" : "");

    size_t limit = allow_wide ? ACCEL_MAX_FLOATING_STOP_CHAR
                              : ACCEL_MAX_STOP_CHAR;
    if (terminating.count() > limit) {
        DEBUG_PRINTF("too leaky\n");
        return false;
    }

    // curr: states at the depth being examined; next: their successors
    flat_set<NFAVertex> curr, next;

    insert(&curr, adjacent_vertices(v, g));
    curr.erase(v); // erase self-loop

    // We consider offsets of zero through three; this is fairly arbitrary at
    // present and could probably be increased (FIXME)
    /* WARNING: would/could do horrible things to compile time */
    bool stop = false;
    // depthReach[d]: union of the reach of the states d steps past v, plus
    // the terminating characters of v itself
    vector<CharReach> depthReach(MAX_ACCEL_DEPTH);
    unsigned int depth;
    for (depth = 0; !stop && depth < MAX_ACCEL_DEPTH; depth++) {
        CharReach &cr = depthReach[depth];
        for (auto t : curr) {
            if (is_special(t, g)) {
                // We've bumped into the edge of the graph, so we should stop
                // searching.
                // Exception: iff our cyclic state is not a dot, than we can
                // safely accelerate towards an EOD accept.

                /* Exception: nfas that don't generate callbacks so accepts are
                 * fine too */
                if (t == g.accept && !generates_callbacks(g)) {
                    stop = true; // don't search beyond this depth
                    continue;
                } else if (t == g.accept) {
                    // The goto skips the loop increment, so the current
                    // (partially built) depth is excluded from the
                    // candidates considered below.
                    goto depth_done;
                }

                assert(t == g.acceptEod);
                stop = true; // don't search beyond this depth
            } else {
                // Non-special vertex
                insert(&next, adjacent_vertices(t, g));
                /* for the escape 'literals' we want to use the minimal cr so we
                 * can be more selective */
                cr |= refined_cr[g[t].index];
            }
        }

        cr |= terminating;
        DEBUG_PRINTF("depth %u has unioned reach %zu\n", depth, cr.count());

        curr.swap(next);
        next.clear();
    }

depth_done:

    // depth is now the number of fully built entries in depthReach
    if (depth == 0) {
        return false;
    }

    DEBUG_PRINTF("selecting from depth 0..%u\n", depth);

    /* Look for the most awesome acceleration evar */
    // A depth with empty reach has no characters that can stop it.
    for (unsigned int i = 0; i < depth; i++) {
        if (depthReach[i].none()) {
            DEBUG_PRINTF("red tape acceleration engine depth %u\n", i);
            *as = AccelScheme(CharReach(), i);
            return true;
        }
    }

    // First, loop over our depths and see if we have a suitable 2-byte
    // caseful vermicelli option: this is the (second) fastest accel we have
    if (depth > 1) {
        for (unsigned int i = 0; i < (depth - 1); i++) {
            const CharReach &cra = depthReach[i];
            const CharReach &crb = depthReach[i + 1];
            if ((cra.count() == 1 && crb.count() == 1)
                || (cra.count() == 2 && crb.count() == 2
                    && cra.isBit5Insensitive() && crb.isBit5Insensitive())) {
                DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i);
                // the scheme only records the offset here; its reach is dot
                *as = AccelScheme(CharReach::dot(), i);
                return true;
            }
        }
    }

    // Second option: a two-byte shufti (i.e. at most eight 2-byte
    // literals)
    if (depth > 1) {
        for (unsigned int i = 0; i < (depth - 1); i++) {
            if (depthReach[i].count()*depthReach[i+1].count() <= 8) {
                DEBUG_PRINTF("two-byte shufti, depth %u\n", i);
                *as = AccelScheme(CharReach::dot(), i);
                return true;
            }
        }
    }

    // Look for one byte accel schemes verm/shufti;
    vector<NFAVertex> verts(1, v);
    *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide);
    DEBUG_PRINTF("as width %zu\n", as->cr.count());
    // a scheme wider than ACCEL_MAX_STOP_CHAR is only accepted if allow_wide
    return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide;
}
|
||||
|
||||
} // namespace ue2
|
||||
114
src/nfagraph/ng_limex_accel.h
Normal file
114
src/nfagraph/ng_limex_accel.h
Normal file
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief NFA acceleration analysis code.
|
||||
*/
|
||||
|
||||
#ifndef NG_LIMEX_ACCEL_H
|
||||
#define NG_LIMEX_ACCEL_H
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ng_misc_opt.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/order_check.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/* compile time accel defs */

/** Largest offset considered when searching for an acceleration scheme; also
 * the largest offset a valid AccelScheme may carry. */
#define MAX_ACCEL_DEPTH 4

/* NOTE(review): not referenced in this header; presumably a cap on the stop
 * characters of merged accel states - confirm at the point of use. */
#define MAX_MERGED_ACCEL_STOPS 200

/** Maximum number of stop characters a cyclic state may have and still be
 * considered for acceleration. */
#define ACCEL_MAX_STOP_CHAR 24

/** Larger stop character limit applied when wide schemes are allowed. */
#define ACCEL_MAX_FLOATING_STOP_CHAR 192 /* accelerating sds is important */
|
||||
|
||||
/** \brief Find "friend" states of accel state \a v, writing them to
 * \a friends.
 *
 * \param br_cyclic bounded repeat cyclic states
 * \param offset    accel offset of \a v
 *
 * NOTE(review): see the definition for the exact criteria used to pick
 * friends. */
void findAccelFriends(const NGHolder &g, NFAVertex v,
                      const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
                      u32 offset,
                      ue2::flat_set<NFAVertex> *friends);
|
||||
|
||||
struct DoubleAccelInfo {
|
||||
DoubleAccelInfo() : offset(0) {}
|
||||
u32 offset; //!< offset correction to apply
|
||||
CharReach stop1; //!< single-byte accel stop literals
|
||||
flat_set<std::pair<u8, u8>> stop2; //!< double-byte accel stop literals
|
||||
};
|
||||
|
||||
/** \brief Find the best double-byte accel info for vertex \a v over all accel
 * offsets from 0 to MAX_ACCEL_DEPTH. */
DoubleAccelInfo findBestDoubleAccelInfo(const NGHolder &g, NFAVertex v);
|
||||
|
||||
struct AccelScheme {
|
||||
AccelScheme(const CharReach &cr_in, u32 offset_in)
|
||||
: cr(cr_in), offset(offset_in) {
|
||||
assert(offset <= MAX_ACCEL_DEPTH);
|
||||
}
|
||||
AccelScheme() : cr(CharReach::dot()), offset(MAX_ACCEL_DEPTH + 1) {}
|
||||
|
||||
bool operator<(const AccelScheme &b) const {
|
||||
const AccelScheme &a = *this;
|
||||
|
||||
// Don't use ORDER_CHECK as it will (stupidly) eval count() too many
|
||||
// times.
|
||||
const size_t a_count = cr.count(), b_count = b.cr.count();
|
||||
if (a_count != b_count) {
|
||||
return a_count < b_count;
|
||||
}
|
||||
|
||||
/* TODO: give bonus if one is a 'caseless' character */
|
||||
ORDER_CHECK(offset);
|
||||
ORDER_CHECK(cr);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool operator>(const AccelScheme &b) const {
|
||||
return b < *this;
|
||||
}
|
||||
|
||||
CharReach cr;
|
||||
u32 offset;
|
||||
};
|
||||
|
||||
/** \brief Return startDs, or a cyclic vertex reached from start through a
 * chain of single-successor vertices which can act as a proxy for it. */
NFAVertex get_sds_or_proxy(const NGHolder &g);
|
||||
|
||||
/** \brief Find a single-byte acceleration scheme covering the cyclic states
 * in \a verts.
 *
 * Returns a default-constructed (invalid) AccelScheme if no scheme could be
 * built. \a allow_wide selects the larger ACCEL_MAX_FLOATING_STOP_CHAR stop
 * character limit. */
AccelScheme nfaFindAccel(const NGHolder &g, const std::vector<NFAVertex> &verts,
                         const std::vector<CharReach> &refined_cr,
                         const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
                         bool allow_wide);
|
||||
|
||||
/** \brief Check if vertex \a v is an accelerable state (for a limex NFA).
 *
 * On success, true is returned and the chosen scheme is written to \a as.
 * \a v must have a self-loop to be accelerable. */
bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
                   const std::vector<CharReach> &refined_cr,
                   const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
                   AccelScheme *as, bool allow_wide);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
852
src/nfagraph/ng_literal_analysis.cpp
Normal file
852
src/nfagraph/ng_literal_analysis.cpp
Normal file
@@ -0,0 +1,852 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Literal analysis and scoring.
|
||||
*/
|
||||
#include "ng_literal_analysis.h"
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ng_split.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "rose/rose_common.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/ue2string.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <queue>
|
||||
|
||||
#include <boost/graph/boykov_kolmogorov_max_flow.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::vertex_index;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** Maximum number of paths to generate: an expansion is only allowed while
 * the new items plus the paths so far do not exceed this. */
static const u32 MAX_WIDTH = 11;

/** Scoring adjustment for 'uniqueness' in literal. */
static const u64a WEIGHT_OF_UNIQUENESS = 250;
|
||||
|
||||
namespace {
|
||||
|
||||
/* Small literal graph type used for the suffix tree used in
|
||||
* compressAndScore. */
|
||||
|
||||
typedef boost::adjacency_list_traits<boost::vecS, boost::vecS,
|
||||
boost::bidirectionalS> LitGraphTraits;
|
||||
typedef LitGraphTraits::vertex_descriptor LitVertex;
|
||||
typedef LitGraphTraits::edge_descriptor LitEdge;
|
||||
|
||||
struct LitGraphVertexProps {
|
||||
LitGraphVertexProps() {}
|
||||
explicit LitGraphVertexProps(const ue2_literal::elem &c_in) : c(c_in) {}
|
||||
ue2_literal::elem c; // string element (char + bool)
|
||||
};
|
||||
|
||||
struct LitGraphEdgeProps {
|
||||
LitGraphEdgeProps() {}
|
||||
explicit LitGraphEdgeProps(u64a score_in) : score(score_in) {}
|
||||
u64a score = NO_LITERAL_AT_EDGE_SCORE;
|
||||
};
|
||||
|
||||
typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS,
|
||||
LitGraphVertexProps, LitGraphEdgeProps,
|
||||
boost::no_property> LitGraph;
|
||||
|
||||
typedef pair<LitVertex, NFAVertex> VertexPair;
|
||||
typedef std::queue<VertexPair> LitVertexQ;
|
||||
|
||||
} // namespace
|
||||
|
||||
#ifdef DUMP_SUPPORT

/** \brief Dump the literal graph in Graphviz format.
 *
 * Writes one node line per vertex (labelled ROOT, SINK or with its literal
 * element) and one line per edge (labelled with its score) to \a filename. */
static UNUSED
void dumpGraph(const char *filename, const LitGraph &lg, const LitVertex &root,
               const LitVertex &sink) {
    ofstream fout(filename);

    fout << "digraph G {" << endl;

    for (auto v : vertices_range(lg)) {
        string label;
        if (v == root) {
            label = "ROOT";
        } else if (v == sink) {
            label = "SINK";
        } else {
            ue2_literal s;
            s.push_back(lg[v].c);
            label = dumpString(s);
        }

        fout << boost::get(vertex_index, lg, v);
        fout << "[label=\"" << label << "\"];";
        fout << endl;
    }

    for (const auto &e : edges_range(lg)) {
        const LitVertex from = source(e, lg);
        const LitVertex to = target(e, lg);
        fout << boost::get(vertex_index, lg, from) << " -> "
             << boost::get(vertex_index, lg, to)
             << "[label=\"" << lg[e].score << "\"];" << endl;
    }

    fout << "}" << endl;
}

#endif // DUMP_SUPPORT
|
||||
|
||||
static
|
||||
bool allowExpand(size_t numItems, size_t totalPathsSoFar) {
|
||||
if (numItems == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (numItems + totalPathsSoFar > MAX_WIDTH) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
LitVertex addToLitGraph(LitGraph &lg, LitVertex sink,
|
||||
LitVertex pred, const ue2_literal::elem &c) {
|
||||
// Check if we already have this in the graph.
|
||||
for (auto v : adjacent_vertices_range(pred, lg)) {
|
||||
if (v == sink) {
|
||||
continue;
|
||||
}
|
||||
if (lg[v].c == c) {
|
||||
return v;
|
||||
}
|
||||
}
|
||||
|
||||
LitVertex lv = add_vertex(LitGraphVertexProps(c), lg);
|
||||
add_edge(pred, lv, lg);
|
||||
return lv;
|
||||
}
|
||||
|
||||
static
|
||||
void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex sink,
|
||||
LitVertex pred, const CharReach &cr, NFAVertex v) {
|
||||
for (size_t i = cr.find_first(); i != CharReach::npos; i = cr.find_next(i)) {
|
||||
if (myisupper(i) && cr.test(mytolower(i))) {
|
||||
// ignore upper half of a nocase pair
|
||||
continue;
|
||||
}
|
||||
|
||||
bool nocase = myislower(i) && cr.test(mytoupper(i));
|
||||
ue2_literal::elem c((char)i, nocase);
|
||||
LitVertex lv = addToLitGraph(lg, sink, pred, c);
|
||||
workQ.push(VertexPair(lv, v));
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex root,
|
||||
LitVertex sink, const NGHolder &g, const NFAEdge &e) {
|
||||
NFAVertex u = source(e, g);
|
||||
NFAVertex v = target(e, g);
|
||||
const CharReach &cr = g[v].char_reach;
|
||||
|
||||
if (!allowExpand(cr.count(), 0)) {
|
||||
return;
|
||||
}
|
||||
|
||||
addToQueue(workQ, lg, sink, root, cr, u);
|
||||
}
|
||||
|
||||
static
|
||||
u32 crCardinality(const CharReach &cr) {
|
||||
// Special-case for handling dots, much faster than running the find_next
|
||||
// loop below.
|
||||
if (cr.all()) {
|
||||
return 230; // [^A-Z]
|
||||
}
|
||||
|
||||
u32 rv = 0;
|
||||
for (size_t i = cr.find_first(); i != CharReach::npos; i = cr.find_next(i)) {
|
||||
if (myisupper(i) && cr.test(mytolower(i))) {
|
||||
// ignore upper half of a nocase pair
|
||||
continue;
|
||||
}
|
||||
rv++;
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
/** Filter out literals that include other literals as suffixes. We do this by
|
||||
* identifying vertices connected to the sink and removing their other
|
||||
* out-edges. */
|
||||
static
|
||||
void filterLitGraph(LitGraph &lg, const LitVertex sink) {
|
||||
for (auto v : inv_adjacent_vertices_range(sink, lg)) {
|
||||
remove_out_edge_if(v, [&lg, &sink](const LitEdge &e) {
|
||||
return target(e, lg) != sink;
|
||||
}, lg);
|
||||
}
|
||||
|
||||
// We could do a DFS-and-prune here, if we wanted. Right now, we just
|
||||
// handle it in extractLiterals by throwing away paths that don't run all
|
||||
// the way from sink to root.
|
||||
}
|
||||
|
||||
/** Extracts all the literals from the given literal graph. Walks the graph
|
||||
* from each predecessor of the sink (note: it's a suffix tree except for this
|
||||
* convenience) towards the source, storing each string as we go. */
|
||||
static
|
||||
void extractLiterals(const LitGraph &lg, const LitVertex root,
|
||||
const LitVertex sink, set<ue2_literal> &s) {
|
||||
ue2_literal lit;
|
||||
|
||||
for (auto u : inv_adjacent_vertices_range(sink, lg)) {
|
||||
lit.clear();
|
||||
while (u != root) {
|
||||
lit.push_back(lg[u].c);
|
||||
assert(in_degree(u, lg) <= 1);
|
||||
LitGraph::inv_adjacency_iterator ai2, ae2;
|
||||
tie(ai2, ae2) = inv_adjacent_vertices(u, lg);
|
||||
if (ai2 == ae2) {
|
||||
// Path has been cut, time for the next literal.
|
||||
goto next_literal;
|
||||
}
|
||||
u = *ai2;
|
||||
}
|
||||
s.insert(lit);
|
||||
next_literal:
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
static
|
||||
bool hasSuffixLiterals(const set<ue2_literal> &s) {
|
||||
for (auto it = s.begin(), ite = s.end(); it != ite; ++it) {
|
||||
for (auto jt = std::next(it); jt != ite; ++jt) {
|
||||
if (isSuffix(*it, *jt) || isSuffix(*jt, *it)) {
|
||||
DEBUG_PRINTF("'%s' and '%s' have suffix issues\n",
|
||||
dumpString(*it).c_str(),
|
||||
dumpString(*jt).c_str());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
static
|
||||
void processWorkQueue(const NGHolder &g, const NFAEdge &e,
|
||||
set<ue2_literal> &s) {
|
||||
if (is_special(target(e, g), g)) {
|
||||
return;
|
||||
}
|
||||
|
||||
LitGraph lg;
|
||||
LitVertex root = add_vertex(lg);
|
||||
LitVertex sink = add_vertex(lg);
|
||||
|
||||
LitVertexQ workQ;
|
||||
initWorkQueue(workQ, lg, root, sink, g, e);
|
||||
|
||||
while (!workQ.empty()) {
|
||||
const LitVertex lv = workQ.front().first;
|
||||
const NFAVertex &t = workQ.front().second;
|
||||
const CharReach &cr = g[t].char_reach;
|
||||
|
||||
u32 cr_card = crCardinality(cr);
|
||||
size_t numItems = cr_card * in_degree(t, g);
|
||||
size_t committed_count = workQ.size() + in_degree(sink, lg) - 1;
|
||||
|
||||
if (g[t].index == NODE_START) {
|
||||
// reached start, add to literal set
|
||||
add_edge_if_not_present(lv, sink, lg);
|
||||
goto next_work_elem;
|
||||
}
|
||||
|
||||
// Expand next vertex
|
||||
if (allowExpand(numItems, committed_count)) {
|
||||
for (auto u : inv_adjacent_vertices_range(t, g)) {
|
||||
addToQueue(workQ, lg, sink, lv, cr, u);
|
||||
}
|
||||
goto next_work_elem;
|
||||
}
|
||||
|
||||
// Expand this vertex
|
||||
if (allowExpand(cr_card, committed_count)) {
|
||||
for (size_t i = cr.find_first(); i != CharReach::npos;
|
||||
i = cr.find_next(i)) {
|
||||
if (myisupper(i) && cr.test(mytolower(i))) {
|
||||
// ignore upper half of a nocase pair
|
||||
continue;
|
||||
}
|
||||
|
||||
bool nocase = myislower(i) && cr.test(mytoupper(i));
|
||||
ue2_literal::elem c((char)i, nocase);
|
||||
LitVertex lt = addToLitGraph(lg, sink, lv, c);
|
||||
add_edge_if_not_present(lt, sink, lg);
|
||||
}
|
||||
goto next_work_elem;
|
||||
}
|
||||
|
||||
// add to literal set
|
||||
add_edge_if_not_present(lv, sink, lg);
|
||||
next_work_elem:
|
||||
workQ.pop();
|
||||
}
|
||||
|
||||
filterLitGraph(lg, sink);
|
||||
//dumpGraph("litgraph.dot", lg, root, sink);
|
||||
extractLiterals(lg, root, sink, s);
|
||||
|
||||
// Our literal set should contain no literal that is a suffix of another.
|
||||
assert(!hasSuffixLiterals(s));
|
||||
|
||||
DEBUG_PRINTF("edge %u (%u->%u) produced %zu literals\n", g[e].index,
|
||||
g[source(e, g)].index, g[target(e, g)].index, s.size());
|
||||
}
|
||||
|
||||
static
|
||||
u64a litUniqueness(const string &s) {
|
||||
CharReach seen(s);
|
||||
return seen.count();
|
||||
}
|
||||
|
||||
/** Count the significant bits of this literal (i.e. seven for nocase alpha,
|
||||
* eight for everything else). */
|
||||
static
|
||||
u64a litCountBits(const ue2_literal &lit) {
|
||||
u64a n = 0;
|
||||
for (const auto &c : lit) {
|
||||
n += c.nocase ? 7 : 8;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
/** Returns a fairly arbitrary score for the given literal, used to compare the
|
||||
* suitability of different candidates. */
|
||||
static
|
||||
u64a scoreLiteral(const ue2_literal &s) {
|
||||
// old scoring scheme: SUM(s in S: 1/s.len()^2)
|
||||
// now weight (currently 75/25) with number of unique chars
|
||||
// in the string
|
||||
u64a len = litCountBits(s);
|
||||
u64a lenUnique = litUniqueness(s.get_string()) * 8;
|
||||
|
||||
u64a weightedLen = (1000ULL - WEIGHT_OF_UNIQUENESS) * len +
|
||||
WEIGHT_OF_UNIQUENESS * lenUnique;
|
||||
weightedLen /= 8;
|
||||
|
||||
DEBUG_PRINTF("scored literal '%s' %llu\n",
|
||||
escapeString(s.get_string()).c_str(), weightedLen);
|
||||
|
||||
return weightedLen;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* calculateScore has the following properties:
|
||||
* - score of literal is the same as the score of the reversed literal;
|
||||
* - score of substring of literal is worse than the original literal's score;
|
||||
* - score of any literal should be non-zero.
|
||||
*/
|
||||
static
|
||||
u64a calculateScore(const ue2_literal &s) {
|
||||
if (s.empty()) {
|
||||
return NO_LITERAL_AT_EDGE_SCORE;
|
||||
}
|
||||
|
||||
u64a weightedLen = scoreLiteral(s);
|
||||
|
||||
DEBUG_PRINTF("len %zu, wl %llu\n", s.length(), weightedLen);
|
||||
u64a rv = 1000000000000000ULL/(weightedLen * weightedLen * weightedLen);
|
||||
|
||||
if (!rv) {
|
||||
rv = 1;
|
||||
}
|
||||
DEBUG_PRINTF("len %zu, score %llu\n", s.length(), rv);
|
||||
return rv;
|
||||
}
|
||||
|
||||
/** Adds a literal in reverse order, building up a suffix tree. */
|
||||
static
|
||||
void addReversedLiteral(const ue2_literal &lit, LitGraph &lg,
|
||||
const LitVertex &root, const LitVertex &sink) {
|
||||
DEBUG_PRINTF("literal: '%s'\n", escapeString(lit).c_str());
|
||||
ue2_literal suffix;
|
||||
LitVertex v = root;
|
||||
for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
|
||||
suffix.push_back(*it);
|
||||
LitVertex w;
|
||||
for (auto v2 : adjacent_vertices_range(v, lg)) {
|
||||
if (v2 != sink && lg[v2].c == *it) {
|
||||
w = v2;
|
||||
goto next_char;
|
||||
}
|
||||
}
|
||||
w = add_vertex(LitGraphVertexProps(*it), lg);
|
||||
add_edge(v, w, LitGraphEdgeProps(calculateScore(suffix)), lg);
|
||||
next_char:
|
||||
v = w;
|
||||
}
|
||||
|
||||
// Wire the last vertex to the sink.
|
||||
add_edge(v, sink, lg);
|
||||
}
|
||||
|
||||
static
|
||||
void extractLiterals(const vector<LitEdge> &cutset, const LitGraph &lg,
|
||||
const LitVertex &root, set<ue2_literal> &s) {
|
||||
for (const auto &e : cutset) {
|
||||
LitVertex u = source(e, lg), v = target(e, lg);
|
||||
ue2_literal lit;
|
||||
lit.push_back(lg[v].c);
|
||||
while (u != root) {
|
||||
lit.push_back(lg[u].c);
|
||||
assert(in_degree(u, lg) == 1);
|
||||
LitGraph::inv_adjacency_iterator ai, ae;
|
||||
tie(ai, ae) = inv_adjacent_vertices(u, lg);
|
||||
if (ai == ae) {
|
||||
// Path has been cut, time for the next literal.
|
||||
goto next_literal;
|
||||
}
|
||||
u = *ai;
|
||||
}
|
||||
DEBUG_PRINTF("extracted: '%s'\n", escapeString(lit).c_str());
|
||||
s.insert(lit);
|
||||
next_literal:
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG
/** Returns a printable name for a BGL colour value, or "unknown" for any
 * value not listed here. Debug builds only. */
static UNUSED
const char *describeColor(boost::default_color_type c) {
    if (c == boost::white_color) {
        return "white";
    }
    if (c == boost::gray_color) {
        return "gray";
    }
    if (c == boost::green_color) {
        return "green";
    }
    if (c == boost::red_color) {
        return "red";
    }
    if (c == boost::black_color) {
        return "black";
    }
    return "unknown";
}
#endif
|
||||
|
||||
/**
|
||||
* The BGL's boykov_kolmogorov_max_flow requires that all edges have their
|
||||
* reverse edge in the graph. This function adds them, returning the new edges
|
||||
* and constructing a map of (edge, rev edge).
|
||||
*/
|
||||
static
|
||||
vector<LitEdge> addReverseEdges(LitGraph &lg,
|
||||
ue2::unordered_map<LitEdge, LitEdge> &reverse_edge_map) {
|
||||
vector<LitEdge> reverseMe;
|
||||
|
||||
reverse_edge_map.clear();
|
||||
reverse_edge_map.reserve(num_edges(lg) * 2);
|
||||
|
||||
for (const auto &e : edges_range(lg)) {
|
||||
LitVertex u = source(e, lg), v = target(e, lg);
|
||||
assert(u != v);
|
||||
|
||||
bool exists;
|
||||
LitEdge rev;
|
||||
tie(rev, exists) = edge(v, u, lg);
|
||||
if (exists) {
|
||||
reverse_edge_map[e] = rev;
|
||||
} else {
|
||||
reverseMe.push_back(e);
|
||||
}
|
||||
}
|
||||
|
||||
vector<LitEdge> reverseEdges;
|
||||
reverseEdges.reserve(reverseMe.size());
|
||||
|
||||
for (const auto &e : reverseMe) {
|
||||
LitVertex u = source(e, lg), v = target(e, lg);
|
||||
LitEdge rev = add_edge(v, u, lg[e], lg).first;
|
||||
reverseEdges.push_back(rev);
|
||||
reverse_edge_map[e] = rev;
|
||||
reverse_edge_map[rev] = e;
|
||||
}
|
||||
|
||||
return reverseEdges;
|
||||
}
|
||||
|
||||
static
|
||||
void findMinCut(LitGraph &lg, const LitVertex &root, const LitVertex &sink,
|
||||
vector<LitEdge> &cutset) {
|
||||
cutset.clear();
|
||||
|
||||
//dumpGraph("litgraph.dot", lg, root, sink);
|
||||
|
||||
assert(!in_degree(root, lg));
|
||||
assert(!out_degree(sink, lg));
|
||||
|
||||
// Add reverse edges for the convenience of the BGL's max flow algorithm.
|
||||
ue2::unordered_map<LitEdge, LitEdge> reverse_edge_map;
|
||||
vector<LitEdge> tempEdges = addReverseEdges(lg, reverse_edge_map);
|
||||
|
||||
const auto v_index_map = get(vertex_index, lg);
|
||||
const size_t num_verts = num_vertices(lg);
|
||||
vector<boost::default_color_type> colors(num_verts);
|
||||
vector<s32> distances(num_verts);
|
||||
vector<LitEdge> predecessors(num_verts);
|
||||
ue2::unordered_map<LitEdge, u64a> residuals;
|
||||
residuals.reserve(num_edges(lg));
|
||||
|
||||
UNUSED u64a flow = boykov_kolmogorov_max_flow(lg,
|
||||
get(&LitGraphEdgeProps::score, lg),
|
||||
make_assoc_property_map(residuals),
|
||||
make_assoc_property_map(reverse_edge_map),
|
||||
make_iterator_property_map(predecessors.begin(), v_index_map),
|
||||
make_iterator_property_map(colors.begin(), v_index_map),
|
||||
make_iterator_property_map(distances.begin(), v_index_map),
|
||||
get(vertex_index, lg), root, sink);
|
||||
DEBUG_PRINTF("done, flow = %llu\n", flow);
|
||||
|
||||
// Remove temporary reverse edges.
|
||||
for (const auto &e : tempEdges) {
|
||||
remove_edge(e, lg);
|
||||
}
|
||||
|
||||
vector<LitEdge> white_cut, black_cut;
|
||||
u64a white_flow = 0, black_flow = 0;
|
||||
|
||||
for (const auto &e : edges_range(lg)) {
|
||||
const LitVertex u = source(e, lg), v = target(e, lg);
|
||||
const auto ucolor = colors[boost::get(vertex_index, lg, u)];
|
||||
const auto vcolor = colors[boost::get(vertex_index, lg, v)];
|
||||
|
||||
DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n",
|
||||
boost::get(vertex_index, lg, u), describeColor(ucolor),
|
||||
boost::get(vertex_index, lg, v), describeColor(vcolor),
|
||||
lg[e].score);
|
||||
|
||||
if (ucolor != boost::white_color && vcolor == boost::white_color) {
|
||||
assert(target(e, lg) != sink);
|
||||
white_cut.push_back(e);
|
||||
white_flow += lg[e].score;
|
||||
}
|
||||
if (ucolor == boost::black_color && vcolor != boost::black_color) {
|
||||
assert(target(e, lg) != sink);
|
||||
black_cut.push_back(e);
|
||||
black_flow += lg[e].score;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("white flow = %llu, black flow = %llu\n",
|
||||
white_flow, black_flow);
|
||||
assert(white_flow && black_flow);
|
||||
|
||||
if (white_flow <= black_flow) {
|
||||
DEBUG_PRINTF("selected white cut\n");
|
||||
cutset.swap(white_cut);
|
||||
} else {
|
||||
DEBUG_PRINTF("selected black cut\n");
|
||||
cutset.swap(black_cut);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("min cut has %zu edges\n", cutset.size());
|
||||
assert(!cutset.empty());
|
||||
}
|
||||
|
||||
/** Takes a set of literals and derives a better one from them, returning its
|
||||
* score. Literals with a common suffix S will be replaced with S. (for
|
||||
* example, {foobar, fooobar} -> {oobar}).
|
||||
*/
|
||||
u64a compressAndScore(set<ue2_literal> &s) {
|
||||
if (s.empty()) {
|
||||
return NO_LITERAL_AT_EDGE_SCORE;
|
||||
}
|
||||
|
||||
if (s.size() == 1) {
|
||||
return calculateScore(*s.begin());
|
||||
}
|
||||
|
||||
UNUSED u64a initialScore = scoreSet(s);
|
||||
DEBUG_PRINTF("begin, initial literals have score %llu\n",
|
||||
initialScore);
|
||||
|
||||
LitGraph lg;
|
||||
const LitVertex root = add_vertex(lg);
|
||||
const LitVertex sink = add_vertex(lg);
|
||||
|
||||
for (const auto &lit : s) {
|
||||
addReversedLiteral(lit, lg, root, sink);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("suffix tree has %zu vertices and %zu edges\n",
|
||||
num_vertices(lg), num_edges(lg));
|
||||
|
||||
vector<LitEdge> cutset;
|
||||
findMinCut(lg, root, sink, cutset);
|
||||
|
||||
s.clear();
|
||||
extractLiterals(cutset, lg, root, s);
|
||||
|
||||
u64a score = scoreSet(s);
|
||||
DEBUG_PRINTF("compressed score is %llu\n", score);
|
||||
assert(score <= initialScore);
|
||||
return score;
|
||||
}
|
||||
|
||||
/** Returns a score for a literal set: NO_LITERAL_AT_EDGE_SCORE for an empty
 * set, otherwise one plus the sum of calculateScore() over its literals. */
u64a scoreSet(const set<ue2_literal> &s) {
    if (s.empty()) {
        return NO_LITERAL_AT_EDGE_SCORE;
    }

    u64a total = 1ULL;
    for (auto it = s.begin(), ite = s.end(); it != ite; ++it) {
        total += calculateScore(*it);
    }
    return total;
}
|
||||
|
||||
/** Returns the literal set produced by processWorkQueue() for edge \p e. */
set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e) {
    set<ue2_literal> lits;
    processWorkQueue(g, e, lits);
    return lits;
}
|
||||
|
||||
/** Returns the union of the literal sets of the in-edges of \p v. The result
 * is empty if \p v is a special vertex, or if any considered in-edge yields
 * no literals. With \p only_first_encounter set, a self-loop on \p v is not
 * considered. */
set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v,
                               bool only_first_encounter) {
    set<ue2_literal> s;

    if (is_special(v, g)) {
        return s;
    }

    for (const auto &e : in_edges_range(v, g)) {
        const bool self_loop = source(e, g) == v;
        if (self_loop && only_first_encounter) {
            continue; /* ignore self loop on root vertex as we are interested in
                       * the first time we visit the vertex on the way to
                       * accept. In fact, we can ignore any back edges - but
                       * they would require a bit of effort to discover. */
        }

        const set<ue2_literal> edge_lits = getLiteralSet(g, e);
        if (edge_lits.empty()) {
            // One edge without literals empties the whole result.
            return set<ue2_literal>();
        }
        s.insert(edge_lits.begin(), edge_lits.end());
    }

    return s;
}
|
||||
|
||||
/** Scores every edge in \p g: each edge's literal set is compressed and
 * scored by compressAndScore(), and the result is stored at the edge's index
 * in the returned vector (sized to the number of edges). */
vector<u64a> scoreEdges(const NGHolder &g) {
    assert(hasCorrectlyNumberedEdges(g));

    vector<u64a> scores(num_edges(g));

    for (const auto &e : edges_range(g)) {
        const u32 slot = g[e].index;
        assert(slot < scores.size());

        set<ue2_literal> lits = getLiteralSet(g, e);
        scores[slot] = compressAndScore(lits);
    }

    return scores;
}
|
||||
|
||||
static
|
||||
bool splitOffLeadingLiteral_i(const NGHolder &g, bool anch,
|
||||
ue2_literal *lit_out,
|
||||
NGHolder *rhs) {
|
||||
NFAVertex u;
|
||||
NFAVertex v;
|
||||
|
||||
if (!anch) {
|
||||
DEBUG_PRINTF("looking for leading floating literal\n");
|
||||
set<NFAVertex> s_succ;
|
||||
insert(&s_succ, adjacent_vertices(g.start, g));
|
||||
|
||||
set<NFAVertex> sds_succ;
|
||||
insert(&sds_succ, adjacent_vertices(g.startDs, g));
|
||||
|
||||
bool floating = is_subset_of(s_succ, sds_succ);
|
||||
if (!floating) {
|
||||
DEBUG_PRINTF("not floating\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
sds_succ.erase(g.startDs);
|
||||
if (sds_succ.size() != 1) {
|
||||
DEBUG_PRINTF("branchy root\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
u = g.startDs;
|
||||
v = *sds_succ.begin();
|
||||
} else {
|
||||
DEBUG_PRINTF("looking for leading anchored literal\n");
|
||||
|
||||
if (proper_out_degree(g.startDs, g)) {
|
||||
DEBUG_PRINTF("not anchored\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
set<NFAVertex> s_succ;
|
||||
insert(&s_succ, adjacent_vertices(g.start, g));
|
||||
s_succ.erase(g.startDs);
|
||||
if (s_succ.size() != 1) {
|
||||
DEBUG_PRINTF("branchy root\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
u = g.start;
|
||||
v = *s_succ.begin();
|
||||
}
|
||||
|
||||
while (true) {
|
||||
DEBUG_PRINTF("validating vertex %u\n", g[v].index);
|
||||
|
||||
assert(v != g.acceptEod && v != g.accept);
|
||||
|
||||
const CharReach &cr = g[v].char_reach;
|
||||
if (cr.count() != 1 && !cr.isCaselessChar()) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Rose can only handle mixed-sensitivity literals up to the max mask
|
||||
// length.
|
||||
if (lit_out->length() >= MAX_MASK2_WIDTH) {
|
||||
if (mixed_sensitivity(*lit_out)) {
|
||||
DEBUG_PRINTF("long and mixed sensitivity\n");
|
||||
break;
|
||||
}
|
||||
if (ourisalpha((char)cr.find_first())) {
|
||||
if (cr.isCaselessChar() != lit_out->any_nocase()) {
|
||||
DEBUG_PRINTF("stop at mixed sensitivity on '%c'\n",
|
||||
(char)cr.find_first());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
|
||||
DEBUG_PRINTF("connection to accept\n");
|
||||
break;
|
||||
}
|
||||
|
||||
lit_out->push_back(cr.find_first(), cr.isCaselessChar());
|
||||
u = v;
|
||||
|
||||
if (out_degree(v, g) != 1) {
|
||||
DEBUG_PRINTF("out_degree != 1\n");
|
||||
break;
|
||||
}
|
||||
|
||||
v = *adjacent_vertices(v, g).first;
|
||||
|
||||
if (in_degree(v, g) != 1) {
|
||||
DEBUG_PRINTF("blargh\n"); /* picks up cases where there is no path
|
||||
* to case accept (large cycles),
|
||||
* ensures term */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (lit_out->empty()) {
|
||||
return false;
|
||||
}
|
||||
assert(u != g.startDs);
|
||||
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> rhs_map;
|
||||
vector<NFAVertex> pivots;
|
||||
insert(&pivots, pivots.end(), adjacent_vertices(u, g));
|
||||
splitRHS(g, pivots, rhs, &rhs_map);
|
||||
|
||||
DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(*lit_out).c_str(),
|
||||
lit_out->length());
|
||||
assert(is_triggered(*rhs));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out,
|
||||
NGHolder *rhs) {
|
||||
return splitOffLeadingLiteral_i(g, false, lit_out, rhs);
|
||||
}
|
||||
|
||||
bool splitOffAnchoredLeadingLiteral(const NGHolder &g, ue2_literal *lit_out,
|
||||
NGHolder *rhs) {
|
||||
return splitOffLeadingLiteral_i(g, true, lit_out, rhs);
|
||||
}
|
||||
|
||||
|
||||
bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out) {
|
||||
if (in_degree(g.acceptEod, g) != 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
NFAVertex v = getSoleSourceVertex(g, g.accept);
|
||||
|
||||
if (!v) {
|
||||
return false;
|
||||
}
|
||||
|
||||
set<ue2_literal> s = getLiteralSet(g, v, false);
|
||||
|
||||
if (s.size() != 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const ue2_literal &lit = *s.begin();
|
||||
|
||||
if (lit.length() > MAX_MASK2_WIDTH && mixed_sensitivity(lit)) {
|
||||
DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
*lit_out = lit;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
82
src/nfagraph/ng_literal_analysis.h
Normal file
82
src/nfagraph/ng_literal_analysis.h
Normal file
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Literal analysis and scoring.
|
||||
*/
|
||||
|
||||
#ifndef NG_LITERAL_ANALYSIS_H
|
||||
#define NG_LITERAL_ANALYSIS_H
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "util/ue2string.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
#define NO_LITERAL_AT_EDGE_SCORE 10000000ULL
|
||||
|
||||
/* Score for special-to-special edges */
|
||||
#define INVALID_EDGE_CAP 100000000ULL
|
||||
|
||||
class NGHolder;
|
||||
|
||||
/**
|
||||
* Fetch the literal set for a given vertex and return it by value. Note: does
|
||||
* NOT take into account any constraints due to streaming mode requirements.
|
||||
*
|
||||
* if only_first_encounter is requested, the output set may drop literals
|
||||
* generated by revisiting the destination vertex.
|
||||
*/
|
||||
std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAVertex &v,
|
||||
bool only_first_encounter = true);
|
||||
std::set<ue2_literal> getLiteralSet(const NGHolder &g, const NFAEdge &e);
|
||||
|
||||
/** Score all the edges in the given graph, returning a vector of scores indexed
|
||||
* by edge_index. */
|
||||
std::vector<u64a> scoreEdges(const NGHolder &h);
|
||||
|
||||
/** Returns a score for a literal set. Lower scores are better. */
|
||||
u64a scoreSet(const std::set<ue2_literal> &s);
|
||||
|
||||
/** Compress a literal set to fewer literals. */
|
||||
u64a compressAndScore(std::set<ue2_literal> &s);
|
||||
|
||||
bool splitOffLeadingLiteral(const NGHolder &g, ue2_literal *lit_out,
|
||||
NGHolder *rhs);
|
||||
|
||||
bool splitOffAnchoredLeadingLiteral(const NGHolder &g, ue2_literal *lit_out,
|
||||
NGHolder *rhs);
|
||||
|
||||
bool getTrailingLiteral(const NGHolder &g, ue2_literal *lit_out);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
222
src/nfagraph/ng_literal_component.cpp
Normal file
222
src/nfagraph/ng_literal_component.cpp
Normal file
@@ -0,0 +1,222 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Literal Component Splitting. Identifies literals that span the
|
||||
* graph and moves them into Rose.
|
||||
*/
|
||||
#include "grey.h"
|
||||
#include "ng.h"
|
||||
#include "ng_literal_component.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/ue2string.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
bool isLiteralChar(const NGWrapper &g, NFAVertex v,
|
||||
bool &nocase, bool &casefixed) {
|
||||
const CharReach &cr = g[v].char_reach;
|
||||
const size_t num = cr.count();
|
||||
if (num > 2) {
|
||||
return false; // char class
|
||||
}
|
||||
|
||||
if (!casefixed) {
|
||||
if (num == 2 && cr.isCaselessChar()) {
|
||||
nocase = true;
|
||||
casefixed = true;
|
||||
return true;
|
||||
} else if (num == 1) {
|
||||
if (cr.isAlpha()) {
|
||||
nocase = false;
|
||||
casefixed = true;
|
||||
}
|
||||
// otherwise, still acceptable but we can't fix caselessness yet
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
// nocase property is fixed
|
||||
if (nocase) {
|
||||
if ((num == 2 && cr.isCaselessChar()) ||
|
||||
(num == 1 && !cr.isAlpha())) {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
return (num == 1);
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static
|
||||
void addToString(string &s, const NGHolder &g, NFAVertex v) {
|
||||
const CharReach &cr = g[v].char_reach;
|
||||
assert(cr.count() == 1 || cr.isCaselessChar());
|
||||
|
||||
char c = (char)cr.find_first();
|
||||
s.push_back(c);
|
||||
}
|
||||
|
||||
static
|
||||
bool splitOffLiteral(NG &ng, NGWrapper &g, NFAVertex v, const bool anchored,
|
||||
set<NFAVertex> &dead) {
|
||||
DEBUG_PRINTF("examine vertex %u\n", g[v].index);
|
||||
bool nocase = false, casefixed = false;
|
||||
|
||||
assert(!is_special(v, g));
|
||||
|
||||
size_t reqInDegree;
|
||||
if (anchored) {
|
||||
reqInDegree = 1;
|
||||
assert(edge(g.start, v, g).second);
|
||||
} else {
|
||||
reqInDegree = 2;
|
||||
assert(edge(g.start, v, g).second);
|
||||
assert(edge(g.startDs, v, g).second);
|
||||
}
|
||||
if (hasGreaterInDegree(reqInDegree, v, g)) {
|
||||
DEBUG_PRINTF("extra in-edges\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!isLiteralChar(g, v, nocase, casefixed)) {
|
||||
DEBUG_PRINTF("not literal\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
string literal;
|
||||
addToString(literal, g, v);
|
||||
|
||||
// Remaining vertices must come in a chain, each with one in-edge and one
|
||||
// out-edge only.
|
||||
NFAVertex u;
|
||||
while (1) {
|
||||
if (out_degree(v, g) != 1) {
|
||||
DEBUG_PRINTF("branches, not literal\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
u = v; // previous vertex
|
||||
v = *(adjacent_vertices(v, g).first);
|
||||
|
||||
DEBUG_PRINTF("loop, v=%u\n", g[v].index);
|
||||
|
||||
if (is_special(v, g)) {
|
||||
if (v == g.accept || v == g.acceptEod) {
|
||||
break; // OK
|
||||
} else {
|
||||
assert(0); // start?
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// Ordinary, must be literal
|
||||
if (!isLiteralChar(g, v, nocase, casefixed)) {
|
||||
DEBUG_PRINTF("not literal\n");
|
||||
return false;
|
||||
}
|
||||
if (in_degree(v, g) != 1) {
|
||||
DEBUG_PRINTF("branches, not literal\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
addToString(literal, g, v);
|
||||
}
|
||||
|
||||
// Successfully found a literal; there might be multiple report IDs, in
|
||||
// which case we add all the reports.
|
||||
assert(!is_special(u, g));
|
||||
bool eod = v == g.acceptEod;
|
||||
assert(eod || v == g.accept);
|
||||
|
||||
DEBUG_PRINTF("success: found %s literal '%s'\n",
|
||||
anchored ? "anchored" : "unanchored",
|
||||
escapeString(literal).c_str());
|
||||
|
||||
// Literals of length 1 are better served going through later optimisation
|
||||
// passes, where they might be combined together into a character class.
|
||||
if (literal.length() == 1) {
|
||||
DEBUG_PRINTF("skipping literal of length 1\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
ng.rose->add(anchored, eod, ue2_literal(literal, nocase), g[u].reports);
|
||||
|
||||
// Remove the terminal vertex. Later, we rely on pruneUseless to remove the
|
||||
// other vertices in this chain, since they'll no longer lead to an accept.
|
||||
dead.insert(u);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** \brief Split off literals. True if any changes were made to the graph. */
|
||||
bool splitOffLiterals(NG &ng, NGWrapper &g) {
|
||||
if (!ng.cc.grey.allowRose) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool changed = false;
|
||||
set<NFAVertex> dead;
|
||||
|
||||
ue2::unordered_set<NFAVertex> unanchored; // for faster lookup.
|
||||
insert(&unanchored, adjacent_vertices(g.startDs, g));
|
||||
|
||||
// Anchored literals.
|
||||
for (auto v : adjacent_vertices_range(g.start, g)) {
|
||||
if (!is_special(v, g) && !contains(unanchored, v)) {
|
||||
changed |= splitOffLiteral(ng, g, v, true, dead);
|
||||
}
|
||||
}
|
||||
|
||||
// Unanchored literals.
|
||||
for (auto v : adjacent_vertices_range(g.startDs, g)) {
|
||||
if (!is_special(v, g)) {
|
||||
changed |= splitOffLiteral(ng, g, v, false, dead);
|
||||
}
|
||||
}
|
||||
|
||||
if (changed) {
|
||||
remove_vertices(dead, g);
|
||||
pruneUseless(g);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
47
src/nfagraph/ng_literal_component.h
Normal file
47
src/nfagraph/ng_literal_component.h
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Literal Component Splitting. Identifies literals that span the
|
||||
* graph and moves them into Rose.
|
||||
*/
|
||||
|
||||
#ifndef NG_LITERAL_COMPONENT_H
|
||||
#define NG_LITERAL_COMPONENT_H
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NG;
|
||||
class NGWrapper;
|
||||
|
||||
/**
 * \brief Split off literals. True if any changes were made to the graph.
 *
 * Per this header's file comment, the pass identifies literals that span the
 * graph and moves them into Rose.
 *
 * NOTE(review): the roles of \p ng and \p graph are not visible from this
 * declaration; presumably \p graph is the graph being split and \p ng the
 * compiler state that receives the literals -- confirm against the definition.
 */
|
||||
bool splitOffLiterals(NG &ng, NGWrapper &graph);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_LITERAL_COMPONENT_H
|
||||
232
src/nfagraph/ng_literal_decorated.cpp
Normal file
232
src/nfagraph/ng_literal_decorated.cpp
Normal file
@@ -0,0 +1,232 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Analysis for literals decorated by leading/trailing assertions or
|
||||
* character classes.
|
||||
*/
|
||||
#include "ng_literal_decorated.h"
|
||||
|
||||
#include "nfagraph/ng_holder.h"
|
||||
#include "nfagraph/ng_util.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "rose/rose_in_graph.h"
|
||||
#include "rose/rose_in_util.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/make_unique.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
|
||||
#include <boost/graph/depth_first_search.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
|
||||
/** \brief Max fixed-width paths to generate from a graph. */
|
||||
static constexpr size_t MAX_PATHS = 10;
|
||||
|
||||
/** \brief Max degree for any non-special vertex in the graph. */
|
||||
static constexpr size_t MAX_VERTEX_DEGREE = 6;
|
||||
|
||||
using Path = vector<NFAVertex>;
|
||||
|
||||
} // namespace
|
||||
|
||||
static
|
||||
bool findPaths(const NGHolder &g, vector<Path> &paths) {
|
||||
vector<NFAVertex> order = getTopoOrdering(g);
|
||||
|
||||
vector<vector<Path>> built(num_vertices(g));
|
||||
|
||||
for (auto it = order.rbegin(); it != order.rend(); ++it) {
|
||||
NFAVertex v = *it;
|
||||
auto &out = built[g[v].index];
|
||||
assert(out.empty());
|
||||
|
||||
if (v == g.start || v == g.startDs) {
|
||||
out.push_back({v});
|
||||
continue;
|
||||
}
|
||||
|
||||
// The paths to v are the paths to v's predecessors, with v added to
|
||||
// the end of each.
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
// We have a stylized connection from start -> startDs, but we
|
||||
// don't need anchored and unanchored versions of the same path.
|
||||
if (u == g.start && edge(g.startDs, v, g).second) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Similarly, avoid the accept->acceptEod edge.
|
||||
if (u == g.accept) {
|
||||
assert(v == g.acceptEod);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const auto &p : built[g[u].index]) {
|
||||
out.push_back(p);
|
||||
out.back().push_back(v);
|
||||
|
||||
if (out.size() > MAX_PATHS) {
|
||||
// All these paths should eventually end up at a sink, so
|
||||
// we've blown past our limit.
|
||||
DEBUG_PRINTF("path limit exceeded\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
insert(&paths, paths.end(), built[NODE_ACCEPT]);
|
||||
insert(&paths, paths.end(), built[NODE_ACCEPT_EOD]);
|
||||
|
||||
DEBUG_PRINTF("%zu paths generated\n", paths.size());
|
||||
|
||||
return paths.size() <= MAX_PATHS;
|
||||
}
|
||||
|
||||
static
|
||||
bool hasLargeDegreeVertex(const NGHolder &g) {
|
||||
for (const auto &v : vertices_range(g)) {
|
||||
if (is_special(v, g)) { // specials can have large degree
|
||||
continue;
|
||||
}
|
||||
if (has_greater_degree(MAX_VERTEX_DEGREE, v, g)) {
|
||||
DEBUG_PRINTF("vertex %u has degree %zu\n", g[v].index,
|
||||
boost::degree(v, g.g));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(DEBUG) || defined(DUMP_SUPPORT)
/**
 * \brief Render \p path as a string for debug output.
 *
 * The four special vertices are written as fixed tags; every other vertex is
 * written as the description of its character reachability.
 */
static UNUSED
string dumpPath(const NGHolder &g, const Path &path) {
    ostringstream oss;
    for (const auto &v : path) {
        const auto idx = g[v].index;
        if (idx == NODE_START) {
            oss << "<start>";
        } else if (idx == NODE_START_DOTSTAR) {
            oss << "<startDs>";
        } else if (idx == NODE_ACCEPT) {
            oss << "<accept>";
        } else if (idx == NODE_ACCEPT_EOD) {
            oss << "<acceptEod>";
        } else {
            oss << describeClass(g[v].char_reach);
        }
    }
    return oss.str();
}
#endif
|
||||
|
||||
struct PathMask {
|
||||
PathMask(const NGHolder &g, const Path &path)
|
||||
: is_anchored(path.front() == g.start),
|
||||
is_eod(path.back() == g.acceptEod) {
|
||||
assert(path.size() >= 2);
|
||||
mask.reserve(path.size() - 2);
|
||||
for (const auto &v : path) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
mask.push_back(g[v].char_reach);
|
||||
}
|
||||
|
||||
// Reports are attached to the second-to-last vertex.
|
||||
reports = g[*next(path.rbegin())].reports;
|
||||
assert(!reports.empty());
|
||||
}
|
||||
|
||||
vector<CharReach> mask;
|
||||
ue2::flat_set<ReportID> reports;
|
||||
bool is_anchored;
|
||||
bool is_eod;
|
||||
};
|
||||
|
||||
/**
 * \brief Try to hand the graph \p g to Rose as a small set of masks.
 *
 * Gives up (returning false, with nothing added to \p rose) when the
 * allowDecoratedLiteral grey-box flag is off, the graph is not acyclic, a
 * non-special vertex has too large a degree, the graph cannot be split into
 * at most MAX_PATHS paths, or any path's mask fails RoseBuild::validateMask.
 *
 * All masks are validated before the first one is added, so a failure never
 * leaves a partial set of masks in \p rose.
 */
bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g,
                             const CompileContext &cc) {
    if (!cc.grey.allowDecoratedLiteral) {
        return false;
    }

    if (!isAcyclic(g)) {
        DEBUG_PRINTF("not acyclic\n");
        return false;
    }

    if (hasLargeDegreeVertex(g)) {
        DEBUG_PRINTF("large degree\n");
        return false;
    }

    vector<Path> paths;
    if (!findPaths(g, paths)) {
        DEBUG_PRINTF("couldn't split into a small number of paths\n");
        return false;
    }

    assert(!paths.empty());
    assert(paths.size() <= MAX_PATHS);

    // Pass one: build and validate every mask.
    vector<PathMask> masks;
    masks.reserve(paths.size());

    for (auto pit = paths.begin(); pit != paths.end(); ++pit) {
        DEBUG_PRINTF("path: %s\n", dumpPath(g, *pit).c_str());
        masks.emplace_back(g, *pit);
        PathMask &candidate = masks.back();
        if (!rose.validateMask(candidate.mask, candidate.reports,
                               candidate.is_anchored, candidate.is_eod)) {
            DEBUG_PRINTF("failed validation\n");
            return false;
        }
    }

    // Pass two: everything validated, so commit the masks to Rose.
    for (auto mit = masks.cbegin(); mit != masks.cend(); ++mit) {
        rose.addMask(mit->mask, mit->reports, mit->is_anchored, mit->is_eod);
    }

    DEBUG_PRINTF("all ok, %zu masks added\n", masks.size());
    return true;
}
|
||||
|
||||
} // namespace ue2
|
||||
52
src/nfagraph/ng_literal_decorated.h
Normal file
52
src/nfagraph/ng_literal_decorated.h
Normal file
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Analysis for literals decorated by leading/trailing assertions or
|
||||
* character classes.
|
||||
*/
|
||||
|
||||
#ifndef NFAGRAPH_NG_LITERAL_DECORATED_H
|
||||
#define NFAGRAPH_NG_LITERAL_DECORATED_H
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class RoseBuild;
|
||||
class NGHolder;
|
||||
struct CompileContext;
|
||||
|
||||
/**
 * \brief If the graph contains only a decorated literal, feed it to the Rose
 * builder. Returns true on success.
 *
 * As implemented in ng_literal_decorated.cpp, this returns false without
 * adding anything to \p rose when the allowDecoratedLiteral grey-box flag in
 * \p cc is off, when \p g is not acyclic, when a non-special vertex has too
 * large a degree, when \p g cannot be split into a small number of paths, or
 * when any path's mask is rejected by RoseBuild::validateMask. Otherwise one
 * mask per path is passed to RoseBuild::addMask.
 */
|
||||
bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g,
|
||||
const CompileContext &cc);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NFAGRAPH_NG_LITERAL_DECORATED_H
|
||||
665
src/nfagraph/ng_mcclellan.cpp
Normal file
665
src/nfagraph/ng_mcclellan.cpp
Normal file
@@ -0,0 +1,665 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Build code for McClellan DFA.
|
||||
*/
|
||||
#include "ng_mcclellan.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "nfa/dfa_min.h"
|
||||
#include "nfa/rdfa.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_mcclellan_internal.h"
|
||||
#include "ng_restructuring.h"
|
||||
#include "ng_squash.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/bitfield.h"
|
||||
#include "util/determinise.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::dynamic_bitset;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/* State limit handed to determinise() by buildMcClellan() when the graph is
 * an outfix or the caller set finalChance. */
#define FINAL_DFA_STATE_LIMIT 16383
|
||||
/* State limit handed to determinise() by buildMcClellan() in all other
 * cases. */
#define DFA_STATE_LIMIT 1024
|
||||
/* Width of the bitfield used by Automaton_Graph; buildMcClellan() falls back
 * to Automaton_Big for graphs with more vertices than this. */
#define NFA_STATE_LIMIT 256
|
||||
|
||||
/**
 * \brief Fill in the alphabet remapping tables from a list of character
 * equivalence classes.
 *
 * Class number i in \p esets becomes symbol i: every character in the class
 * maps to i in \p alpha, and \p unalpha maps i back to the first character of
 * the class. The input values from N_CHARS up to (but excluding)
 * ALPHABET_SIZE are then each given a symbol of their own, numbered after the
 * classes.
 *
 * \return the total number of symbols assigned (the alphabet size).
 */
u16 buildAlphabetFromEquivSets(const std::vector<CharReach> &esets,
                               array<u16, ALPHABET_SIZE> &alpha,
                               array<u16, ALPHABET_SIZE> &unalpha) {
    u16 sym = 0;

    while (sym < esets.size()) {
        const CharReach &members = esets[sym];
        const size_t first = members.find_first();

#ifdef DEBUG
        DEBUG_PRINTF("eq set: ");
        for (size_t c = first; c != CharReach::npos;
             c = members.find_next(c)) {
            printf("%02hhx ", (u8)c);
        }
        printf("-> %u\n", sym);
#endif

        for (size_t c = first; c != CharReach::npos;
             c = members.find_next(c)) {
            alpha[c] = sym;
        }
        // The first member of the class acts as its representative.
        unalpha[sym] = static_cast<u16>(first);
        ++sym;
    }

    for (u16 extra = N_CHARS; extra < ALPHABET_SIZE; ++extra, ++sym) {
        alpha[extra] = sym;
        unalpha[sym] = extra;
    }

    return sym; // alphabet size
}
|
||||
|
||||
void calculateAlphabet(const NGHolder &g, array<u16, ALPHABET_SIZE> &alpha,
|
||||
array<u16, ALPHABET_SIZE> &unalpha, u16 *alphasize) {
|
||||
vector<CharReach> esets(1, CharReach::dot());
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const CharReach &cr = g[v].char_reach;
|
||||
|
||||
for (size_t i = 0; i < esets.size(); i++) {
|
||||
if (esets[i].count() == 1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
CharReach t = cr & esets[i];
|
||||
if (t.any() && t != esets[i]) {
|
||||
esets[i] &= ~t;
|
||||
esets.push_back(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
// for deterministic compiles
|
||||
sort(esets.begin(), esets.end());
|
||||
|
||||
assert(alphasize);
|
||||
*alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
|
||||
}
|
||||
|
||||
static
|
||||
bool allExternalReports(const ReportManager &rm,
|
||||
const flat_set<ReportID> &reports) {
|
||||
for (auto report_id : reports) {
|
||||
if (!isExternalReport(rm.getReport(report_id))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
dstate_id_t successor(const vector<dstate> &dstates, dstate_id_t c,
|
||||
const array<u16, ALPHABET_SIZE> &alpha, symbol_t s) {
|
||||
return dstates[c].next[alpha[s]];
|
||||
}
|
||||
|
||||
void getFullTransitionFromState(const raw_dfa &n, dstate_id_t state,
|
||||
dstate_id_t *out_table) {
|
||||
for (u32 i = 0; i < ALPHABET_SIZE; i++) {
|
||||
out_table[i] = successor(n.states, state, n.alpha_remap, i);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename stateset>
|
||||
static
|
||||
void populateInit(const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
stateset *init, stateset *init_deep,
|
||||
vector<NFAVertex> *v_by_index) {
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (state_ids.at(v) == NO_STATE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
u32 vert_id = g[v].index;
|
||||
assert(vert_id < init->size());
|
||||
|
||||
if (is_any_start(v, g)) {
|
||||
init->set(vert_id);
|
||||
if (hasSelfLoop(v, g) || is_triggered(g)) {
|
||||
DEBUG_PRINTF("setting %u\n", vert_id);
|
||||
init_deep->set(vert_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
v_by_index->clear();
|
||||
v_by_index->resize(num_vertices(g), NFAGraph::null_vertex());
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
u32 vert_id = g[v].index;
|
||||
assert((*v_by_index)[vert_id] == NFAGraph::null_vertex());
|
||||
(*v_by_index)[vert_id] = v;
|
||||
}
|
||||
|
||||
if (is_triggered(g)) {
|
||||
*init_deep = *init;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename StateSet>
|
||||
void populateAccepts(const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
StateSet *accept, StateSet *acceptEod) {
|
||||
for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
|
||||
if (state_ids.at(v) != NO_STATE) {
|
||||
accept->set(g[v].index);
|
||||
}
|
||||
}
|
||||
for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
|
||||
if (v == g.accept) {
|
||||
continue;
|
||||
}
|
||||
if (state_ids.at(v) != NO_STATE) {
|
||||
acceptEod->set(g[v].index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool canPruneEdgesFromAccept(const ReportManager &rm, const NGHolder &g) {
|
||||
bool seen = false;
|
||||
u32 ekey = 0;
|
||||
|
||||
for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (auto report_id : g[v].reports) {
|
||||
const Report &ir = rm.getReport(report_id);
|
||||
|
||||
if (!isSimpleExhaustible(ir)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!seen) {
|
||||
seen = true;
|
||||
ekey = ir.ekey;
|
||||
} else if (ekey != ir.ekey) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* need to check accept eod does not have any unseen reports as well */
|
||||
for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (auto report_id : g[v].reports) {
|
||||
const Report &ir = rm.getReport(report_id);
|
||||
|
||||
if (!isSimpleExhaustible(ir)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!seen) {
|
||||
seen = true;
|
||||
ekey = ir.ekey;
|
||||
} else if (ekey != ir.ekey) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool overhangMatchesTrigger(const vector<vector<CharReach> > &all_triggers,
|
||||
vector<CharReach>::const_reverse_iterator itb,
|
||||
vector<CharReach>::const_reverse_iterator ite) {
|
||||
for (const auto &trigger : all_triggers) {
|
||||
vector<CharReach>::const_reverse_iterator it = itb;
|
||||
vector<CharReach>::const_reverse_iterator kt = trigger.rbegin();
|
||||
for (; it != ite && kt != trigger.rend(); ++it, ++kt) {
|
||||
if ((*it & *kt).none()) {
|
||||
/* this trigger does not match the overhang, try next */
|
||||
goto try_next_trigger;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
try_next_trigger:;
|
||||
}
|
||||
|
||||
return false; /* no trigger matches the over hang */
|
||||
}
|
||||
|
||||
static
|
||||
bool triggerAllowed(const NGHolder &g, const NFAVertex v,
|
||||
const vector<vector<CharReach> > &all_triggers,
|
||||
const vector<CharReach> &trigger) {
|
||||
set<NFAVertex> curr;
|
||||
set<NFAVertex> next;
|
||||
|
||||
curr.insert(v);
|
||||
|
||||
for (auto it = trigger.rbegin(); it != trigger.rend(); ++it) {
|
||||
next.clear();
|
||||
|
||||
for (auto u : curr) {
|
||||
assert(u != g.startDs); /* triggered graphs should not use sds */
|
||||
if (u == g.start) {
|
||||
if (overhangMatchesTrigger(all_triggers, it, trigger.rend())) {
|
||||
return true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((g[u].char_reach & *it).none()) {
|
||||
continue;
|
||||
}
|
||||
insert(&next, inv_adjacent_vertices(u, g));
|
||||
}
|
||||
|
||||
if (next.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
next.swap(curr);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void markToppableStarts(const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
bool single_trigger,
|
||||
const vector<vector<CharReach>> &triggers,
|
||||
dynamic_bitset<> *out) {
|
||||
if (single_trigger) {
|
||||
return; /* no live states can lead to new states */
|
||||
}
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (state_ids.at(v) == NO_STATE) {
|
||||
continue;
|
||||
}
|
||||
u32 vert_id = g[v].index;
|
||||
for (const auto &trigger : triggers) {
|
||||
if (triggerAllowed(g, v, triggers, trigger)) {
|
||||
DEBUG_PRINTF("idx %u is valid location for top\n", vert_id);
|
||||
out->set(vert_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(out->test(g[g.start].index));
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
class Automaton_Big {
|
||||
public:
|
||||
typedef dynamic_bitset<> StateSet;
|
||||
typedef map<StateSet, dstate_id_t> StateMap;
|
||||
|
||||
Automaton_Big(const ReportManager *rm_in, const NGHolder &graph_in,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids_in,
|
||||
bool single_trigger,
|
||||
const vector<vector<CharReach>> &triggers, bool prunable_in)
|
||||
: rm(rm_in), graph(graph_in), state_ids(state_ids_in),
|
||||
numStates(num_vertices(graph)), init(numStates), initDS(numStates),
|
||||
squash(numStates), accept(numStates), acceptEod(numStates),
|
||||
toppable(numStates), prunable(prunable_in), dead(numStates) {
|
||||
populateInit(graph, state_ids, &init, &initDS, &v_by_index);
|
||||
populateAccepts(graph, state_ids, &accept, &acceptEod);
|
||||
|
||||
start_anchored = DEAD_STATE + 1;
|
||||
if (initDS == init) {
|
||||
start_floating = start_anchored;
|
||||
} else if (initDS.any()) {
|
||||
start_floating = start_anchored + 1;
|
||||
} else {
|
||||
start_floating = DEAD_STATE;
|
||||
}
|
||||
|
||||
calculateAlphabet(graph, alpha, unalpha, &alphasize);
|
||||
|
||||
for (const auto &sq : findSquashers(graph)) {
|
||||
NFAVertex v = sq.first;
|
||||
u32 vert_id = graph[v].index;
|
||||
squash.set(vert_id);
|
||||
squash_mask[vert_id] = shrinkStateSet(sq.second);
|
||||
}
|
||||
|
||||
cr_by_index = populateCR(graph, v_by_index, alpha);
|
||||
if (is_triggered(graph)) {
|
||||
markToppableStarts(graph, state_ids, single_trigger, triggers,
|
||||
&toppable);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// Convert an NFAStateSet (as used by the squash code) into a StateSet
|
||||
StateSet shrinkStateSet(const NFAStateSet &in) const {
|
||||
StateSet out(dead.size());
|
||||
for (size_t i = in.find_first(); i != in.npos && i < out.size();
|
||||
i = in.find_next(i)) {
|
||||
out.set(i);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
public:
|
||||
void transition(const StateSet &in, StateSet *next) {
|
||||
transition_graph(*this, v_by_index, in, next);
|
||||
}
|
||||
|
||||
const vector<StateSet> initial() {
|
||||
vector<StateSet> rv(1, init);
|
||||
if (start_floating != DEAD_STATE && start_floating != start_anchored) {
|
||||
rv.push_back(initDS);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
private:
|
||||
void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) {
|
||||
StateSet acc = in & (eod ? acceptEod : accept);
|
||||
for (size_t i = acc.find_first(); i != StateSet::npos;
|
||||
i = acc.find_next(i)) {
|
||||
NFAVertex v = v_by_index[i];
|
||||
DEBUG_PRINTF("marking report\n");
|
||||
const auto &my_reports = graph[v].reports;
|
||||
rv.insert(my_reports.begin(), my_reports.end());
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
void reports(const StateSet &in, flat_set<ReportID> &rv) {
|
||||
reports_i(in, false, rv);
|
||||
}
|
||||
void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
|
||||
reports_i(in, true, rv);
|
||||
}
|
||||
|
||||
bool canPrune(const flat_set<ReportID> &test_reports) const {
|
||||
if (!rm || !prunable || !canPruneEdgesFromAccept(*rm, graph)) {
|
||||
return false;
|
||||
}
|
||||
return allExternalReports(*rm, test_reports);
|
||||
}
|
||||
private:
|
||||
const ReportManager *rm;
|
||||
public:
|
||||
const NGHolder &graph;
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids;
|
||||
u32 numStates;
|
||||
vector<NFAVertex> v_by_index;
|
||||
vector<CharReach> cr_by_index; /* pre alpha'ed */
|
||||
StateSet init;
|
||||
StateSet initDS;
|
||||
StateSet squash; /* states which allow us to mask out other states */
|
||||
StateSet accept;
|
||||
StateSet acceptEod;
|
||||
StateSet toppable; /* states which are allowed to be on when a top arrives,
|
||||
* triggered dfas only */
|
||||
map<u32, StateSet> squash_mask;
|
||||
bool prunable;
|
||||
StateSet dead;
|
||||
array<u16, ALPHABET_SIZE> alpha;
|
||||
array<u16, ALPHABET_SIZE> unalpha;
|
||||
u16 alphasize;
|
||||
|
||||
u16 start_anchored;
|
||||
u16 start_floating;
|
||||
};
|
||||
|
||||
class Automaton_Graph {
|
||||
public:
|
||||
typedef bitfield<NFA_STATE_LIMIT> StateSet;
|
||||
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
|
||||
|
||||
Automaton_Graph(const ReportManager *rm_in, const NGHolder &graph_in,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids_in,
|
||||
bool single_trigger,
|
||||
const vector<vector<CharReach>> &triggers, bool prunable_in)
|
||||
: rm(rm_in), graph(graph_in), state_ids(state_ids_in),
|
||||
prunable(prunable_in) {
|
||||
populateInit(graph, state_ids, &init, &initDS, &v_by_index);
|
||||
populateAccepts(graph, state_ids, &accept, &acceptEod);
|
||||
|
||||
start_anchored = DEAD_STATE + 1;
|
||||
if (initDS == init) {
|
||||
start_floating = start_anchored;
|
||||
} else if (initDS.any()) {
|
||||
start_floating = start_anchored + 1;
|
||||
} else {
|
||||
start_floating = DEAD_STATE;
|
||||
}
|
||||
|
||||
calculateAlphabet(graph, alpha, unalpha, &alphasize);
|
||||
assert(alphasize <= ALPHABET_SIZE);
|
||||
|
||||
for (const auto &sq : findSquashers(graph)) {
|
||||
NFAVertex v = sq.first;
|
||||
u32 vert_id = graph[v].index;
|
||||
squash.set(vert_id);
|
||||
squash_mask[vert_id] = shrinkStateSet(sq.second);
|
||||
}
|
||||
|
||||
cr_by_index = populateCR(graph, v_by_index, alpha);
|
||||
if (is_triggered(graph)) {
|
||||
dynamic_bitset<> temp(NFA_STATE_LIMIT);
|
||||
markToppableStarts(graph, state_ids, single_trigger, triggers,
|
||||
&temp);
|
||||
toppable = bitfield<NFA_STATE_LIMIT>(temp);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// Convert an NFAStateSet (as used by the squash code) into a StateSet
|
||||
StateSet shrinkStateSet(const NFAStateSet &in) const {
|
||||
StateSet out;
|
||||
for (size_t i = in.find_first(); i != in.npos && i < out.size();
|
||||
i = in.find_next(i)) {
|
||||
out.set(i);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
public:
|
||||
void transition(const StateSet &in, StateSet *next) {
|
||||
transition_graph(*this, v_by_index, in, next);
|
||||
}
|
||||
|
||||
const vector<StateSet> initial() {
|
||||
vector<StateSet> rv(1, init);
|
||||
if (start_floating != DEAD_STATE && start_floating != start_anchored) {
|
||||
rv.push_back(initDS);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
private:
|
||||
void reports_i(const StateSet &in, bool eod, flat_set<ReportID> &rv) {
|
||||
StateSet acc = in & (eod ? acceptEod : accept);
|
||||
for (size_t i = acc.find_first(); i != StateSet::npos;
|
||||
i = acc.find_next(i)) {
|
||||
NFAVertex v = v_by_index[i];
|
||||
DEBUG_PRINTF("marking report\n");
|
||||
const auto &my_reports = graph[v].reports;
|
||||
rv.insert(my_reports.begin(), my_reports.end());
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
void reports(const StateSet &in, flat_set<ReportID> &rv) {
|
||||
reports_i(in, false, rv);
|
||||
}
|
||||
void reportsEod(const StateSet &in, flat_set<ReportID> &rv) {
|
||||
reports_i(in, true, rv);
|
||||
}
|
||||
|
||||
bool canPrune(const flat_set<ReportID> &test_reports) const {
|
||||
if (!rm || !prunable || !canPruneEdgesFromAccept(*rm, graph)) {
|
||||
return false;
|
||||
}
|
||||
return allExternalReports(*rm, test_reports);
|
||||
}
|
||||
|
||||
private:
|
||||
const ReportManager *rm;
|
||||
public:
|
||||
const NGHolder &graph;
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids;
|
||||
vector<NFAVertex> v_by_index;
|
||||
vector<CharReach> cr_by_index; /* pre alpha'ed */
|
||||
StateSet init;
|
||||
StateSet initDS;
|
||||
StateSet squash; /* states which allow us to mask out other states */
|
||||
StateSet accept;
|
||||
StateSet acceptEod;
|
||||
StateSet toppable; /* states which are allowed to be on when a top arrives,
|
||||
* triggered dfas only */
|
||||
map<u32, StateSet> squash_mask;
|
||||
bool prunable;
|
||||
StateSet dead;
|
||||
array<u16, ALPHABET_SIZE> alpha;
|
||||
array<u16, ALPHABET_SIZE> unalpha;
|
||||
u16 alphasize;
|
||||
|
||||
u16 start_anchored;
|
||||
u16 start_floating;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, const ReportManager *rm,
|
||||
bool single_trigger,
|
||||
const vector<vector<CharReach>> &triggers,
|
||||
const Grey &grey, bool finalChance) {
|
||||
if (!grey.allowMcClellan) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Construct a mutable copy of the graph so that we can drop unused starts.
|
||||
auto g_copy = cloneHolder(g);
|
||||
NGHolder &graph = *g_copy;
|
||||
|
||||
auto state_ids = numberStates(graph);
|
||||
dropUnusedStarts(graph, state_ids);
|
||||
|
||||
DEBUG_PRINTF("attempting to build ?%d? mcclellan\n", (int)graph.kind);
|
||||
assert(allMatchStatesHaveReports(graph));
|
||||
|
||||
bool prunable = grey.highlanderPruneDFA && generates_callbacks(graph);
|
||||
assert(rm || !generates_callbacks(graph));
|
||||
if (!generates_callbacks(graph)) {
|
||||
rm = nullptr;
|
||||
}
|
||||
|
||||
assert(triggers.empty() == !is_triggered(graph));
|
||||
|
||||
/* We must be getting desperate if it is an outfix, so use the final chance
|
||||
* state limit logic */
|
||||
u32 state_limit
|
||||
= (graph.kind == NFA_OUTFIX || finalChance) ? FINAL_DFA_STATE_LIMIT
|
||||
: DFA_STATE_LIMIT;
|
||||
|
||||
unique_ptr<raw_dfa> rdfa = ue2::make_unique<raw_dfa>(graph.kind);
|
||||
|
||||
const u32 numStates = num_vertices(graph);
|
||||
DEBUG_PRINTF("determinising nfa with %u vertices\n", numStates);
|
||||
|
||||
if (numStates <= NFA_STATE_LIMIT) {
|
||||
/* Fast path. Automaton_Graph uses a bitfield internally to represent
|
||||
* states and is quicker than Automaton_Big. */
|
||||
Automaton_Graph n(rm, graph, state_ids, single_trigger, triggers,
|
||||
prunable);
|
||||
if (determinise(n, rdfa->states, state_limit)) {
|
||||
DEBUG_PRINTF("state limit exceeded\n");
|
||||
return nullptr; /* over state limit */
|
||||
}
|
||||
|
||||
rdfa->start_anchored = n.start_anchored;
|
||||
rdfa->start_floating = n.start_floating;
|
||||
rdfa->alpha_size = n.alphasize;
|
||||
rdfa->alpha_remap = n.alpha;
|
||||
} else {
|
||||
/* Slow path. Too many states to use Automaton_Graph. */
|
||||
Automaton_Big n(rm, graph, state_ids, single_trigger, triggers,
|
||||
prunable);
|
||||
if (determinise(n, rdfa->states, state_limit)) {
|
||||
DEBUG_PRINTF("state limit exceeded\n");
|
||||
return nullptr; /* over state limit */
|
||||
}
|
||||
|
||||
rdfa->start_anchored = n.start_anchored;
|
||||
rdfa->start_floating = n.start_floating;
|
||||
rdfa->alpha_size = n.alphasize;
|
||||
rdfa->alpha_remap = n.alpha;
|
||||
}
|
||||
|
||||
minimize_hopcroft(*rdfa, grey);
|
||||
|
||||
DEBUG_PRINTF("after determinised into %zu states, building impl dfa "
|
||||
"(a,f) = (%hu,%hu)\n", rdfa->states.size(),
|
||||
rdfa->start_anchored, rdfa->start_floating);
|
||||
|
||||
return rdfa;
|
||||
}
|
||||
|
||||
/**
 * \brief Convenience overload for graphs that are not triggered: builds the
 * DFA with an empty trigger list and single_trigger unset.
 */
unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, const ReportManager *rm,
                                   const Grey &grey) {
    assert(!is_triggered(g));

    const bool single_trigger = false;
    vector<vector<CharReach>> no_triggers;
    return buildMcClellan(g, rm, single_trigger, no_triggers, grey);
}
|
||||
|
||||
} // namespace ue2
|
||||
81
src/nfagraph/ng_mcclellan.h
Normal file
81
src/nfagraph/ng_mcclellan.h
Normal file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Build code for McClellan DFA.
|
||||
*/
|
||||
|
||||
#ifndef NG_MCCLELLAN_H
|
||||
#define NG_MCCLELLAN_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class CharReach;
|
||||
class NGHolder;
|
||||
class ReportManager;
|
||||
struct Grey;
|
||||
struct raw_dfa;
|
||||
|
||||
/**
|
||||
* \brief Determinises an NFA Graph into a raw_dfa.
|
||||
*
|
||||
* \param g
|
||||
* The NGHolder.
|
||||
* \param rm
|
||||
* A pointer to the ReportManager, if managed reports are used (e.g.
|
||||
* for outfixes/suffixes). Otherwise nullptr.
|
||||
* \param single_trigger
|
||||
* True if it is known that the nfa will only ever be triggered once.
|
||||
* \param triggers
|
||||
* Representing when tops may arrive. Only used by NFA_INFIX and
|
||||
* NFA_SUFFIX, should be empty for other types.
|
||||
* \param grey
|
||||
* Grey box object.
|
||||
* \param finalChance
|
||||
* Allows us to build bigger DFAs as the only alternative is an outfix.
|
||||
*
|
||||
* \return A raw_dfa, or nullptr on failure (state limit blown).
|
||||
*/
|
||||
std::unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g,
|
||||
const ReportManager *rm, bool single_trigger,
|
||||
const std::vector<std::vector<CharReach>> &triggers,
|
||||
const Grey &grey, bool finalChance = false);
|
||||
|
||||
/** Convenience wrapper for non-triggered engines */
|
||||
std::unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g,
|
||||
const ReportManager *rm,
|
||||
const Grey &grey);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_MCCLELLAN_H
|
||||
144
src/nfagraph/ng_mcclellan_internal.h
Normal file
144
src/nfagraph/ng_mcclellan_internal.h
Normal file
@@ -0,0 +1,144 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Shared build code for DFAs (McClellan, Haig).
|
||||
*/
|
||||
|
||||
#ifndef NG_MCCLELLAN_INTERNAL_H
|
||||
#define NG_MCCLELLAN_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "nfa/mcclellancompile.h"
|
||||
#include "nfagraph/ng_holder.h"
|
||||
#include "nfagraph/ng_restructuring.h" // for NO_STATE
|
||||
#include "util/charreach.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct raw_dfa;
|
||||
|
||||
/** Fills alpha, unalpha and returns alphabet size. */
|
||||
u16 buildAlphabetFromEquivSets(const std::vector<CharReach> &esets,
|
||||
std::array<u16, ALPHABET_SIZE> &alpha,
|
||||
std::array<u16, ALPHABET_SIZE> &unalpha);
|
||||
|
||||
/** \brief Calculates an alphabet remapping based on the symbols which the
|
||||
* graph discriminates on. Throws in some special DFA symbols as well. */
|
||||
void calculateAlphabet(const NGHolder &g, std::array<u16, ALPHABET_SIZE> &alpha,
|
||||
std::array<u16, ALPHABET_SIZE> &unalpha, u16 *alphasize);
|
||||
|
||||
void getFullTransitionFromState(const raw_dfa &n, u16 state,
|
||||
u16 *out_table);
|
||||
|
||||
/** produce a map of states on which it is valid to receive tops */
|
||||
void markToppableStarts(const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
bool single_trigger,
|
||||
const std::vector<std::vector<CharReach>> &triggers,
|
||||
boost::dynamic_bitset<> *out);
|
||||
|
||||
template<typename autom>
|
||||
void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId,
|
||||
const typename autom::StateSet &in,
|
||||
typename autom::StateSet *next) {
|
||||
typedef typename autom::StateSet StateSet;
|
||||
const NGHolder &graph = nfa.graph;
|
||||
const auto &state_ids = nfa.state_ids;
|
||||
const auto &alpha = nfa.alpha;
|
||||
const StateSet &squash = nfa.squash;
|
||||
const std::map<u32, StateSet> &squash_mask = nfa.squash_mask;
|
||||
const std::vector<CharReach> &cr_by_index = nfa.cr_by_index;
|
||||
|
||||
for (symbol_t s = 0; s < nfa.alphasize; s++) {
|
||||
next[s].reset();
|
||||
}
|
||||
|
||||
/* generate top transitions, false -> top = selfloop */
|
||||
bool top_allowed = is_triggered(graph);
|
||||
|
||||
StateSet succ = nfa.dead;
|
||||
for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
|
||||
NFAVertex u = vByStateId[i];
|
||||
|
||||
for (const auto &v : adjacent_vertices_range(u, graph)) {
|
||||
if (state_ids.at(v) == NO_STATE) {
|
||||
continue;
|
||||
}
|
||||
succ.set(graph[v].index);
|
||||
}
|
||||
|
||||
if (top_allowed && !nfa.toppable.test(i)) {
|
||||
/* we don't need to generate a top at this location as we are in
|
||||
* an nfa state which cannot be on when a trigger arrives. */
|
||||
top_allowed = false;
|
||||
}
|
||||
}
|
||||
|
||||
StateSet active_squash = succ & squash;
|
||||
if (active_squash.any()) {
|
||||
for (size_t j = active_squash.find_first(); j != active_squash.npos;
|
||||
j = active_squash.find_next(j)) {
|
||||
succ &= squash_mask.find(j)->second;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t j = succ.find_first(); j != succ.npos; j = succ.find_next(j)) {
|
||||
const CharReach &cr = cr_by_index[j];
|
||||
for (size_t s = cr.find_first(); s != cr.npos; s = cr.find_next(s)) {
|
||||
next[s].set(j); /* already alpha'ed */
|
||||
}
|
||||
}
|
||||
|
||||
next[alpha[TOP]] = in;
|
||||
|
||||
if (top_allowed) {
|
||||
/* we don't add in the anchored starts as the only case as the only
|
||||
* time it is appropriate is if no characters have been consumed.*/
|
||||
next[alpha[TOP]] |= nfa.initDS;
|
||||
|
||||
active_squash = next[alpha[TOP]] & squash;
|
||||
if (active_squash.any()) {
|
||||
for (size_t j = active_squash.find_first(); j != active_squash.npos;
|
||||
j = active_squash.find_next(j)) {
|
||||
next[alpha[TOP]] &= squash_mask.find(j)->second;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
549
src/nfagraph/ng_misc_opt.cpp
Normal file
549
src/nfagraph/ng_misc_opt.cpp
Normal file
@@ -0,0 +1,549 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Miscellaneous optimisations.
|
||||
*
|
||||
* We sometimes see patterns of the form:
|
||||
*
|
||||
* /^.*<[^<]*foobaz/s
|
||||
*
|
||||
* This is bad for Rose as the escapes from the cyclic state are the same as
|
||||
* the trigger. However, we can transform this into:
|
||||
*
|
||||
* /^.*<.*foobaz/s
|
||||
*
|
||||
* ... as the first dot star can eat all but the last '<'.
|
||||
*
|
||||
* Slightly more formally:
|
||||
*
|
||||
* Given a cyclic state v with character reachability v_cr and proper preds
|
||||
* {p1 .. pn} with character reachability {p1_cr .. pn_cr}.
|
||||
*
|
||||
* let v_cr' = union(intersection(p1_cr .. pn_cr), v_cr)
|
||||
*
|
||||
* v_cr can be replaced with v_cr' without changing the behaviour of the system
|
||||
* if:
|
||||
*
|
||||
* for any given proper pred pi: if pi is set in the nfa then after consuming
|
||||
* any symbol in v_cr', pi will still be set in the nfa and every successor of
|
||||
* v is a successor of pi.
|
||||
*
|
||||
* The easiest way for this condition to be satisfied is for every pred of each
* proper pred pi to have an edge to some pred of pi whose character
* reachability contains v_cr'. There are, however, other ways to establish
* that the condition holds.
|
||||
*
|
||||
* Note: a similar transformation can be applied in reverse, details left as an
|
||||
* exercise for the interested reader. */
|
||||
#include "ng_misc_opt.h"
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_util.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
void findCandidates(NGHolder &g, const vector<NFAVertex> &ordering,
|
||||
vector<NFAVertex> *cand) {
|
||||
for (auto it = ordering.rbegin(), ite = ordering.rend(); it != ite; ++it) {
|
||||
NFAVertex v = *it;
|
||||
|
||||
if (is_special(v, g)
|
||||
|| !hasSelfLoop(v, g)
|
||||
|| g[v].char_reach.all()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// For `v' to be a candidate, its predecessors must all have the same
|
||||
// successor set as `v'.
|
||||
|
||||
set<NFAVertex> succ_v, succ_u;
|
||||
succ(g, v, &succ_v);
|
||||
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
succ_u.clear();
|
||||
succ(g, u, &succ_u);
|
||||
if (succ_v != succ_u) {
|
||||
goto next_cand;
|
||||
}
|
||||
}
|
||||
DEBUG_PRINTF("vertex %u is a candidate\n", g[v].index);
|
||||
cand->push_back(v);
|
||||
next_cand:;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void findCandidates_rev(NGHolder &g, const vector<NFAVertex> &ordering,
|
||||
vector<NFAVertex> *cand) {
|
||||
for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) {
|
||||
NFAVertex v = *it;
|
||||
|
||||
if (is_special(v, g)
|
||||
|| !hasSelfLoop(v, g)
|
||||
|| g[v].char_reach.all()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// For `v' to be a candidate, its predecessors must all have the same
|
||||
// successor set as `v'.
|
||||
|
||||
set<NFAVertex> pred_v, pred_u;
|
||||
pred(g, v, &pred_v);
|
||||
|
||||
for (auto u : adjacent_vertices_range(v, g)) {
|
||||
pred_u.clear();
|
||||
pred(g, u, &pred_u);
|
||||
if (pred_v != pred_u) {
|
||||
goto next_cand;
|
||||
}
|
||||
}
|
||||
DEBUG_PRINTF("vertex %u is a candidate\n", g[v].index);
|
||||
cand->push_back(v);
|
||||
next_cand:;
|
||||
}
|
||||
}
|
||||
|
||||
/** Find the intersection of the reachability of the predecessors of \p v. */
|
||||
static
|
||||
void predCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) {
|
||||
add.setall();
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (u != v) {
|
||||
add &= g[u].char_reach;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Find the intersection of the reachability of the successors of \p v. */
|
||||
static
|
||||
void succCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) {
|
||||
add.setall();
|
||||
for (auto u : adjacent_vertices_range(v, g)) {
|
||||
if (u != v) {
|
||||
add &= g[u].char_reach;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** The sustain set is used to show that once vertex p is on it stays on given
|
||||
* the alphabet new_cr. Every vertex pp in the sustain set has the following
|
||||
* properties:
|
||||
* -# an edge to p
|
||||
* -# enough edges to vertices in the sustain set to ensure that a vertex in
|
||||
* the sustain set will be on after consuming a character. */
|
||||
static
|
||||
set<NFAVertex> findSustainSet(const NGHolder &g, NFAVertex p,
|
||||
bool ignore_starts, const CharReach &new_cr) {
|
||||
set<NFAVertex> cand;
|
||||
pred(g, p, &cand);
|
||||
if (ignore_starts) {
|
||||
cand.erase(g.startDs);
|
||||
}
|
||||
/* remove elements from cand until the sustain set property holds */
|
||||
bool changed;
|
||||
do {
|
||||
DEBUG_PRINTF("|cand| %zu\n", cand.size());
|
||||
changed = false;
|
||||
set<NFAVertex>::const_iterator it = cand.begin();
|
||||
while (it != cand.end()) {
|
||||
NFAVertex u = *it;
|
||||
++it;
|
||||
CharReach sus_cr;
|
||||
for (auto v : adjacent_vertices_range(u, g)) {
|
||||
if (contains(cand, v)) {
|
||||
sus_cr |= g[v].char_reach;
|
||||
}
|
||||
}
|
||||
|
||||
if (!new_cr.isSubsetOf(sus_cr)) {
|
||||
cand.erase(u);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
} while (changed);
|
||||
|
||||
/* Note: it may be possible to find a (larger) sustain set for a smaller
|
||||
* new_cr */
|
||||
return cand;
|
||||
}
|
||||
|
||||
/** Finds the reverse version of the sustain set.. whatever that means. */
|
||||
static
|
||||
set<NFAVertex> findSustainSet_rev(const NGHolder &g, NFAVertex p,
|
||||
const CharReach &new_cr) {
|
||||
set<NFAVertex> cand;
|
||||
succ(g, p, &cand);
|
||||
/* remove elements from cand until the sustain set property holds */
|
||||
bool changed;
|
||||
do {
|
||||
changed = false;
|
||||
set<NFAVertex>::const_iterator it = cand.begin();
|
||||
while (it != cand.end()) {
|
||||
NFAVertex u = *it;
|
||||
++it;
|
||||
CharReach sus_cr;
|
||||
for (auto v : inv_adjacent_vertices_range(u, g)) {
|
||||
if (contains(cand, v)) {
|
||||
sus_cr |= g[v].char_reach;
|
||||
}
|
||||
}
|
||||
|
||||
if (!new_cr.isSubsetOf(sus_cr)) {
|
||||
cand.erase(u);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
} while (changed);
|
||||
|
||||
/* Note: it may be possible to find a (larger) sustain set for a smaller
|
||||
* new_cr */
|
||||
return cand;
|
||||
}
|
||||
|
||||
static
|
||||
bool enlargeCyclicVertex(NGHolder &g, som_type som, NFAVertex v) {
|
||||
DEBUG_PRINTF("considering vertex %u\n", g[v].index);
|
||||
const CharReach &v_cr = g[v].char_reach;
|
||||
|
||||
CharReach add;
|
||||
predCRIntersection(g, v, add);
|
||||
|
||||
add |= v_cr;
|
||||
|
||||
if (add == v_cr) {
|
||||
DEBUG_PRINTF("no benefit\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("cr of width %zu up for grabs\n", add.count() - v_cr.count());
|
||||
|
||||
for (auto p : inv_adjacent_vertices_range(v, g)) {
|
||||
if (p == v) {
|
||||
continue;
|
||||
}
|
||||
DEBUG_PRINTF("looking at pred %u\n", g[p].index);
|
||||
|
||||
bool ignore_sds = som; /* if we are tracking som, entries into a state
|
||||
from sds are significant. */
|
||||
|
||||
set<NFAVertex> sustain = findSustainSet(g, p, ignore_sds, add);
|
||||
DEBUG_PRINTF("sustain set is %zu\n", sustain.size());
|
||||
if (sustain.empty()) {
|
||||
DEBUG_PRINTF("yawn\n");
|
||||
}
|
||||
|
||||
for (auto pp : inv_adjacent_vertices_range(p, g)) {
|
||||
/* we need to ensure that whenever pp sets p, that a member of the
|
||||
sustain set is set. Note: p's cr may be not be a subset of
|
||||
new_cr */
|
||||
CharReach sustain_cr;
|
||||
for (auto pv : adjacent_vertices_range(pp, g)) {
|
||||
if (contains(sustain, pv)) {
|
||||
sustain_cr |= g[pv].char_reach;
|
||||
}
|
||||
}
|
||||
if (!g[p].char_reach.isSubsetOf(sustain_cr)) {
|
||||
DEBUG_PRINTF("unable to establish that preds are forced on\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* the cr can be increased */
|
||||
g[v].char_reach = add;
|
||||
DEBUG_PRINTF("vertex %u was widened\n", g[v].index);
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool enlargeCyclicVertex_rev(NGHolder &g, NFAVertex v) {
|
||||
DEBUG_PRINTF("considering vertex %u\n", g[v].index);
|
||||
const CharReach &v_cr = g[v].char_reach;
|
||||
|
||||
CharReach add;
|
||||
succCRIntersection(g, v, add);
|
||||
|
||||
add |= v_cr;
|
||||
|
||||
if (add == v_cr) {
|
||||
DEBUG_PRINTF("no benefit\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("cr of width %zu up for grabs\n", add.count() - v_cr.count());
|
||||
|
||||
for (auto p : adjacent_vertices_range(v, g)) {
|
||||
if (p == v) {
|
||||
continue;
|
||||
}
|
||||
DEBUG_PRINTF("looking at succ %u\n", g[p].index);
|
||||
|
||||
set<NFAVertex> sustain = findSustainSet_rev(g, p, add);
|
||||
DEBUG_PRINTF("sustain set is %zu\n", sustain.size());
|
||||
if (sustain.empty()) {
|
||||
DEBUG_PRINTF("yawn\n");
|
||||
}
|
||||
|
||||
for (auto pp : adjacent_vertices_range(p, g)) {
|
||||
/* we need to ensure something - see fwd ver */
|
||||
CharReach sustain_cr;
|
||||
for (auto pv : inv_adjacent_vertices_range(pp, g)) {
|
||||
if (contains(sustain, pv)) {
|
||||
sustain_cr |= g[pv].char_reach;
|
||||
}
|
||||
}
|
||||
if (!g[p].char_reach.isSubsetOf(sustain_cr)) {
|
||||
DEBUG_PRINTF("unable to establish that succs are thingy\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* the cr can be increased */
|
||||
g[v].char_reach = add;
|
||||
DEBUG_PRINTF("vertex %u was widened\n", g[v].index);
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool enlargeCyclicCR(NGHolder &g, som_type som,
|
||||
const vector<NFAVertex> &ordering) {
|
||||
DEBUG_PRINTF("hello\n");
|
||||
|
||||
vector<NFAVertex> candidates;
|
||||
findCandidates(g, ordering, &candidates);
|
||||
|
||||
bool rv = false;
|
||||
for (auto v : candidates) {
|
||||
rv |= enlargeCyclicVertex(g, som, v);
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
static
|
||||
bool enlargeCyclicCR_rev(NGHolder &g, const vector<NFAVertex> &ordering) {
|
||||
DEBUG_PRINTF("olleh\n");
|
||||
|
||||
vector<NFAVertex> candidates;
|
||||
findCandidates_rev(g, ordering, &candidates);
|
||||
|
||||
bool rv = false;
|
||||
for (auto v : candidates) {
|
||||
rv |= enlargeCyclicVertex_rev(g, v);
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
/* Runs the forward and then the reverse cyclic-reach widening passes over g.
 * Returns true if either pass changed the graph. */
bool improveGraph(NGHolder &g, som_type som) {
    /* use a topo ordering so that we can get chains of cyclic states
     * done in one sweep */
    const vector<NFAVertex> ordering = getTopoOrdering(g);

    /* Both passes must always run, and both mutate g. They are sequenced as
     * separate statements because the operands of a single `|` expression
     * may be evaluated in either order, which would make the result depend
     * on the compiler. */
    const bool fwd_changed = enlargeCyclicCR(g, som, ordering);
    const bool rev_changed = enlargeCyclicCR_rev(g, ordering);

    return fwd_changed || rev_changed;
}
|
||||
|
||||
/** finds a smaller reachability for a state by the reverse transformation of
|
||||
* enlargeCyclicCR. */
|
||||
CharReach reduced_cr(NFAVertex v, const NGHolder &g,
|
||||
const map<NFAVertex, BoundedRepeatSummary> &br_cyclic) {
|
||||
DEBUG_PRINTF("find minimal cr for %u\n", g[v].index);
|
||||
CharReach v_cr = g[v].char_reach;
|
||||
if (proper_in_degree(v, g) != 1) {
|
||||
return v_cr;
|
||||
}
|
||||
|
||||
NFAVertex pred = getSoleSourceVertex(g, v);
|
||||
assert(pred);
|
||||
|
||||
/* require pred to be fed by one vertex OR (start + startDS) */
|
||||
NFAVertex predpred;
|
||||
size_t idp = in_degree(pred, g);
|
||||
if (hasSelfLoop(pred, g)) {
|
||||
return v_cr; /* not cliche */
|
||||
} else if (idp == 1) {
|
||||
predpred = getSoleSourceVertex(g, pred);
|
||||
} else if (idp == 2
|
||||
&& edge(g.start, pred, g).second
|
||||
&& edge(g.startDs, pred, g).second) {
|
||||
predpred = g.startDs;
|
||||
} else {
|
||||
return v_cr; /* not cliche */
|
||||
}
|
||||
|
||||
assert(predpred);
|
||||
|
||||
/* require predpred to be cyclic and its cr to be a superset of
|
||||
pred and v */
|
||||
if (!hasSelfLoop(predpred, g)) {
|
||||
return v_cr; /* not cliche */
|
||||
}
|
||||
|
||||
if (contains(br_cyclic, predpred)
|
||||
&& !br_cyclic.at(predpred).unbounded()) {
|
||||
return v_cr; /* fake cyclic */
|
||||
}
|
||||
|
||||
const CharReach &p_cr = g[pred].char_reach;
|
||||
const CharReach &pp_cr = g[predpred].char_reach;
|
||||
if (!v_cr.isSubsetOf(pp_cr) || !p_cr.isSubsetOf(pp_cr)) {
|
||||
return v_cr; /* not cliche */
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("confirming [x]* prop\n");
|
||||
/* we require all of v succs to be succ of p */
|
||||
set<NFAVertex> v_succ;
|
||||
insert(&v_succ, adjacent_vertices(v, g));
|
||||
set<NFAVertex> p_succ;
|
||||
insert(&p_succ, adjacent_vertices(pred, g));
|
||||
|
||||
if (!is_subset_of(v_succ, p_succ)) {
|
||||
DEBUG_PRINTF("fail\n");
|
||||
return v_cr; /* not cliche */
|
||||
}
|
||||
|
||||
if (contains(v_succ, g.accept) || contains(v_succ, g.acceptEod)) {
|
||||
/* need to check that reports of v are a subset of p's */
|
||||
if (!is_subset_of(g[v].reports,
|
||||
g[pred].reports)) {
|
||||
DEBUG_PRINTF("fail - reports not subset\n");
|
||||
return v_cr; /* not cliche */
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("woot success\n");
|
||||
v_cr &= ~p_cr;
|
||||
return v_cr;
|
||||
}
|
||||
|
||||
/* Computes the reduced reach of every vertex in g. The result has one entry
 * per vertex, stored at the position given by the vertex's index, so the
 * graph must be correctly numbered (checked by assertion). */
vector<CharReach> reduced_cr(const NGHolder &g,
                const map<NFAVertex, BoundedRepeatSummary> &br_cyclic) {
    assert(hasCorrectlyNumberedVertices(g));

    vector<CharReach> by_index(num_vertices(g));
    for (auto v : vertices_range(g)) {
        by_index[g[v].index] = reduced_cr(v, g, br_cyclic);
    }

    return by_index;
}
|
||||
|
||||
static
|
||||
bool anyOutSpecial(NFAVertex v, const NGHolder &g) {
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (is_special(w, g) && w != v) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Looks for cyclic dot vertices hanging off start and hands their work over
 * to startDs: every successor of such a vertex that also has an edge from
 * start gets an edge from startDs, and the corresponding out-edge of the
 * cyclic dot is dropped. A cyclic dot whose proper out-edges are all dropped
 * this way is removed entirely. Returns true if the graph was modified. */
bool mergeCyclicDotStars(NGHolder &g) {
    // avoid graphs where startDs is not a free spirit
    if (out_degree(g.startDs, g) > 1) {
        return false;
    }

    set<NFAVertex> doomed_vertices;
    set<NFAEdge> doomed_edges;

    // examine each non-special vertex connected to start
    for (auto v : adjacent_vertices_range(g.start, g)) {
        if (is_special(v, g)) {
            continue;
        }

        // only cyclic dots are of interest
        const bool cyclic_dot = g[v].char_reach.all() && edge(v, v, g).second;
        if (!cyclic_dot) {
            continue;
        }

        // prevent insane graphs
        if (anyOutSpecial(v, g)) {
            continue;
        }

        // out-edges of v that startDs takes over; whether v itself goes is
        // decided once they have all been examined
        vector<NFAEdge> redirected;
        for (const auto &e : out_edges_range(v, g)) {
            const NFAVertex t = target(e, g);

            // skip the self-loop and targets without an edge from start
            if (t == v || !edge(g.start, t, g).second) {
                continue;
            }

            add_edge_if_not_present(g.startDs, t, g);
            redirected.push_back(e);
        }

        // if every proper out-edge was redirected the vertex is redundant;
        // otherwise only the redirected edges are dropped
        if (redirected.size() == proper_out_degree(v, g)) {
            doomed_vertices.insert(v);
        } else {
            doomed_edges.insert(redirected.begin(), redirected.end());
        }
    }

    const bool nothing_to_do = doomed_vertices.empty() && doomed_edges.empty();
    if (nothing_to_do) {
        return false;
    }

    DEBUG_PRINTF("removing %zu edges and %zu vertices\n", doomed_edges.size(),
                 doomed_vertices.size());
    remove_edges(doomed_edges, g);
    remove_vertices(doomed_vertices, g);
    /* some predecessors to the cyclic vertices may no longer be useful (no out
     * edges), so we can remove them */
    pruneUseless(g);
    return true;
}
|
||||
|
||||
} // namespace ue2
|
||||
77
src/nfagraph/ng_misc_opt.h
Normal file
77
src/nfagraph/ng_misc_opt.h
Normal file
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Miscellaneous optimisations.
|
||||
*/
|
||||
|
||||
#ifndef NG_MISC_OPT_H
|
||||
#define NG_MISC_OPT_H
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "som/som.h"
|
||||
#include "util/depth.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** Small structure describing the bounds on a repeat. */
|
||||
struct BoundedRepeatSummary {
|
||||
BoundedRepeatSummary(void) : repeatMin(0), repeatMax(depth::infinity()) {}
|
||||
BoundedRepeatSummary(const depth &min_in, const depth &max_in)
|
||||
: repeatMin(min_in), repeatMax(max_in) {
|
||||
assert(repeatMin <= repeatMax);
|
||||
assert(repeatMax.is_reachable());
|
||||
}
|
||||
bool unbounded(void) const { return repeatMax.is_infinite(); }
|
||||
|
||||
depth repeatMin; //!< minimum repeat bound.
|
||||
depth repeatMax; //!< maximum repeat bound.
|
||||
};
|
||||
|
||||
/* returns true if anything changed */
|
||||
bool improveGraph(NGHolder &g, som_type som);
|
||||
|
||||
/** Sometimes the reach of a vertex is greater than it needs to be to reduce
|
||||
* stop chars for the benefit of the rest of our code base (accel, sidecar,
|
||||
* etc). In these circumstances, we can treat the reach as the smaller one as
|
||||
* the graphs are equivalent. */
|
||||
CharReach reduced_cr(NFAVertex v, const NGHolder &g,
|
||||
const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic);
|
||||
|
||||
std::vector<CharReach> reduced_cr(const NGHolder &g,
|
||||
const std::map<NFAVertex, BoundedRepeatSummary> &br_cyclic);
|
||||
|
||||
/** Remove cyclic stars connected to start */
|
||||
bool mergeCyclicDotStars(NGHolder &g);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
220
src/nfagraph/ng_netflow.cpp
Normal file
220
src/nfagraph/ng_netflow.cpp
Normal file
@@ -0,0 +1,220 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Network flow (max flow, min cut) algorithms.
|
||||
*/
|
||||
#include "ng_netflow.h"
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ng_literal_analysis.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <boost/graph/boykov_kolmogorov_max_flow.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::default_color_type;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
void addReverseEdge(const NGHolder &g, vector<NFAEdge> &reverseEdge,
|
||||
NFAEdge fwd, NFAEdge rev) {
|
||||
u32 fwdIndex = g[fwd].index;
|
||||
u32 revIndex = g[rev].index;
|
||||
|
||||
// Make sure our vector is big enough.
|
||||
size_t sz = max(fwdIndex, revIndex) + 1;
|
||||
if (reverseEdge.size() < sz) {
|
||||
reverseEdge.resize(sz);
|
||||
}
|
||||
|
||||
// Add entries to list.
|
||||
reverseEdge[fwdIndex] = rev;
|
||||
reverseEdge[revIndex] = fwd;
|
||||
}
|
||||
|
||||
/** Add temporary reverse edges to the graph \p g, as they are required by the
|
||||
* BGL's boykov_kolmogorov_max_flow algorithm. */
|
||||
static
|
||||
void addReverseEdges(NGHolder &g, vector<NFAEdge> &reverseEdge,
|
||||
vector<u64a> &capacityMap) {
|
||||
// We're probably going to need space for 2x edge count.
|
||||
const size_t numEdges = num_edges(g);
|
||||
reverseEdge.reserve(numEdges * 2);
|
||||
capacityMap.reserve(numEdges * 2);
|
||||
|
||||
// To avoid walking the graph for _ages_, we build a temporary map of all
|
||||
// edges indexed by vertex pair for existence checks.
|
||||
map<pair<size_t, size_t>, NFAEdge> allEdges;
|
||||
for (const auto &e : edges_range(g)) {
|
||||
NFAVertex u = source(e, g), v = target(e, g);
|
||||
size_t uidx = g[u].index, vidx = g[v].index;
|
||||
allEdges[make_pair(uidx, vidx)] = e;
|
||||
}
|
||||
|
||||
// Now we walk over all edges and add their reverse edges to the reverseEdge
|
||||
// vector, also adding them to the graph when they don't already exist.
|
||||
for (const auto &m : allEdges) {
|
||||
const NFAEdge &fwd = m.second;
|
||||
const size_t uidx = m.first.first, vidx = m.first.second;
|
||||
|
||||
auto it = allEdges.find(make_pair(vidx, uidx));
|
||||
if (it == allEdges.end()) {
|
||||
// No reverse edge, add one.
|
||||
NFAVertex u = source(fwd, g), v = target(fwd, g);
|
||||
NFAEdge rev = add_edge(v, u, g).first;
|
||||
it = allEdges.insert(make_pair(make_pair(vidx, uidx), rev)).first;
|
||||
// Add to capacity map.
|
||||
u32 revIndex = g[rev].index;
|
||||
if (capacityMap.size() < revIndex + 1) {
|
||||
capacityMap.resize(revIndex + 1);
|
||||
}
|
||||
capacityMap[revIndex] = 0;
|
||||
}
|
||||
|
||||
addReverseEdge(g, reverseEdge, fwd, it->second);
|
||||
}
|
||||
}
|
||||
|
||||
/** Remove all edges with indices >= \p idx. */
|
||||
static
|
||||
void removeEdgesFromIndex(NGHolder &g, vector<u64a> &capacityMap, u32 idx) {
|
||||
remove_edge_if([&](const NFAEdge &e) { return g[e].index >= idx; }, g);
|
||||
capacityMap.resize(idx);
|
||||
}
|
||||
|
||||
/** A wrapper around boykov_kolmogorov_max_flow, returns the max flow and
|
||||
* colour map (from which we can find the min cut). */
|
||||
static
|
||||
u64a getMaxFlow(NGHolder &h, const vector<u64a> &capacityMap_in,
|
||||
vector<default_color_type> &colorMap) {
|
||||
vector<u64a> capacityMap = capacityMap_in;
|
||||
NFAVertex src = h.start;
|
||||
NFAVertex sink = h.acceptEod;
|
||||
|
||||
// netflow relies on these stylised edges, as all starts should be covered
|
||||
// by our source and all accepts by our sink.
|
||||
assert(edge(h.start, h.startDs, h).second);
|
||||
assert(edge(h.accept, h.acceptEod, h).second);
|
||||
|
||||
// The boykov_kolmogorov_max_flow algorithm requires us to have reverse
|
||||
// edges for all edges in the graph, so we create them here (and remove
|
||||
// them after the call).
|
||||
const unsigned int numRealEdges = num_edges(h);
|
||||
vector<NFAEdge> reverseEdges;
|
||||
addReverseEdges(h, reverseEdges, capacityMap);
|
||||
|
||||
const unsigned int numTotalEdges = num_edges(h);
|
||||
const unsigned int numVertices = num_vertices(h);
|
||||
|
||||
vector<u64a> edgeResiduals(numTotalEdges);
|
||||
vector<NFAEdge> predecessors(numVertices);
|
||||
vector<s32> distances(numVertices);
|
||||
assert(colorMap.size() == numVertices);
|
||||
|
||||
const NFAGraph &g = h.g;
|
||||
auto v_index_map = get(&NFAGraphVertexProps::index, g);
|
||||
auto e_index_map = get(&NFAGraphEdgeProps::index, g);
|
||||
|
||||
u64a flow = boykov_kolmogorov_max_flow(g,
|
||||
make_iterator_property_map(capacityMap.begin(), e_index_map),
|
||||
make_iterator_property_map(edgeResiduals.begin(), e_index_map),
|
||||
make_iterator_property_map(reverseEdges.begin(), e_index_map),
|
||||
make_iterator_property_map(predecessors.begin(), v_index_map),
|
||||
make_iterator_property_map(colorMap.begin(), v_index_map),
|
||||
make_iterator_property_map(distances.begin(), v_index_map),
|
||||
v_index_map,
|
||||
src, sink);
|
||||
|
||||
// Remove reverse edges from graph.
|
||||
removeEdgesFromIndex(h, capacityMap, numRealEdges);
|
||||
assert(num_edges(h.g) == numRealEdges);
|
||||
|
||||
DEBUG_PRINTF("flow = %llu\n", flow);
|
||||
return flow;
|
||||
}
|
||||
|
||||
/** Returns a min cut (in \p cutset) for the graph in \p h. */
|
||||
vector<NFAEdge> findMinCut(NGHolder &h, const vector<u64a> &scores) {
|
||||
assert(hasCorrectlyNumberedEdges(h));
|
||||
assert(hasCorrectlyNumberedVertices(h));
|
||||
|
||||
vector<default_color_type> colorMap(num_vertices(h));
|
||||
u64a flow = getMaxFlow(h, scores, colorMap);
|
||||
|
||||
vector<NFAEdge> picked_white;
|
||||
vector<NFAEdge> picked_black;
|
||||
u64a observed_black_flow = 0;
|
||||
u64a observed_white_flow = 0;
|
||||
|
||||
for (const auto &e : edges_range(h)) {
|
||||
NFAVertex from = source(e, h);
|
||||
NFAVertex to = target(e, h);
|
||||
u64a ec = scores[h[e].index];
|
||||
if (ec == 0) {
|
||||
continue; // skips, among other things, reverse edges
|
||||
}
|
||||
|
||||
default_color_type fromColor = colorMap[h[from].index];
|
||||
default_color_type toColor = colorMap[h[to].index];
|
||||
|
||||
if (fromColor != boost::white_color && toColor == boost::white_color) {
|
||||
assert(ec <= INVALID_EDGE_CAP);
|
||||
DEBUG_PRINTF("found white cut edge %u->%u cap %llu\n",
|
||||
h[from].index, h[to].index, ec);
|
||||
observed_white_flow += ec;
|
||||
picked_white.push_back(e);
|
||||
}
|
||||
if (fromColor == boost::black_color && toColor != boost::black_color) {
|
||||
assert(ec <= INVALID_EDGE_CAP);
|
||||
DEBUG_PRINTF("found black cut edge %u->%u cap %llu\n",
|
||||
h[from].index, h[to].index, ec);
|
||||
observed_black_flow += ec;
|
||||
picked_black.push_back(e);
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("min flow = %llu b flow = %llu w flow %llu\n", flow,
|
||||
observed_black_flow, observed_white_flow);
|
||||
if (MIN(observed_white_flow, observed_black_flow) != flow) {
|
||||
DEBUG_PRINTF("bad cut\n");
|
||||
}
|
||||
|
||||
if (observed_white_flow < observed_black_flow) {
|
||||
return picked_white;
|
||||
} else {
|
||||
return picked_black;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
49
src/nfagraph/ng_netflow.h
Normal file
49
src/nfagraph/ng_netflow.h
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Network flow (max flow, min cut) algorithms.
|
||||
*/
|
||||
#ifndef NG_NETFLOW_H
|
||||
#define NG_NETFLOW_H
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
|
||||
/** Returns a min cut for the graph in \p h, as a vector of cut edges. */
|
||||
std::vector<NFAEdge> findMinCut(NGHolder &h, const std::vector<u64a> &scores);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
374
src/nfagraph/ng_prefilter.cpp
Normal file
374
src/nfagraph/ng_prefilter.cpp
Normal file
@@ -0,0 +1,374 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Prefilter Reductions.
|
||||
*
|
||||
* This file contains routines for reducing the size of an NFA graph that we
|
||||
* know will be used as a prefilter.
|
||||
*
|
||||
* The approach used is to consider the graph as a chain of region subgraphs,
|
||||
* and to reduce the size of the graph by replacing regions with constructs
|
||||
* that can be implemented in fewer states.
|
||||
*
|
||||
* Right now, the approach used is to replace a region with a bounded repeat of
|
||||
* vertices (with bounds derived from the min/max width of the region
|
||||
* subgraph). These vertices are given the union of the region's character
|
||||
* reachability.
|
||||
*
|
||||
* For regions with bounded max width, this strategy is quite dependent on the
|
||||
* LimEx NFA's bounded repeat functionality.
|
||||
*/
|
||||
#include "ng_prefilter.h"
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ng_region.h"
|
||||
#include "ng_util.h"
|
||||
#include "ng_width.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/container.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include <queue>
|
||||
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::adaptors::map_values;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** Region replacement continues while the tracked vertex count is greater
 * than this value. Graphs with no more vertices than this are left alone. */
static constexpr size_t MAX_COMPONENT_VERTICES = 128;

/** Regions with fewer vertices than this are never considered for
 * replacement. */
static constexpr size_t MIN_REPLACE_VERTICES = 2;

/** Estimated number of vertices needed to represent a bounded repeat in the
 * implementation NFA; added to the running vertex count for each region that
 * is replaced. */
static constexpr size_t BOUNDED_REPEAT_COUNT = 4;

/** Amount subtracted (saturating at zero) from the score of a boundary
 * region. */
static constexpr size_t PENALTY_BOUNDARY = 32;
|
||||
|
||||
namespace {
|
||||
|
||||
/** Information describing a region. */
|
||||
struct RegionInfo {
|
||||
explicit RegionInfo(u32 id_in) : id(id_in) {}
|
||||
u32 id; //!< region id
|
||||
deque<NFAVertex> vertices; //!< vertices in the region
|
||||
CharReach reach; //!< union of region reach
|
||||
depth minWidth = 0; //!< min width of region subgraph
|
||||
depth maxWidth = depth::infinity(); //!< max width of region subgraph
|
||||
bool atBoundary = false; //!< region is next to an accept
|
||||
|
||||
// Bigger score is better.
|
||||
size_t score() const {
|
||||
// FIXME: charreach should be a signal?
|
||||
size_t numVertices = vertices.size();
|
||||
if (atBoundary) {
|
||||
return numVertices - min(PENALTY_BOUNDARY, numVertices);
|
||||
} else {
|
||||
return numVertices;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/** Comparator used to order regions for consideration in a priority queue. */
|
||||
struct RegionInfoQueueComp {
|
||||
bool operator()(const RegionInfo &r1, const RegionInfo &r2) const {
|
||||
size_t score1 = r1.score(), score2 = r2.score();
|
||||
if (score1 != score2) {
|
||||
return score1 < score2;
|
||||
}
|
||||
if (r1.reach.count() != r2.reach.count()) {
|
||||
return r1.reach.count() < r2.reach.count();
|
||||
}
|
||||
return r1.id < r2.id;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
static
|
||||
void findWidths(const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map,
|
||||
RegionInfo &ri) {
|
||||
NGHolder rg;
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> mapping;
|
||||
fillHolder(&rg, g, ri.vertices, &mapping);
|
||||
|
||||
// Wire our entries to start and our exits to accept.
|
||||
for (auto v : ri.vertices) {
|
||||
NFAVertex v_new = mapping[v];
|
||||
assert(v_new != NFAGraph::null_vertex());
|
||||
|
||||
if (isRegionEntry(g, v, region_map) &&
|
||||
!edge(rg.start, v_new, rg).second) {
|
||||
add_edge(rg.start, v_new, rg);
|
||||
}
|
||||
if (isRegionExit(g, v, region_map) &&
|
||||
!edge(v_new, rg.accept, rg).second) {
|
||||
add_edge(v_new, rg.accept, rg);
|
||||
}
|
||||
}
|
||||
|
||||
ri.minWidth = findMinWidth(rg);
|
||||
ri.maxWidth = findMaxWidth(rg);
|
||||
}
|
||||
|
||||
// acc can be either h.accept or h.acceptEod.
|
||||
static
|
||||
void markBoundaryRegions(const NGHolder &h,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map,
|
||||
map<u32, RegionInfo> ®ions, NFAVertex acc) {
|
||||
for (auto v : inv_adjacent_vertices_range(acc, h)) {
|
||||
if (is_special(v, h)) {
|
||||
continue;
|
||||
}
|
||||
u32 id = region_map.at(v);
|
||||
|
||||
map<u32, RegionInfo>::iterator ri = regions.find(id);
|
||||
if (ri == regions.end()) {
|
||||
continue; // Not tracking this region as it's too small.
|
||||
}
|
||||
|
||||
ri->second.atBoundary = true;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
map<u32, RegionInfo> findRegionInfo(const NGHolder &h,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map) {
|
||||
map<u32, RegionInfo> regions;
|
||||
for (auto v : vertices_range(h)) {
|
||||
if (is_special(v, h)) {
|
||||
continue;
|
||||
}
|
||||
u32 id = region_map.at(v);
|
||||
RegionInfo &ri = regions.insert(
|
||||
make_pair(id, RegionInfo(id))).first->second;
|
||||
ri.vertices.push_back(v);
|
||||
ri.reach |= h[v].char_reach;
|
||||
}
|
||||
|
||||
// There's no point tracking more information about regions that we won't
|
||||
// consider replacing, so we remove them from the region map.
|
||||
for (map<u32, RegionInfo>::iterator it = regions.begin();
|
||||
it != regions.end();) {
|
||||
if (it->second.vertices.size() < MIN_REPLACE_VERTICES) {
|
||||
regions.erase(it++);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("%zu regions\n", regions.size());
|
||||
|
||||
markBoundaryRegions(h, region_map, regions, h.accept);
|
||||
markBoundaryRegions(h, region_map, regions, h.acceptEod);
|
||||
|
||||
// Determine min/max widths.
|
||||
for (RegionInfo &ri : regions | map_values) {
|
||||
findWidths(h, region_map, ri);
|
||||
DEBUG_PRINTF("region %u %shas widths [%s,%s]\n", ri.id,
|
||||
ri.atBoundary ? "(boundary) " : "",
|
||||
ri.minWidth.str().c_str(), ri.maxWidth.str().c_str());
|
||||
}
|
||||
|
||||
return regions;
|
||||
}
|
||||
|
||||
static
|
||||
void copyInEdges(NGHolder &g, NFAVertex from, NFAVertex to,
|
||||
const ue2::unordered_set<NFAVertex> &rverts) {
|
||||
for (const auto &e : in_edges_range(from, g)) {
|
||||
NFAVertex u = source(e, g);
|
||||
if (contains(rverts, u)) {
|
||||
continue;
|
||||
}
|
||||
if (edge(u, to, g).second) {
|
||||
continue;
|
||||
}
|
||||
|
||||
add_edge(u, to, g[e], g);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void copyOutEdges(NGHolder &g, NFAVertex from, NFAVertex to,
|
||||
const ue2::unordered_set<NFAVertex> &rverts) {
|
||||
for (const auto &e : out_edges_range(from, g)) {
|
||||
NFAVertex t = target(e, g);
|
||||
if (contains(rverts, t)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
add_edge_if_not_present(to, t, g[e], g);
|
||||
|
||||
if (is_any_accept(t, g)) {
|
||||
const auto &reports = g[from].reports;
|
||||
g[to].reports.insert(reports.begin(), reports.end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void replaceRegion(NGHolder &g, const RegionInfo &ri,
|
||||
size_t *verticesAdded, size_t *verticesRemoved) {
|
||||
// TODO: more complex replacements.
|
||||
assert(ri.vertices.size() >= MIN_REPLACE_VERTICES);
|
||||
assert(ri.minWidth.is_finite());
|
||||
|
||||
size_t replacementSize;
|
||||
if (ri.minWidth == ri.maxWidth || ri.maxWidth.is_infinite()) {
|
||||
replacementSize = ri.minWidth; // {N} or {N,}
|
||||
} else {
|
||||
replacementSize = ri.maxWidth; // {N,M} case
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("orig size %zu, replace size %zu\n", ri.vertices.size(),
|
||||
replacementSize);
|
||||
|
||||
deque<NFAVertex> verts;
|
||||
for (size_t i = 0; i < replacementSize; i++) {
|
||||
NFAVertex v = add_vertex(g);
|
||||
g[v].char_reach = ri.reach;
|
||||
if (i > 0) {
|
||||
add_edge(verts.back(), v, g);
|
||||
}
|
||||
verts.push_back(v);
|
||||
}
|
||||
|
||||
if (ri.maxWidth.is_infinite()) {
|
||||
add_edge(verts.back(), verts.back(), g);
|
||||
}
|
||||
|
||||
// Set of vertices in region, for quick lookups.
|
||||
const ue2::unordered_set<NFAVertex> rverts(ri.vertices.begin(),
|
||||
ri.vertices.end());
|
||||
|
||||
for (size_t i = 0; i < replacementSize; i++) {
|
||||
NFAVertex v_new = verts[i];
|
||||
|
||||
for (auto v_old : ri.vertices) {
|
||||
if (i == 0) {
|
||||
copyInEdges(g, v_old, v_new, rverts);
|
||||
}
|
||||
if (i + 1 >= ri.minWidth) {
|
||||
copyOutEdges(g, v_old, v_new, rverts);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
remove_vertices(ri.vertices, g, false);
|
||||
|
||||
*verticesAdded = verts.size();
|
||||
*verticesRemoved = ri.vertices.size();
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct SourceHasEdgeToAccept {
|
||||
explicit SourceHasEdgeToAccept(const NGHolder &g_in) : g(g_in) {}
|
||||
bool operator()(const NFAEdge &e) const {
|
||||
return edge(source(e, g), g.accept, g).second;
|
||||
}
|
||||
const NGHolder &g;
|
||||
};
|
||||
}
|
||||
|
||||
static
|
||||
void reduceRegions(NGHolder &h) {
|
||||
map<u32, RegionInfo> regions = findRegionInfo(h, assignRegions(h));
|
||||
|
||||
RegionInfoQueueComp cmp;
|
||||
priority_queue<RegionInfo, deque<RegionInfo>, RegionInfoQueueComp> pq(cmp);
|
||||
|
||||
size_t numVertices = 0;
|
||||
for (const RegionInfo &ri : regions | map_values) {
|
||||
numVertices += ri.vertices.size();
|
||||
pq.push(ri);
|
||||
}
|
||||
|
||||
while (numVertices > MAX_COMPONENT_VERTICES && !pq.empty()) {
|
||||
const RegionInfo &ri = pq.top();
|
||||
DEBUG_PRINTF("region %u: vertices=%zu reach=%s score=%zu, "
|
||||
"widths=[%s,%s]\n",
|
||||
ri.id, ri.vertices.size(), describeClass(ri.reach).c_str(),
|
||||
ri.score(), ri.minWidth.str().c_str(),
|
||||
ri.maxWidth.str().c_str());
|
||||
|
||||
size_t verticesAdded = 0;
|
||||
size_t verticesRemoved = 0;
|
||||
replaceRegion(h, ri, &verticesAdded, &verticesRemoved);
|
||||
DEBUG_PRINTF("%zu vertices removed, %zu vertices added\n",
|
||||
verticesRemoved, verticesAdded);
|
||||
|
||||
// We are trusting that implementation NFAs will be able to use the
|
||||
// LimEx bounded repeat code here.
|
||||
numVertices -= verticesRemoved;
|
||||
numVertices += BOUNDED_REPEAT_COUNT;
|
||||
|
||||
DEBUG_PRINTF("numVertices is now %zu\n", numVertices);
|
||||
pq.pop();
|
||||
}
|
||||
|
||||
// We may have vertices that have edges to both accept and acceptEod: in
|
||||
// this case, we can optimize for performance by removing the acceptEod
|
||||
// edges.
|
||||
remove_in_edge_if(h.acceptEod, SourceHasEdgeToAccept(h), h.g);
|
||||
}
|
||||
|
||||
/**
 * Entry point for prefilter reductions. Does nothing if the
 * prefilterReductions grey box option is off or if \p h has no more than
 * MAX_COMPONENT_VERTICES vertices. Otherwise the graph is renumbered,
 * reduced region by region, and renumbered again.
 */
void prefilterReductions(NGHolder &h, const CompileContext &cc) {
    if (!cc.grey.prefilterReductions) {
        return;
    }

    const size_t vertex_count = num_vertices(h);
    if (vertex_count <= MAX_COMPONENT_VERTICES) {
        DEBUG_PRINTF("graph is already small enough (%zu vertices)\n",
                     vertex_count);
        return;
    }

    DEBUG_PRINTF("graph with %zu vertices\n", vertex_count);

    // Renumber before the reduction pass...
    h.renumberVertices();
    h.renumberEdges();

    reduceRegions(h);

    // ...and again afterwards, as vertices have been added and removed.
    h.renumberVertices();
    h.renumberEdges();
}
|
||||
|
||||
} // namespace ue2
|
||||
45
src/nfagraph/ng_prefilter.h
Normal file
45
src/nfagraph/ng_prefilter.h
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Prefilter Reductions.
|
||||
*/
|
||||
|
||||
#ifndef NG_PREFILTER_H
|
||||
#define NG_PREFILTER_H
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
struct CompileContext;
|
||||
|
||||
void prefilterReductions(NGHolder &h, const CompileContext &cc);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
438
src/nfagraph/ng_prune.cpp
Normal file
438
src/nfagraph/ng_prune.cpp
Normal file
@@ -0,0 +1,438 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Functions for pruning unreachable vertices or reports from the graph.
|
||||
*/
|
||||
#include "ng_prune.h"
|
||||
|
||||
#include "ng_dominators.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_reports.h"
|
||||
#include "ng_util.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/report_manager.h"
|
||||
|
||||
#include <deque>
|
||||
#include <map>
|
||||
|
||||
#include <boost/graph/depth_first_search.hpp>
|
||||
#include <boost/graph/reverse_graph.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::default_color_type;
|
||||
using boost::reverse_graph;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** Remove any vertices that can't be reached by traversing the graph in
|
||||
* reverse from acceptEod. */
|
||||
void pruneUnreachable(NGHolder &g) {
|
||||
deque<NFAVertex> dead;
|
||||
|
||||
if (!hasGreaterInDegree(1, g.acceptEod, g) &&
|
||||
!hasGreaterInDegree(0, g.accept, g) &&
|
||||
edge(g.accept, g.acceptEod, g).second) {
|
||||
// Trivial case: there are no in-edges to our accepts (other than
|
||||
// accept->acceptEod), so all non-specials are unreachable.
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (!is_special(v, g)) {
|
||||
dead.push_back(v);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Walk a reverse graph from acceptEod with Boost's depth_first_visit
|
||||
// call.
|
||||
typedef reverse_graph<NFAGraph, NFAGraph&> RevNFAGraph;
|
||||
RevNFAGraph revg(g.g);
|
||||
|
||||
map<NFAVertex, default_color_type> colours;
|
||||
|
||||
depth_first_visit(revg, g.acceptEod,
|
||||
make_dfs_visitor(boost::null_visitor()),
|
||||
make_assoc_property_map(colours));
|
||||
|
||||
DEBUG_PRINTF("color map has %zu entries after DFV\n", colours.size());
|
||||
|
||||
// All non-special vertices that aren't in the colour map (because they
|
||||
// weren't reached) can be removed.
|
||||
for (auto v : vertices_range(revg)) {
|
||||
if (is_special(v, revg)) {
|
||||
continue;
|
||||
}
|
||||
if (!contains(colours, v)) {
|
||||
dead.push_back(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (dead.empty()) {
|
||||
DEBUG_PRINTF("no unreachable vertices\n");
|
||||
return;
|
||||
}
|
||||
|
||||
remove_vertices(dead, g, false);
|
||||
DEBUG_PRINTF("removed %zu unreachable vertices\n", dead.size());
|
||||
}
|
||||
|
||||
template<class nfag_t>
|
||||
static
|
||||
bool pruneForwardUseless(NGHolder &h, const nfag_t &g, NFAVertex s,
|
||||
vector<default_color_type> &vertexColor) {
|
||||
// Begin with all vertices set to white, as DFV only marks visited
|
||||
// vertices.
|
||||
fill(vertexColor.begin(), vertexColor.end(), boost::white_color);
|
||||
|
||||
auto index_map = get(&NFAGraphVertexProps::index, g);
|
||||
|
||||
depth_first_visit(g, s, make_dfs_visitor(boost::null_visitor()),
|
||||
make_iterator_property_map(vertexColor.begin(),
|
||||
index_map));
|
||||
|
||||
vector<NFAVertex> dead;
|
||||
|
||||
// All non-special vertices that are still white can be removed.
|
||||
for (auto v : vertices_range(g)) {
|
||||
u32 idx = g[v].index;
|
||||
if (!is_special(v, g) && vertexColor[idx] == boost::white_color) {
|
||||
DEBUG_PRINTF("vertex %u is unreachable from %u\n",
|
||||
g[v].index, g[s].index);
|
||||
dead.push_back(v);
|
||||
}
|
||||
}
|
||||
|
||||
if (dead.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("removing %zu vertices\n", dead.size());
|
||||
remove_vertices(dead, h, false);
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Remove any vertices which can't be reached by traversing the graph forward
|
||||
* from start or in reverse from acceptEod. If \p renumber is false, no
|
||||
* vertex/edge renumbering is done. */
|
||||
void pruneUseless(NGHolder &g, bool renumber) {
|
||||
DEBUG_PRINTF("pruning useless vertices\n");
|
||||
assert(hasCorrectlyNumberedVertices(g));
|
||||
vector<default_color_type> vertexColor(num_vertices(g));
|
||||
|
||||
bool work_done = pruneForwardUseless(g, g.g, g.start, vertexColor);
|
||||
work_done |= pruneForwardUseless(
|
||||
g, reverse_graph<NFAGraph, NFAGraph &>(g.g), g.acceptEod, vertexColor);
|
||||
|
||||
if (!work_done) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (renumber) {
|
||||
g.renumberEdges();
|
||||
g.renumberVertices();
|
||||
}
|
||||
}
|
||||
|
||||
/** This code removes any vertices which do not accept any symbols. Any
|
||||
* vertices which no longer lie on a path from a start to an accept are also
|
||||
* pruned. */
|
||||
void pruneEmptyVertices(NGHolder &g) {
|
||||
DEBUG_PRINTF("pruning empty vertices\n");
|
||||
vector<NFAVertex> dead;
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const CharReach &cr = g[v].char_reach;
|
||||
if (cr.none()) {
|
||||
DEBUG_PRINTF("empty: %u\n", g[v].index);
|
||||
dead.push_back(v);
|
||||
}
|
||||
}
|
||||
|
||||
if (dead.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
remove_vertices(dead, g);
|
||||
pruneUseless(g);
|
||||
}
|
||||
|
||||
/** Remove any edges from vertices that generate accepts (for Highlander
|
||||
* graphs). */
|
||||
void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm) {
|
||||
// Safety check: all reports must be simple exhaustible reports, or this is
|
||||
// not safe. This optimisation should be called early enough that no
|
||||
// internal reports have been added.
|
||||
for (auto report_id : all_reports(g)) {
|
||||
const Report &ir = rm.getReport(report_id);
|
||||
|
||||
if (ir.ekey == INVALID_EKEY || ir.hasBounds() ||
|
||||
!isExternalReport(ir)) {
|
||||
DEBUG_PRINTF("report %u is not external highlander with "
|
||||
"no bounds\n", report_id);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
vector<NFAEdge> dead;
|
||||
for (auto u : inv_adjacent_vertices_range(g.accept, g)) {
|
||||
if (is_special(u, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// We can prune any out-edges that aren't accepts
|
||||
for (const auto &e : out_edges_range(u, g)) {
|
||||
if (!is_any_accept(target(e, g), g)) {
|
||||
dead.push_back(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (dead.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("found %zu removable edges due to single match\n", dead.size());
|
||||
remove_edges(dead, g);
|
||||
pruneUseless(g);
|
||||
}
|
||||
|
||||
static
|
||||
bool isDominatedByReporter(const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, NFAVertex> &dom,
|
||||
NFAVertex v, ReportID report_id) {
|
||||
for (auto it = dom.find(v); it != end(dom); it = dom.find(v)) {
|
||||
NFAVertex u = it->second;
|
||||
// Note: reporters with edges only to acceptEod are not considered to
|
||||
// dominate.
|
||||
if (edge(u, g.accept, g).second && contains(g[u].reports, report_id)) {
|
||||
DEBUG_PRINTF("%u is dominated by %u, and both report %u\n",
|
||||
g[v].index, g[u].index, report_id);
|
||||
return true;
|
||||
}
|
||||
v = u;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* True if the vertex has (a) a self-loop, (b) only out-edges to accept and
|
||||
* itself and (c) only simple exhaustible reports.
|
||||
*/
|
||||
static
|
||||
bool hasOnlySelfLoopAndExhaustibleAccepts(const NGHolder &g,
|
||||
const ReportManager &rm,
|
||||
NFAVertex v) {
|
||||
if (!edge(v, v, g).second) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (w != v && w != g.accept) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &report_id : g[v].reports) {
|
||||
if (!isSimpleExhaustible(rm.getReport(report_id))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Two clean-ups for vertices carrying simple exhaustible reports:
 *
 *  1. a report is dropped from a vertex when a dominator of that vertex
 *     (with an edge to accept) carries the same report; a vertex left with
 *     no reports loses its edges to accept and acceptEod;
 *  2. a self-loop is removed from a vertex that otherwise leads only to
 *     accept and has only simple exhaustible reports.
 *
 * If any edge was removed, the graph is pruned and its edges renumbered. */
void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm) {
    vector<NFAVertex> reporters;

    // Record each predecessor of the given accept vertex that has at least
    // one simple exhaustible report.
    auto gather_reporters = [&](NFAVertex acc) {
        for (auto v : inv_adjacent_vertices_range(acc, g)) {
            for (const auto &report_id : g[v].reports) {
                if (isSimpleExhaustible(rm.getReport(report_id))) {
                    reporters.push_back(v);
                    break;
                }
            }
        }
    };
    gather_reporters(g.accept);
    gather_reporters(g.acceptEod);

    if (reporters.empty()) {
        return;
    }

    // A vertex may feed both accepts, so order by index and drop duplicates.
    sort(begin(reporters), end(reporters), make_index_ordering(g));
    reporters.erase(unique(begin(reporters), end(reporters)), end(reporters));

    DEBUG_PRINTF("%zu vertices have simple exhaustible reports\n",
                 reporters.size());

    const auto &dom = findDominators(g);
    bool modified = false;

    // Pass 1: strip reports that a dominating reporter already raises; a
    // vertex with no reports left is disconnected from both accepts.
    for (const auto v : reporters) {
        // Iterate over a snapshot, since the live set is edited below.
        const auto snapshot = g[v].reports;
        for (const auto &report_id : snapshot) {
            if (isSimpleExhaustible(rm.getReport(report_id)) &&
                isDominatedByReporter(g, dom, v, report_id)) {
                DEBUG_PRINTF("removed dominated report %u from vertex %u\n",
                             report_id, g[v].index);
                g[v].reports.erase(report_id);
            }
        }

        if (!g[v].reports.empty()) {
            continue;
        }

        DEBUG_PRINTF("removed edges to accepts from %u, no reports left\n",
                     g[v].index);
        remove_edge(v, g.accept, g);
        remove_edge(v, g.acceptEod, g);
        modified = true;
    }

    // Pass 2: a reporter with a self-loop that otherwise leads only to accept
    // (note: NOT acceptEod) and has only simple exhaustible reports does not
    // need the self-loop.
    for (const auto v : reporters) {
        if (!hasOnlySelfLoopAndExhaustibleAccepts(g, rm, v)) {
            continue;
        }
        remove_edge(v, v, g);
        modified = true;
        DEBUG_PRINTF("removed self-loop on %u\n", g[v].index);
    }

    if (modified) {
        pruneUseless(g);

        // pruneUseless renumbers only when it removes a vertex; if all we did
        // was delete self-loops, the edges still need renumbering.
        g.renumberEdges();
    }
}
|
||||
|
||||
/** Removes the given Report ID from vertices connected to accept, and then
|
||||
* prunes useless vertices that have had their report sets reduced to empty. */
|
||||
void pruneReport(NGHolder &g, ReportID report) {
|
||||
set<NFAEdge> dead;
|
||||
|
||||
for (const auto &e : in_edges_range(g.accept, g)) {
|
||||
NFAVertex u = source(e, g);
|
||||
auto &reports = g[u].reports;
|
||||
if (contains(reports, report)) {
|
||||
reports.erase(report);
|
||||
if (reports.empty()) {
|
||||
dead.insert(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &e : in_edges_range(g.acceptEod, g)) {
|
||||
NFAVertex u = source(e, g);
|
||||
if (u == g.accept) {
|
||||
continue;
|
||||
}
|
||||
auto &reports = g[u].reports;
|
||||
if (contains(reports, report)) {
|
||||
reports.erase(report);
|
||||
if (reports.empty()) {
|
||||
dead.insert(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (dead.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
remove_edges(dead, g);
|
||||
pruneUnreachable(g);
|
||||
g.renumberVertices();
|
||||
g.renumberEdges();
|
||||
}
|
||||
|
||||
/** Removes all Report IDs bar the given one from vertices connected to accept,
|
||||
* and then prunes useless vertices that have had their report sets reduced to
|
||||
* empty. */
|
||||
void pruneAllOtherReports(NGHolder &g, ReportID report) {
|
||||
set<NFAEdge> dead;
|
||||
|
||||
for (const auto &e : in_edges_range(g.accept, g)) {
|
||||
NFAVertex u = source(e, g);
|
||||
auto &reports = g[u].reports;
|
||||
if (contains(reports, report)) {
|
||||
reports.clear();
|
||||
reports.insert(report);
|
||||
} else {
|
||||
reports.clear();
|
||||
dead.insert(e);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &e : in_edges_range(g.acceptEod, g)) {
|
||||
NFAVertex u = source(e, g);
|
||||
if (u == g.accept) {
|
||||
continue;
|
||||
}
|
||||
auto &reports = g[u].reports;
|
||||
if (contains(reports, report)) {
|
||||
reports.clear();
|
||||
reports.insert(report);
|
||||
} else {
|
||||
reports.clear();
|
||||
dead.insert(e);
|
||||
}
|
||||
}
|
||||
|
||||
if (dead.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
remove_edges(dead, g);
|
||||
pruneUnreachable(g);
|
||||
g.renumberVertices();
|
||||
g.renumberEdges();
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
75
src/nfagraph/ng_prune.h
Normal file
75
src/nfagraph/ng_prune.h
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Functions for pruning unreachable vertices or reports from the graph.
|
||||
*/
|
||||
|
||||
#ifndef NG_PRUNE_H
#define NG_PRUNE_H

#include "ue2common.h"

namespace ue2 {

class NGHolder;
class ReportManager;

/** Prune every vertex that a reverse traversal starting at acceptEod does
 * not reach. */
void pruneUnreachable(NGHolder &g);

/** Prune every vertex that is not reachable forwards from start or is not
 * reachable in reverse from acceptEod. Vertex/edge renumbering is skipped
 * when \p renumber is false. */
void pruneUseless(NGHolder &g, bool renumber = true);

/** Prune vertices whose character reachability is empty, followed by a
 * pruneUseless() pass. */
void pruneEmptyVertices(NGHolder &g);

/** For Highlander graphs: remove the non-accept out-edges of vertices that
 * generate accepts. */
void pruneHighlanderAccepts(NGHolder &g, const ReportManager &rm);

/**
 * Prune highlander (simple exhaustible) reports that are dominated by an
 * earlier vertex raising the same report, and remove self-loops from
 * reporters that otherwise lead only to accept.
 */
void pruneHighlanderDominated(NGHolder &g, const ReportManager &rm);

/** Remove Report ID \p report from the vertices connected to the accepts,
 * then prune vertices that no longer lead to an accept because their report
 * sets were reduced to empty. */
void pruneReport(NGHolder &g, ReportID report);

/** Remove every Report ID except \p report from the vertices connected to
 * the accepts, then prune vertices that no longer lead to an accept because
 * their report sets were reduced to empty. */
void pruneAllOtherReports(NGHolder &g, ReportID report);

} // namespace ue2

#endif // NG_PRUNE_H
|
||||
578
src/nfagraph/ng_puff.cpp
Normal file
578
src/nfagraph/ng_puff.cpp
Normal file
@@ -0,0 +1,578 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Puff construction from NGHolder.
|
||||
*/
|
||||
#include "ng_puff.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "ng_depth.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_repeat.h"
|
||||
#include "ng_reports.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/nfa_api_queue.h"
|
||||
#include "nfa/mpvcompile.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/report_manager.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** Fewest chain vertices that doComponent() will accept as a puff. */
static const unsigned MIN_PUFF_LENGTH = 16;

/** Number of vertices improveHead() takes off the head end of a puff chain to
 * leave behind as a trigger. */
static const unsigned HEAD_BACKOFF = 16;
|
||||
|
||||
static
|
||||
size_t countChain(const NGHolder &g, NFAVertex v) {
|
||||
size_t count = 0;
|
||||
while (v) {
|
||||
DEBUG_PRINTF("counting vertex %u\n", g[v].index);
|
||||
if (is_special(v, g)) {
|
||||
break;
|
||||
}
|
||||
|
||||
count++;
|
||||
v = getSoleDestVertex(g, v);
|
||||
}
|
||||
DEBUG_PRINTF("done %zu\n", count);
|
||||
return count;
|
||||
}
|
||||
|
||||
static
|
||||
void wireNewAccepts(NGHolder &g, NFAVertex head,
|
||||
const flat_set<ReportID> &chain_reports) {
|
||||
for (auto u : inv_adjacent_vertices_range(head, g)) {
|
||||
if (is_special(u, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("adding edge: %u -> accept\n", g[u].index);
|
||||
assert(!edge(u, g.accept, g).second);
|
||||
assert(!edge(u, g.acceptEod, g).second);
|
||||
add_edge(u, g.accept, g);
|
||||
|
||||
// Replace reports with our chain reports.
|
||||
auto &u_reports = g[u].reports;
|
||||
u_reports.clear();
|
||||
u_reports.insert(chain_reports.begin(), chain_reports.end());
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool isFixedDepth(const NGHolder &g, NFAVertex v) {
|
||||
// If the vertex is reachable from startDs, it can't be fixed depth.
|
||||
vector<DepthMinMax> depthFromStartDs;
|
||||
calcDepthsFrom(g, g.startDs, depthFromStartDs);
|
||||
|
||||
u32 idx = g[v].index;
|
||||
const DepthMinMax &ds = depthFromStartDs.at(idx);
|
||||
if (ds.min.is_reachable()) {
|
||||
DEBUG_PRINTF("vertex reachable from startDs\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<DepthMinMax> depthFromStart;
|
||||
calcDepthsFrom(g, g.start, depthFromStart);
|
||||
|
||||
/* we can still consider the head of a puff chain as at fixed depth if
|
||||
* it has a self-loop: so we look at all the preds of v (other than v
|
||||
* itself) */
|
||||
|
||||
assert(v && !is_special(v, g));
|
||||
|
||||
u32 count = 0;
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (u == v) {
|
||||
continue; // self-loop
|
||||
}
|
||||
count++;
|
||||
|
||||
idx = g[u].index;
|
||||
const DepthMinMax &d = depthFromStart.at(idx);
|
||||
if (d.min != d.max) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return count != 0; // at least one fixed-depth pred
|
||||
}
|
||||
|
||||
static
|
||||
bool singleStart(const NGHolder &g) {
|
||||
set<NFAVertex> seen;
|
||||
|
||||
for (auto v : adjacent_vertices_range(g.start, g)) {
|
||||
if (!is_special(v, g)) {
|
||||
DEBUG_PRINTF("saw %u\n", g[v].index);
|
||||
seen.insert(v);
|
||||
}
|
||||
}
|
||||
for (auto v : adjacent_vertices_range(g.startDs, g)) {
|
||||
if (!is_special(v, g)) {
|
||||
DEBUG_PRINTF("saw %u\n", g[v].index);
|
||||
seen.insert(v);
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("comp has %zu starts\n", seen.size());
|
||||
|
||||
return seen.size() == 1;
|
||||
}
|
||||
|
||||
static
|
||||
bool triggerResetsPuff(const NGHolder &g, NFAVertex head) {
|
||||
const CharReach puff_escapes = ~g[head].char_reach;
|
||||
|
||||
for (auto u : inv_adjacent_vertices_range(head, g)) {
|
||||
if (!g[u].char_reach.isSubsetOf(puff_escapes)) {
|
||||
DEBUG_PRINTF("no reset on trigger %u %u\n", g[u].index,
|
||||
g[head].index);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("reset on trigger\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
/** ".*[X]{N}" can be treated as ".*[X]{N,}" (misc_opt does reverse transform)
|
||||
* */
|
||||
static
|
||||
bool triggerFloodsPuff(const NGHolder &g, NFAVertex head) {
|
||||
DEBUG_PRINTF("head = %u\n", g[head].index);
|
||||
|
||||
const CharReach &puff_cr = g[head].char_reach;
|
||||
|
||||
/* we can use the pred of the head as the base of our check if it the cr
|
||||
* matches as if
|
||||
* head cr subsetof pred cr: if head is being pushed on then puff must
|
||||
* still being pushed on
|
||||
* pred cr subsetof head cr: if the puff matches then head must be also
|
||||
* always be on if the is connected to a wide enough cyclic
|
||||
*/
|
||||
if (proper_in_degree(head, g) == 1
|
||||
&& puff_cr == g[getSoleSourceVertex(g, head)].char_reach) {
|
||||
head = getSoleSourceVertex(g, head);
|
||||
DEBUG_PRINTF("temp new head = %u\n", g[head].index);
|
||||
}
|
||||
|
||||
for (auto s : inv_adjacent_vertices_range(head, g)) {
|
||||
DEBUG_PRINTF("s = %u\n", g[s].index);
|
||||
if (!puff_cr.isSubsetOf(g[s].char_reach)) {
|
||||
DEBUG_PRINTF("no flood on trigger %u %u\n",
|
||||
g[s].index, g[head].index);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!hasSelfLoop(s, g) && s != g.start) {
|
||||
DEBUG_PRINTF("no self loop\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (s == g.start && !edge(g.startDs, head, g).second) {
|
||||
DEBUG_PRINTF("not float\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("reset on trigger\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
u32 allowedSquashDistance(const CharReach &cr, u32 min_width, const NGHolder &g,
|
||||
NFAVertex pv, bool prefilter) {
|
||||
CharReach accept_cr;
|
||||
DEBUG_PRINTF("hello |cr|=%zu %d\n", cr.count(), (int)cr.find_first());
|
||||
|
||||
if (prefilter) {
|
||||
/* a later prefilter stage make weaken the lead up so we can't be sure
|
||||
* that all the triggers will be squashing the puffette. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* TODO: inspect further back in the pattern */
|
||||
for (auto u : inv_adjacent_vertices_range(pv, g)) {
|
||||
accept_cr |= g[u].char_reach;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count());
|
||||
|
||||
if ((accept_cr & cr).any()) {
|
||||
return 0; /* the accept byte doesn't always kill the puffette. TODO:
|
||||
* maybe if we look further back we could find something that
|
||||
* would kill the puffette... */
|
||||
}
|
||||
DEBUG_PRINTF("returning squash distance of %u\n", min_width);
|
||||
return min_width;
|
||||
}
|
||||
|
||||
/** Gives a stronger puff trigger when the trigger is connected to a wide
|
||||
* cyclic state (aside from sds) */
|
||||
static
|
||||
void improveHead(NGHolder &g, NFAVertex *a, vector<NFAVertex> *nodes) {
|
||||
DEBUG_PRINTF("attempting to improve puff trigger\n");
|
||||
assert(!nodes->empty());
|
||||
const CharReach &puff_cr = g[nodes->back()].char_reach;
|
||||
if (puff_cr.all()) {
|
||||
return; /* we can't really do much with this one */
|
||||
}
|
||||
|
||||
/* add the runway */
|
||||
DEBUG_PRINTF("backing off - allowing a decent header\n");
|
||||
assert(nodes->size() > HEAD_BACKOFF);
|
||||
for (u32 i = 0; i < HEAD_BACKOFF - 1; i++) {
|
||||
nodes->pop_back();
|
||||
}
|
||||
*a = nodes->back();
|
||||
nodes->pop_back();
|
||||
}
|
||||
|
||||
static
|
||||
void constructPuff(NGHolder &g, const NFAVertex a, const NFAVertex puffv,
|
||||
const CharReach &cr, const ReportID report, u32 width,
|
||||
bool fixed_depth, bool unbounded, bool auto_restart,
|
||||
RoseBuild &rose, ReportManager &rm,
|
||||
flat_set<ReportID> &chain_reports, bool prefilter) {
|
||||
DEBUG_PRINTF("constructing Puff for report %u\n", report);
|
||||
DEBUG_PRINTF("a = %u\n", g[a].index);
|
||||
|
||||
const bool pureAnchored = a == g.start && singleStart(g);
|
||||
if (!pureAnchored) {
|
||||
if (a == g.startDs || a == g.start) {
|
||||
DEBUG_PRINTF("add outfix ar(false)\n");
|
||||
|
||||
raw_puff rp(width, unbounded, report, cr, auto_restart);
|
||||
rose.addOutfix(rp);
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("add chain tail\n");
|
||||
u32 qi = ~0U;
|
||||
u32 event = MQE_TOP;
|
||||
raw_puff rp(width, unbounded, report, cr);
|
||||
rose.addChainTail(rp, &qi, &event);
|
||||
assert(qi != ~0U);
|
||||
u32 squashDistance = allowedSquashDistance(cr, width, g, puffv,
|
||||
prefilter);
|
||||
|
||||
Report ir = makeRoseTrigger(event, squashDistance);
|
||||
/* only need to trigger once if floatingUnboundedDot */
|
||||
bool floatingUnboundedDot = unbounded && cr.all() && !fixed_depth;
|
||||
if (floatingUnboundedDot) {
|
||||
ir.ekey = rm.getUnassociatedExhaustibleKey();
|
||||
}
|
||||
ReportID id = rm.getInternalId(ir);
|
||||
chain_reports.insert(id);
|
||||
} else {
|
||||
DEBUG_PRINTF("add outfix ar(%d)\n", (int)auto_restart);
|
||||
assert(!auto_restart || unbounded);
|
||||
raw_puff rp(width, unbounded, report, cr, auto_restart);
|
||||
rose.addOutfix(rp);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool doComponent(RoseBuild &rose, ReportManager &rm, NGHolder &g, NFAVertex a,
|
||||
set<NFAVertex> &dead, const CompileContext &cc,
|
||||
bool prefilter) {
|
||||
DEBUG_PRINTF("hello\n");
|
||||
vector<NFAVertex> nodes;
|
||||
const CharReach &cr = g[a].char_reach;
|
||||
bool isDot = cr.all();
|
||||
bool unbounded = false;
|
||||
bool exhaustible = can_exhaust(g, rm);
|
||||
|
||||
while (a) {
|
||||
if (is_special(a, g)) {
|
||||
DEBUG_PRINTF("stopped puffing due to special vertex\n");
|
||||
break;
|
||||
}
|
||||
|
||||
if (g[a].char_reach != cr) {
|
||||
DEBUG_PRINTF("stopped puffing due to change in character "
|
||||
"reachability\n");
|
||||
break;
|
||||
}
|
||||
|
||||
if (proper_in_degree(a, g) != 1) {
|
||||
DEBUG_PRINTF("stopped puffing due to in degree != 1\n");
|
||||
break;
|
||||
}
|
||||
|
||||
size_t outDegree = out_degree(a, g);
|
||||
if (outDegree != 1 && (!hasSelfLoop(a, g) || outDegree != 2)) {
|
||||
DEBUG_PRINTF("stopping puffing due to out degree\n");
|
||||
break;
|
||||
}
|
||||
|
||||
if (hasSelfLoop(a, g)) {
|
||||
DEBUG_PRINTF("has self-loop, marking unbounded\n");
|
||||
unbounded = true;
|
||||
}
|
||||
|
||||
nodes.push_back(a);
|
||||
DEBUG_PRINTF("vertex %u has in_degree %zu\n", g[a].index,
|
||||
in_degree(a, g));
|
||||
|
||||
a = getSoleSourceVertex(g, a);
|
||||
|
||||
if (!a) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Snark: we can't handle this case, because we can only handle a
|
||||
// single report ID on a vertex
|
||||
if (is_match_vertex(a, g)) {
|
||||
DEBUG_PRINTF("stop puffing due to vertex that leads to accept\n");
|
||||
if (!nodes.empty()) {
|
||||
nodes.pop_back();
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!nodes.empty() && proper_in_degree(nodes.back(), g) != 1) {
|
||||
for (auto u : inv_adjacent_vertices_range(nodes.back(), g)) {
|
||||
if (is_special(u, g)) {
|
||||
DEBUG_PRINTF("pop\n");
|
||||
a = nodes.back();
|
||||
nodes.pop_back();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (a != g.startDs && edge(g.startDs, a, g).second
|
||||
&& proper_out_degree(a, g) == 1
|
||||
&& g[a].char_reach == cr) {
|
||||
nodes.push_back(a);
|
||||
a = g.startDs;
|
||||
}
|
||||
|
||||
bool auto_restart = false;
|
||||
|
||||
DEBUG_PRINTF("a = %u\n", g[a].index);
|
||||
|
||||
if (nodes.size() < MIN_PUFF_LENGTH || a == g.startDs) {
|
||||
DEBUG_PRINTF("bad %zu %u\n", nodes.size(), g[a].index);
|
||||
if (nodes.size() < MIN_PUFF_LENGTH) {
|
||||
return false;
|
||||
} else {
|
||||
DEBUG_PRINTF("mark unbounded\n");
|
||||
unbounded = true;
|
||||
a = g.start;
|
||||
auto_restart = !isDot;
|
||||
}
|
||||
}
|
||||
|
||||
bool supported = false;
|
||||
bool fixed_depth = isFixedDepth(g, nodes.back());
|
||||
|
||||
if (exhaustible) {
|
||||
supported = true;
|
||||
} else if (fixed_depth) {
|
||||
supported = true;
|
||||
} else if (unbounded) {
|
||||
/* any C{n, } can be supported as all ranges will be squashed together
|
||||
* only need to track the first */
|
||||
supported = true;
|
||||
} else if (triggerResetsPuff(g, nodes.back())) {
|
||||
supported = true;
|
||||
} else if (triggerFloodsPuff(g, nodes.back())) {
|
||||
DEBUG_PRINTF("trigger floods puff\n");
|
||||
supported = true;
|
||||
unbounded = true;
|
||||
}
|
||||
|
||||
if (!supported) {
|
||||
DEBUG_PRINTF("not supported\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (cc.grey.puffImproveHead && a != g.start) {
|
||||
if (edge(g.startDs, a, g).second) {
|
||||
goto skip_improve; /* direct sds cases are better handled by auto
|
||||
* restarting puffettes */
|
||||
}
|
||||
|
||||
if (fixed_depth) {
|
||||
goto skip_improve; /* no danger of trigger floods */
|
||||
}
|
||||
|
||||
/* if we come after something literalish don't bother */
|
||||
if (g[a].char_reach.count() <= 2
|
||||
&& in_degree(a, g) == 1
|
||||
&& g[getSoleSourceVertex(g, a)].char_reach.count() <= 2) {
|
||||
goto skip_improve;
|
||||
}
|
||||
|
||||
if (nodes.size() < MIN_PUFF_LENGTH + HEAD_BACKOFF) {
|
||||
return false; /* not enough of the puff left to worth bothering
|
||||
about */
|
||||
}
|
||||
|
||||
improveHead(g, &a, &nodes);
|
||||
skip_improve:;
|
||||
}
|
||||
|
||||
assert(!nodes.empty());
|
||||
const auto &reports = g[nodes[0]].reports;
|
||||
assert(!reports.empty());
|
||||
|
||||
for (auto report : reports) {
|
||||
const Report &ir = rm.getReport(report);
|
||||
const bool highlander = ir.ekey != INVALID_EKEY;
|
||||
if (!unbounded && highlander && !isSimpleExhaustible(ir)) {
|
||||
DEBUG_PRINTF("report %u is bounded highlander but not simple "
|
||||
"exhaustible\n",
|
||||
report);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ir.type == INTERNAL_ROSE_CHAIN) {
|
||||
DEBUG_PRINTF("puffettes cannot be chained together\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
NFAVertex puffv = nodes.back();
|
||||
assert(puffv != NFAGraph::null_vertex());
|
||||
u32 width = countChain(g, nodes.back());
|
||||
|
||||
flat_set<ReportID> chain_reports;
|
||||
|
||||
for (auto report : reports) {
|
||||
constructPuff(g, a, puffv, cr, report, width, fixed_depth, unbounded,
|
||||
auto_restart, rose, rm, chain_reports, prefilter);
|
||||
}
|
||||
|
||||
if (!chain_reports.empty()) {
|
||||
wireNewAccepts(g, puffv, chain_reports);
|
||||
}
|
||||
|
||||
dead.insert(nodes.begin(), nodes.end());
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Attempts to split a puff off each predecessor of accept in \p g, handing
 * the puffs to \p rose and removing the consumed vertices from the graph.
 * Returns true if nothing but the special vertices remains afterwards, and
 * false if puffs are disabled in the grey box or other vertices are left. */
bool splitOffPuffs(RoseBuild &rose, ReportManager &rm, NGHolder &g,
                   bool prefilter, const CompileContext &cc) {
    if (!cc.grey.allowPuff) {
        return false;
    }

    size_t puffs = 0;
    set<NFAVertex> consumed;

    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        const bool puffed = doComponent(rose, rm, g, v, consumed, cc,
                                        prefilter);
        if (puffed) {
            ++puffs;
        }
    }

    if (!consumed.empty()) {
        remove_vertices(consumed, g);
        pruneUseless(g);
    }

    DEBUG_PRINTF("puffs: %zu\n", puffs);
    return num_vertices(g) <= N_SPECIALS;
}
|
||||
|
||||
bool isPuffable(const NGHolder &g, bool fixed_depth,
|
||||
const ReportManager &rm, const Grey &grey) {
|
||||
if (!grey.allowPuff) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!onlyOneTop(g)) {
|
||||
DEBUG_PRINTF("more than one top\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const set<ReportID> reports = all_reports(g);
|
||||
if (reports.size() != 1) {
|
||||
DEBUG_PRINTF("too many reports\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const Report &ir = rm.getReport(*reports.begin());
|
||||
|
||||
if (ir.type == INTERNAL_ROSE_CHAIN) {
|
||||
DEBUG_PRINTF("puffettes cannot be chained together\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
PureRepeat repeat;
|
||||
if (!isPureRepeat(g, repeat)) {
|
||||
DEBUG_PRINTF("not pure bounded repeat\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (repeat.bounds.min == depth(0)) {
|
||||
DEBUG_PRINTF("repeat min bound is zero\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// We can puff if:
|
||||
// (a) repeat is {N,}; or
|
||||
// (b) repeat is {N} and fixed-depth, or highlander (and will accept the
|
||||
// first match)
|
||||
|
||||
DEBUG_PRINTF("repeat is %s\n", repeat.bounds.str().c_str());
|
||||
|
||||
if (repeat.bounds.max.is_infinite()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (repeat.bounds.min == repeat.bounds.max) {
|
||||
if (fixed_depth) {
|
||||
DEBUG_PRINTF("fixed depth\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
const bool highlander = ir.ekey != INVALID_EKEY;
|
||||
|
||||
// If we're highlander, we must be simple-exhaustible as well.
|
||||
if (highlander && isSimpleExhaustible(ir)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
56
src/nfagraph/ng_puff.h
Normal file
56
src/nfagraph/ng_puff.h
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Puff construction from NGHolder.
|
||||
*/
|
||||
|
||||
#ifndef NG_PUFF_H
|
||||
#define NG_PUFF_H
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct CompileContext;
|
||||
struct Grey;
|
||||
class RoseBuild;
|
||||
class NGHolder;
|
||||
class ReportManager;
|
||||
|
||||
/** \brief Split off portions of the graph that are implementable as Puff
|
||||
* engines. Returns true if the entire graph is consumed. */
|
||||
bool splitOffPuffs(RoseBuild &rose, ReportManager &rm, NGHolder &g,
|
||||
bool prefilter, const CompileContext &cc);
|
||||
|
||||
/** \brief True if the entire graph in \a g could be constructed as a Puff
|
||||
* engine. */
|
||||
bool isPuffable(const NGHolder &g, bool fixed_depth, const ReportManager &rm,
|
||||
const Grey &grey);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
915
src/nfagraph/ng_redundancy.cpp
Normal file
915
src/nfagraph/ng_redundancy.cpp
Normal file
@@ -0,0 +1,915 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief NFA graph reductions.
|
||||
*
|
||||
* This code attempts to make the NFA graph smaller by performing a number of
|
||||
* local transformations:
|
||||
*
|
||||
* ### (1) removal of redundant vertices:
|
||||
*
|
||||
* v is redundant with respect to u if succ(v) is a subset of succ(u)
|
||||
* AND pred(v) is a subset of pred(u)
|
||||
* AND cr(v) is a subset of cr(u)
|
||||
*
|
||||
* ### (2) 'diamond' transformation:
|
||||
*
|
||||
* given succ(v) == succ(u) and pred(v) == pred(u),
|
||||
* v and u can be replaced by w with succ(w) = succ(v), pred(w) = pred(v),
|
||||
* and cr(w) = union(cr(v), cr(u))
|
||||
*
|
||||
* ### (3) locally identifiable left equivalence:
|
||||
*
|
||||
* given pred(v) == pred(u) (**) and cr(v) == cr(u),
|
||||
* v and u can be replaced by w with pred(w) = pred(v), cr(w) = cr(v),
|
||||
* and succ(w) = union(succ(v), succ(u))
|
||||
*
|
||||
* ### (4) locally identifiable right equivalence:
|
||||
*
|
||||
* given succ(v) == succ(u) (**) and cr(v) == cr(u),
|
||||
* v and u can be replaced by w with succ(w) = succ(v), cr(w) = cr(v),
|
||||
* and pred(w) = union(pred(v), pred(u))
|
||||
*
|
||||
* NOTE (**): for left and right equivalence, we can also do the transform if
|
||||
* set(u) contains u, set(v) contains v and the sets are otherwise equal. This
|
||||
* enables equivalent vertices with self-loops to be merged.
|
||||
*
|
||||
* If v and u raise accepts, they can only be merged if they raise the same
|
||||
* report IDs.
|
||||
*
|
||||
* Transformations are applied repeatedly until the graph stops changing.
|
||||
*
|
||||
* Note that the final graph may depend on the order in which these
|
||||
* transformations are applied. In order to reduce the non-determinism the
|
||||
* following order is imposed: (1); (2); (3) + (4).
|
||||
*/
|
||||
#include "ng_redundancy.h"
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ng_calc_components.h"
|
||||
#include "ng_dominators.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/graph/depth_first_search.hpp>
|
||||
#include <boost/graph/reverse_graph.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
|
||||
/** Precalculated (and maintained) information about a vertex. */
|
||||
class VertexInfo {
|
||||
public:
|
||||
flat_set<NFAVertex> pred; //!< predecessors of this vertex
|
||||
flat_set<NFAVertex> succ; //!< successors of this vertex
|
||||
bool isAccept = false; //!< does this vertex lead to accept?
|
||||
bool isRemoved = false; //!< have we already removed this vertex?
|
||||
|
||||
size_t inDegree() const { return pred.size(); }
|
||||
size_t outDegree() const { return succ.size(); }
|
||||
};
|
||||
|
||||
class VertexInfoMap {
|
||||
public:
|
||||
explicit VertexInfoMap(const NGHolder &gg)
|
||||
: g(gg), infos(num_vertices(gg)) {}
|
||||
VertexInfo &operator[](NFAVertex v) {
|
||||
u32 i = g[v].index;
|
||||
assert(i < infos.size());
|
||||
return infos[i];
|
||||
}
|
||||
|
||||
const VertexInfo &operator[](NFAVertex v) const {
|
||||
u32 i = g[v].index;
|
||||
assert(i < infos.size());
|
||||
return infos[i];
|
||||
}
|
||||
|
||||
private:
|
||||
const NGHolder &g;
|
||||
vector<VertexInfo> infos;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/** Populates the info map with their predecessor and successor states, and
|
||||
* whether they are accept states. */
|
||||
static
|
||||
void populateContainers(const NGHolder &g, VertexInfoMap &infoMap) {
|
||||
for (auto v : vertices_range(g)) {
|
||||
VertexInfo &info = infoMap[v];
|
||||
assert(info.pred.empty() && info.succ.empty());
|
||||
|
||||
// Build successor and predecessor sets
|
||||
insert(&info.pred, inv_adjacent_vertices(v, g));
|
||||
insert(&info.succ, adjacent_vertices(v, g));
|
||||
|
||||
// Note whether the vertex is an accept state
|
||||
if (!is_special(v, g)) {
|
||||
if (contains(info.succ, g.accept)
|
||||
|| contains(info.succ, g.acceptEod)) {
|
||||
info.isAccept = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Helper function to take the intersection of two sorted vertex sets
|
||||
* in-place. */
|
||||
static
|
||||
void inplaceIntersection(vector<NFAVertex> &vset1,
|
||||
const flat_set<NFAVertex> &vset2) {
|
||||
const NFAVertex GONE = NFAGraph::null_vertex();
|
||||
|
||||
vector<NFAVertex>::iterator it = vset1.begin(), ite = vset1.end();
|
||||
flat_set<NFAVertex>::const_iterator jt = vset2.begin(), jte = vset2.end();
|
||||
|
||||
while ((it != ite) && (jt != jte)) {
|
||||
assert(*it != GONE);
|
||||
|
||||
if (*it < *jt) {
|
||||
// present in vset1 but not in vset2. Set to null, remove in a
|
||||
// second pass.
|
||||
*it = GONE;
|
||||
++it;
|
||||
} else if (*jt < *it) {
|
||||
// present in vset2 but not in vset1, skip.
|
||||
++jt;
|
||||
} else {
|
||||
// present in both sets.
|
||||
++it; ++jt;
|
||||
}
|
||||
}
|
||||
|
||||
// Left overs are only in that set.
|
||||
vset1.erase(it, ite);
|
||||
|
||||
// Remove nulls created above.
|
||||
vset1.erase(remove(vset1.begin(), vset1.end(), GONE), vset1.end());
|
||||
}
|
||||
|
||||
/** Find the intersection of the successors of our predecessors. */
|
||||
static
|
||||
void succPredIntersection(const NFAVertex v, const flat_set<NFAVertex> &predSet,
|
||||
const VertexInfoMap &infoMap,
|
||||
vector<NFAVertex> &intersection,
|
||||
bool considerSelf = true /* follow self loops */) {
|
||||
/* find a good seed for the intersection */
|
||||
const flat_set<NFAVertex> *best = nullptr;
|
||||
for (auto u : predSet) {
|
||||
if (!considerSelf && u == v) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const flat_set<NFAVertex> &succSet = infoMap[u].succ;
|
||||
if (!best || succSet.size() <= best->size()) {
|
||||
best = &succSet;
|
||||
|
||||
// Break out if we've reduced our intersection to [v]
|
||||
if (best->size() == 1) {
|
||||
assert(*(best->begin()) == v);
|
||||
intersection.push_back(v);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (best) {
|
||||
insert(&intersection, intersection.end(), *best);
|
||||
}
|
||||
|
||||
for (auto u : predSet) {
|
||||
if (!considerSelf && u == v) {
|
||||
continue;
|
||||
}
|
||||
|
||||
inplaceIntersection(intersection, infoMap[u].succ);
|
||||
|
||||
// Check: intersection should always be at least size 1
|
||||
assert(!intersection.empty());
|
||||
|
||||
// Break out if we've reduced our intersection to [v]
|
||||
if (intersection.size() == 1) {
|
||||
assert(*intersection.begin() == v);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Find the intersection of the predecessors of our successors. */
|
||||
static
|
||||
void predSuccIntersection(const NFAVertex v,
|
||||
const flat_set<NFAVertex> &succSet,
|
||||
const VertexInfoMap &infoMap,
|
||||
vector<NFAVertex> &intersection,
|
||||
bool considerSelf = true /* follow self loops */) {
|
||||
/* find a good seed for the intersection */
|
||||
const flat_set<NFAVertex> *best = nullptr;
|
||||
for (auto w : succSet) {
|
||||
if (!considerSelf && w == v) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const flat_set<NFAVertex> &predSet = infoMap[w].pred;
|
||||
if (!best || predSet.size() <= best->size()) {
|
||||
best = &predSet;
|
||||
|
||||
// Break out if we've reduced our intersection to [v]
|
||||
if (best->size() == 1) {
|
||||
assert(*(best->begin()) == v);
|
||||
intersection.push_back(v);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (best) {
|
||||
insert(&intersection, intersection.end(), *best);
|
||||
}
|
||||
|
||||
for (auto w : succSet) {
|
||||
if (!considerSelf && w == v) {
|
||||
continue;
|
||||
}
|
||||
|
||||
inplaceIntersection(intersection, infoMap[w].pred);
|
||||
|
||||
// Check: intersection should always be at least size 1
|
||||
assert(!intersection.empty());
|
||||
|
||||
// Break out if we've reduced our intersection to [v]
|
||||
if (intersection.size() == 1) {
|
||||
assert(*intersection.begin() == v);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Update containers to take into account the removal of vertex v. */
|
||||
static
|
||||
void markForRemoval(const NFAVertex v, VertexInfoMap &infoMap,
|
||||
set<NFAVertex> &removable) {
|
||||
VertexInfo &info = infoMap[v];
|
||||
assert(!info.isRemoved);
|
||||
assert(!contains(removable, v));
|
||||
info.isRemoved = true;
|
||||
removable.insert(v);
|
||||
|
||||
// remove v from its predecessors' successors
|
||||
for (auto u : info.pred) {
|
||||
infoMap[u].succ.erase(v);
|
||||
}
|
||||
|
||||
// remove v from its successors' predecessors
|
||||
for (auto w : info.succ) {
|
||||
infoMap[w].pred.erase(v);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool hasInEdgeTops(const NGHolder &g, NFAVertex v) {
|
||||
bool exists;
|
||||
NFAEdge e;
|
||||
tie(e, exists) = edge_by_target(g.start, v, g);
|
||||
if (exists && g[e].top != 0) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/** Transform (1), removal of redundant vertices. */
|
||||
static
|
||||
bool doUselessMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
|
||||
set<NFAVertex> &removable) {
|
||||
/* useless merges can be done in any order, no need to take any care with
|
||||
* ordering */
|
||||
|
||||
// Temporary vectors used for intersections below
|
||||
vector<NFAVertex> succPredSet, predSuccSet, intersection;
|
||||
|
||||
bool changed = false;
|
||||
for (auto v : vertices_range(g)) {
|
||||
VertexInfo &info = infoMap[v];
|
||||
|
||||
if (info.isRemoved) {
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(!contains(removable, v));
|
||||
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* we do not need to check for out edge tops - as only specials (start)
|
||||
* can have tops and they are already disqualified. */
|
||||
if (hasInEdgeTops(g, v)) {
|
||||
continue; // Conservatively skip anything with nonzero tops.
|
||||
}
|
||||
|
||||
if (info.pred.empty() || info.succ.empty()) {
|
||||
DEBUG_PRINTF("vertex %u has empty pred/succ list\n",
|
||||
g[v].index);
|
||||
assert(0); // non-special states should always have succ/pred lists
|
||||
continue;
|
||||
}
|
||||
|
||||
// The following cases are more complex and rely on the intersection of
|
||||
// Succ(Pred(v)) and Pred(Succ(v))
|
||||
|
||||
// Compute intersections, operating on the smaller set first
|
||||
// Note that we use vectors here, as set_intersection underneath
|
||||
// guarantees sorted output, and vectors were quite a bit
|
||||
// faster than sets or lists.
|
||||
|
||||
succPredSet.clear();
|
||||
predSuccSet.clear();
|
||||
|
||||
if (info.pred.size() <= info.succ.size()) {
|
||||
succPredIntersection(v, info.pred, infoMap, succPredSet);
|
||||
if (succPredSet.size() == 1) {
|
||||
// nobody in here but us chickens
|
||||
assert(*succPredSet.begin() == v);
|
||||
continue;
|
||||
}
|
||||
predSuccIntersection(v, info.succ, infoMap, predSuccSet);
|
||||
if (predSuccSet.size() == 1) {
|
||||
assert(*predSuccSet.begin() == v);
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
predSuccIntersection(v, info.succ, infoMap, predSuccSet);
|
||||
if (predSuccSet.size() == 1) {
|
||||
assert(*predSuccSet.begin() == v);
|
||||
continue;
|
||||
}
|
||||
succPredIntersection(v, info.pred, infoMap, succPredSet);
|
||||
if (succPredSet.size() == 1) {
|
||||
assert(*succPredSet.begin() == v);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Find the intersection of Succ(Pred(v)) and Pred(Succ(v))
|
||||
intersection.clear();
|
||||
set_intersection(succPredSet.begin(), succPredSet.end(),
|
||||
predSuccSet.begin(), predSuccSet.end(),
|
||||
back_inserter(intersection));
|
||||
|
||||
/* Boring if it is just us in the intersection */
|
||||
if (intersection.size() < 2) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Compare char_reach, mark v for removal if any members of
|
||||
// the intersection have an equal or greater reach
|
||||
const CharReach &currReach = g[v].char_reach;
|
||||
const auto &currReports = g[v].reports;
|
||||
for (auto t : intersection) {
|
||||
const VertexInfo &info2 = infoMap[t];
|
||||
|
||||
/* start is never a succ of a state, so will never be in the
|
||||
* predsucc/succpred intersection */
|
||||
assert(t != g.start);
|
||||
|
||||
if (t == v || info2.isRemoved) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// For each candidate C to make V redundant, check:
|
||||
// if V is an accept state, C must be an accept state for
|
||||
// the same pattern
|
||||
// pred(C) is a superset of pred(V)
|
||||
// succ(C) is a superset of succ(V)
|
||||
// reach(C) is a superset of reach(V)
|
||||
//
|
||||
// Note: pred/sec tests are covered by the intersections
|
||||
// calculated above.
|
||||
|
||||
/* note: links to accepts are also tracked in succs */
|
||||
if (info.isAccept && currReports != g[t].reports) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (som) {
|
||||
if (t == g.startDs) {
|
||||
continue;
|
||||
}
|
||||
if (is_virtual_start(t, g) != is_virtual_start(v, g)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* we do not need to check for out edge tops - as only start
|
||||
* can have tops and it has already been ruled out. */
|
||||
if (hasInEdgeTops(g, t)) {
|
||||
continue; // Conservatively skip anything with nonzero tops.
|
||||
}
|
||||
|
||||
CharReach &otherReach = g[t].char_reach;
|
||||
if (currReach.isSubsetOf(otherReach)) {
|
||||
DEBUG_PRINTF("removing redundant vertex %u (keeping %u)\n",
|
||||
g[v].index, g[t].index);
|
||||
markForRemoval(v, infoMap, removable);
|
||||
changed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return changed;
|
||||
}
|
||||
|
||||
/** Transform (2), diamond merge pass. */
|
||||
static
|
||||
bool doDiamondMergePass(NGHolder &g, som_type som, VertexInfoMap &infoMap,
|
||||
set<NFAVertex> &removable) {
|
||||
// Temporary vectors used for intersections below
|
||||
vector<NFAVertex> succPredSet, predSuccSet, intersection;
|
||||
|
||||
bool changed = false;
|
||||
for (auto v : vertices_range(g)) {
|
||||
VertexInfo &info = infoMap[v];
|
||||
|
||||
if (info.isRemoved) {
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(!contains(removable, v));
|
||||
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* we do not need to check for out edge tops - as only specials (start)
|
||||
* can have tops and they are already disqualified. */
|
||||
if (hasInEdgeTops(g, v)) {
|
||||
continue; // Conservatively skip anything with nonzero tops.
|
||||
}
|
||||
|
||||
if (info.pred.empty() || info.succ.empty()) {
|
||||
assert(0); // non-special states should always have succ/pred lists
|
||||
continue;
|
||||
}
|
||||
|
||||
// The following cases are more complex and rely on the intersection of
|
||||
// Succ(Pred(v)) and Pred(Succ(v))
|
||||
|
||||
// Compute intersections, operating on the smaller set first
|
||||
// Note that we use vectors here, as set_intersection underneath
|
||||
// guarantees sorted output, and vectors were quite a bit faster than
|
||||
// sets or lists.
|
||||
|
||||
succPredSet.clear();
|
||||
predSuccSet.clear();
|
||||
|
||||
if (info.pred.size() <= info.succ.size()) {
|
||||
succPredIntersection(v, info.pred, infoMap, succPredSet);
|
||||
if (succPredSet.size() == 1) {
|
||||
// nobody in here but us chickens
|
||||
assert(*succPredSet.begin() == v);
|
||||
continue;
|
||||
}
|
||||
predSuccIntersection(v, info.succ, infoMap, predSuccSet);
|
||||
if (predSuccSet.size() == 1) {
|
||||
assert(*predSuccSet.begin() == v);
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
predSuccIntersection(v, info.succ, infoMap, predSuccSet);
|
||||
if (predSuccSet.size() == 1) {
|
||||
assert(*predSuccSet.begin() == v);
|
||||
continue;
|
||||
}
|
||||
succPredIntersection(v, info.pred, infoMap, succPredSet);
|
||||
if (succPredSet.size() == 1) {
|
||||
assert(*succPredSet.begin() == v);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Find the intersection of Succ(Pred(v)) and Pred(Succ(v))
|
||||
intersection.clear();
|
||||
set_intersection(succPredSet.begin(), succPredSet.end(),
|
||||
predSuccSet.begin(), predSuccSet.end(),
|
||||
back_inserter(intersection));
|
||||
|
||||
/* Boring if it is just us in the intersection */
|
||||
if (intersection.size() < 2) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* ensure that we look for candidates in the same order */
|
||||
sort(intersection.begin(), intersection.end(), make_index_ordering(g));
|
||||
|
||||
const CharReach &currReach = g[v].char_reach;
|
||||
const auto &currReports = g[v].reports;
|
||||
for (auto t : intersection) {
|
||||
const VertexInfo &info2 = infoMap[t];
|
||||
|
||||
if (t == v || info2.isRemoved || is_special(t, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* note: links to accepts are also tracked in succs */
|
||||
if (info.isAccept && currReports != g[t].reports) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* we do not need to check for out edge tops - as only specials
|
||||
* (start) can have tops and they are already disqualified. */
|
||||
if (hasInEdgeTops(g, t)) {
|
||||
continue; // Conservatively skip anything with nonzero tops.
|
||||
}
|
||||
|
||||
if (som) {
|
||||
if (is_virtual_start(v, g) != is_virtual_start(t, g)) {
|
||||
continue; // can only merge like with like.
|
||||
}
|
||||
}
|
||||
|
||||
// If in-degree of v == in-degree of target
|
||||
// and out-degree of v == out-degree of target
|
||||
// (because pred and succ are supersets)
|
||||
// then combine charreach of v into target and remove v
|
||||
if (info.inDegree() == info2.inDegree()
|
||||
&& info.outDegree() == info2.outDegree()) {
|
||||
// add character reachability of v into target
|
||||
CharReach &otherReach = g[t].char_reach;
|
||||
otherReach |= currReach;
|
||||
// v can be removed
|
||||
DEBUG_PRINTF("removing redundant vertex %u and merging "
|
||||
"reachability with vertex %u\n",
|
||||
g[v].index, g[t].index);
|
||||
markForRemoval(v, infoMap, removable);
|
||||
changed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return changed;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/** Exception thrown by ReachSubsetVisitor to abort a depth-first walk as soon
 * as an unacceptable vertex is discovered. */
struct ReachMismatch {};
|
||||
|
||||
class ReachSubsetVisitor : public boost::default_dfs_visitor {
|
||||
public:
|
||||
explicit ReachSubsetVisitor(const CharReach &r) : cr(r) {}
|
||||
|
||||
template <class Graph, class Vertex>
|
||||
void discover_vertex(const Vertex &v, const Graph &g) const {
|
||||
if (is_any_start(v, g)) {
|
||||
return; // start vertices are OK
|
||||
} else if (is_special(v, g)) {
|
||||
assert(0);
|
||||
throw ReachMismatch(); // other special nodes??
|
||||
}
|
||||
|
||||
const CharReach &vcr = g[v].char_reach;
|
||||
DEBUG_PRINTF("checking if vcr (%zu) is subset of (%zu)\n", vcr.count(),
|
||||
cr.count());
|
||||
if (vcr != (vcr & cr)) {
|
||||
throw ReachMismatch();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const CharReach &cr;
|
||||
};
|
||||
|
||||
/** DFS terminator predicate for the path reach-subset checks: evaluates to
 * true only for the vertex it was constructed with. */
template <class Graph, class Vertex>
class VertexIs {
public:
    explicit VertexIs(const Vertex &v) : stopAt(v) {}

    bool operator()(const Vertex &candidate, const Graph &) const {
        return candidate == stopAt;
    }

private:
    Vertex stopAt; //!< the vertex this predicate matches
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/** Returns true if every vertex on paths leading to edge \p e has reachability
|
||||
* which is a subset of the reachability of \p dom */
|
||||
static
|
||||
bool reversePathReachSubset(const NFAEdge &e, const NFAVertex &dom,
|
||||
const NGHolder &g) {
|
||||
const CharReach &domReach = g[dom].char_reach;
|
||||
if (domReach.all()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
NFAVertex start = source(e, g);
|
||||
using RevGraph = boost::reverse_graph<NFAGraph, const NFAGraph &>;
|
||||
map<RevGraph::vertex_descriptor, boost::default_color_type> vertexColor;
|
||||
|
||||
// Walk the graph backwards from v, examining each node. We fail (return
|
||||
// false) if we encounter a node with reach NOT a subset of domReach, and
|
||||
// we stop searching at dom.
|
||||
try {
|
||||
depth_first_visit(RevGraph(g.g), start,
|
||||
ReachSubsetVisitor(domReach),
|
||||
make_assoc_property_map(vertexColor),
|
||||
VertexIs<RevGraph, RevGraph::vertex_descriptor>(dom));
|
||||
} catch(ReachMismatch&) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Returns true if every vertex on paths leading from edge \p e has
|
||||
* reachability which is a subset of the reachability of \p dom */
|
||||
static
|
||||
bool forwardPathReachSubset(const NFAEdge &e, const NFAVertex &dom,
|
||||
const NGHolder &g) {
|
||||
const CharReach &domReach = g[dom].char_reach;
|
||||
if (domReach.all()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
NFAVertex start = target(e, g);
|
||||
map<NFAGraph::vertex_descriptor, boost::default_color_type> vertexColor;
|
||||
|
||||
// Walk the graph forward from v, examining each node. We fail (return
|
||||
// false) if we encounter a node with reach NOT a subset of domReach, and
|
||||
// we stop searching at dom.
|
||||
try {
|
||||
depth_first_visit(g.g, start,
|
||||
ReachSubsetVisitor(domReach),
|
||||
make_assoc_property_map(vertexColor),
|
||||
VertexIs<NFAGraph, NFAVertex>(dom));
|
||||
} catch(ReachMismatch&) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool allOutsSpecial(NFAVertex v, const NGHolder &g) {
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (!is_special(w, g)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool allInsSpecial(NFAVertex v, const NGHolder &g) {
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (!is_special(u, g)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Cheaply check whether this graph can't be reduced at all, because it is
|
||||
* just a chain of vertices with no other edges. */
|
||||
static
|
||||
bool isIrreducible(const NGHolder &g) {
|
||||
for (auto v : vertices_range(g)) {
|
||||
// skip specials
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (in_degree(v, g) != 1 && !allInsSpecial(v, g)) {
|
||||
return false;
|
||||
}
|
||||
if (out_degree(v, g) != 1 && !allOutsSpecial(v, g)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* if calcComponents got sleepy and went home, the above checks don't hold
|
||||
* as it assumes there is only one connected component. */
|
||||
if (isAlternationOfClasses(g)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
u32 findCyclic(const NGHolder &g, vector<bool> &cyclic) {
|
||||
u32 count = 0;
|
||||
|
||||
cyclic.resize(num_vertices(g));
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
assert(g[v].index < cyclic.size());
|
||||
bool c = edge(v, v, g).second;
|
||||
if (c) {
|
||||
count++;
|
||||
}
|
||||
cyclic[g[v].index] = c;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static
|
||||
void findCyclicDom(NGHolder &g, vector<bool> &cyclic,
|
||||
set<NFAEdge> &dead, som_type som) {
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> dominators = findDominators(g);
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Path in through a dominator (e.g. '.+a?foobar')
|
||||
NFAVertex dom = dominators[v];
|
||||
if (dom && cyclic[g[dom].index]
|
||||
&& edge(dom, v, g).second) {
|
||||
|
||||
if (som && dom == g.startDs) {
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("vertex %u is dominated by directly-connected cyclic "
|
||||
"vertex %u\n", g[v].index,
|
||||
g[dom].index);
|
||||
|
||||
// iff all paths through in-edge e of v involve vertices whose
|
||||
// reachability is a subset of reach(dom), we can delete edge e.
|
||||
for (const auto &e : in_edges_range(v, g)) {
|
||||
if (source(e, g) == dom) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (reversePathReachSubset(e, dom, g)) {
|
||||
DEBUG_PRINTF("edge (%u, %u) can be removed: leading paths "
|
||||
"share dom reach\n",
|
||||
g[source(e, g)].index, g[target(e, g)].index);
|
||||
dead.insert(e);
|
||||
if (source(e, g) == v) {
|
||||
cyclic[g[v].index] = false;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void findCyclicPostDom(NGHolder &g, vector<bool> &cyclic,
|
||||
set<NFAEdge> &dead) {
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> postdominators =
|
||||
findPostDominators(g);
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Path out through a post-dominator (e.g. a?.+foobar')
|
||||
NFAVertex postdom = postdominators[v];
|
||||
if (postdom && cyclic[g[postdom].index]
|
||||
&& edge(v, postdom, g).second) {
|
||||
DEBUG_PRINTF("vertex %u is postdominated by directly-connected "
|
||||
"cyclic vertex %u\n", g[v].index,
|
||||
g[postdom].index);
|
||||
|
||||
// iff all paths through in-edge e of v involve vertices whose
|
||||
// reachability is a subset of reach(dom), we can delete edge e.
|
||||
for (const auto &e : out_edges_range(v, g)) {
|
||||
if (target(e, g) == postdom) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (forwardPathReachSubset(e, postdom, g)) {
|
||||
DEBUG_PRINTF("edge (%u, %u) can be removed: trailing paths "
|
||||
"share postdom reach\n",
|
||||
g[source(e, g)].index, g[target(e, g)].index);
|
||||
if (target(e, g) == v) {
|
||||
cyclic[g[v].index] = false;
|
||||
}
|
||||
dead.insert(e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Renumbers the vertices of \a g, then alternates the useless-vertex and
 * diamond merge passes until neither marks anything new, and finally deletes
 * all marked vertices from the graph. Returns true if any vertex was
 * removed. */
bool removeRedundancy(NGHolder &g, som_type som) {
    DEBUG_PRINTF("rr som = %d\n", (int)som);
    g.renumberVertices();

    // Cheap way out: a graph that is a plain chain of vertices has nothing
    // for us to remove.
    if (isIrreducible(g)) {
        return false;
    }

    // Cache each vertex's predecessors, successors and accept status.
    VertexInfoMap infoMap(g);
    populateContainers(g, infoMap);

    // Each pass runs at least once; after that a pass is only repeated when
    // the other one has changed something since it last ran.
    set<NFAVertex> removable;
    bool runUseless = true;
    bool runDiamond = true;
    while (runUseless || runDiamond) {
        if (runUseless) {
            runUseless = false;
            if (doUselessMergePass(g, som, infoMap, removable)) {
                runDiamond = true;
            }
        }

        if (runDiamond) {
            runDiamond = false;
            if (doDiamondMergePass(g, som, infoMap, removable)) {
                runUseless = true;
            }
        }
    }

    DEBUG_PRINTF("found %zu removable vertices overall.\n", removable.size());
    remove_vertices(removable, g);

    return !removable.empty();
}
|
||||
|
||||
/** UE-524: remove edges into nodes that are dominated by cyclic nodes with
|
||||
* reachability that is a superset of all paths feeding into that edge. */
|
||||
bool removeCyclicDominated(NGHolder &g, som_type som) {
|
||||
set<NFAEdge> dead;
|
||||
vector<bool> cyclic;
|
||||
bool changed = false;
|
||||
|
||||
findCyclic(g, cyclic);
|
||||
|
||||
findCyclicDom(g, cyclic, dead, som);
|
||||
if (!dead.empty()) {
|
||||
remove_edges(dead, g);
|
||||
pruneUseless(g);
|
||||
dead.clear();
|
||||
cyclic.clear(); // need to recalculate cyclic as ids have changed
|
||||
findCyclic(g, cyclic);
|
||||
changed = true;
|
||||
}
|
||||
|
||||
findCyclicPostDom(g, cyclic, dead);
|
||||
if (!dead.empty()) {
|
||||
remove_edges(dead, g);
|
||||
pruneUseless(g);
|
||||
dead.clear();
|
||||
changed = true;
|
||||
}
|
||||
|
||||
return changed;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
54
src/nfagraph/ng_redundancy.h
Normal file
54
src/nfagraph/ng_redundancy.h
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief NFA graph reductions.
|
||||
*/
|
||||
|
||||
#ifndef NG_REDUNDANCY_H
|
||||
#define NG_REDUNDANCY_H
|
||||
|
||||
#include "som/som.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
struct CompileContext;
|
||||
|
||||
/** Attempt to make the NFA graph \p g smaller by performing a number of local
 * transformations. Returns true if any vertices were removed. */
|
||||
bool removeRedundancy(NGHolder &g, som_type som);
|
||||
|
||||
/** UE-524: remove edges into nodes that are dominated by cyclic nodes with
|
||||
* reachability that is a superset of all paths feeding into that edge. Returns
|
||||
* true if any edges/vertices were removed. */
|
||||
bool removeCyclicDominated(NGHolder &g, som_type som);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
476
src/nfagraph/ng_region.cpp
Normal file
476
src/nfagraph/ng_region.cpp
Normal file
@@ -0,0 +1,476 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Region analysis.
|
||||
*
|
||||
* Definition: a \a region is a subset of vertices in a graph such that:
|
||||
* - the edges entering the region are a cutset of the graph
|
||||
* - for every in-edge (u, v) to the region there exist edges (u, w) for all
|
||||
* w in {w : w in region and w has an in-edge}
|
||||
* - the regions in a graph partition the graph
|
||||
*
|
||||
* Note:
|
||||
* - we partition a graph into the maximal number of regions
|
||||
* - similar properties for exit edges should hold as a consequence
|
||||
* - graph == sequence of regions
|
||||
* - a region is considered to have an epsilon vertex to allow jumps
|
||||
* - vertices which only lead to back edges need to be floated up in the topo
|
||||
* order
|
||||
*
|
||||
* Algorithm overview:
|
||||
* -# topo-order over the DAG skeleton;
|
||||
* -# incrementally add vertices to the current region until the boundary edges
|
||||
* form a valid cut-set;
|
||||
* -# for each back-edge, if the source and target are in different regions,
|
||||
* merge the regions (and all intervening regions) into a common region.
|
||||
*/
|
||||
#include "ng_region.h"
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/container.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/graph/filtered_graph.hpp>
|
||||
#include <boost/graph/topological_sort.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
typedef ue2::unordered_set<NFAEdge> BackEdgeSet;
|
||||
typedef boost::filtered_graph<NFAGraph, AcyclicFilter<BackEdgeSet>>
|
||||
AcyclicGraph;
|
||||
|
||||
namespace {
|
||||
struct exit_info {
|
||||
explicit exit_info(NFAVertex v) : exit(v) {}
|
||||
|
||||
NFAVertex exit;
|
||||
ue2::unordered_set<NFAVertex> open;
|
||||
};
|
||||
}
|
||||
|
||||
static
|
||||
void checkAndAddExitCandidate(const AcyclicGraph &g,
|
||||
const ue2::unordered_set<NFAVertex> &r,
|
||||
NFAVertex v, vector<exit_info> *exits) {
|
||||
// set when we find our first candidate.
|
||||
ue2::unordered_set<NFAVertex> *open = nullptr;
|
||||
|
||||
/* find the set of vertices reachable from v which are not in r */
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (!contains(r, w)) {
|
||||
if (!open) {
|
||||
exits->push_back(exit_info(v));
|
||||
open = &exits->back().open;
|
||||
}
|
||||
open->insert(w);
|
||||
}
|
||||
}
|
||||
|
||||
if (open) {
|
||||
DEBUG_PRINTF("exit %u\n", g[v].index);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void findExits(const AcyclicGraph &g, const ue2::unordered_set<NFAVertex> &r,
|
||||
vector<exit_info> *exits) {
|
||||
exits->clear();
|
||||
|
||||
for (auto v : r) {
|
||||
checkAndAddExitCandidate(g, r, v, exits);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void refineExits(const AcyclicGraph &g, const ue2::unordered_set<NFAVertex> &r,
|
||||
NFAVertex new_v, vector<exit_info> *exits) {
|
||||
for (u32 i = 0; i < exits->size(); i++) {
|
||||
(*exits)[i].open.erase(new_v); /* new_v is no long an open edge */
|
||||
if ((*exits)[i].open.empty()) { /* no open edges: no longer an exit */
|
||||
/* shuffle to back and kill */
|
||||
(*exits)[i] = exits->back();
|
||||
exits->pop_back();
|
||||
i--;
|
||||
}
|
||||
}
|
||||
|
||||
checkAndAddExitCandidate(g, r, new_v, exits);
|
||||
}
|
||||
|
||||
/** the set of exits from a candidate region are valid if: FIXME: document
|
||||
*/
|
||||
static
|
||||
bool exitValid(UNUSED const AcyclicGraph &g, const vector<exit_info> &exits,
|
||||
const ue2::unordered_set<NFAVertex> &open_jumps) {
|
||||
if (exits.empty() || (exits.size() < 2 && open_jumps.empty())) {
|
||||
return true;
|
||||
}
|
||||
if (exits.size() == 1 && open_jumps.size() == 1) {
|
||||
DEBUG_PRINTF("oj %u, e %u\n", g[*open_jumps.begin()].index,
|
||||
g[exits[0].exit].index);
|
||||
if (*open_jumps.begin() == exits[0].exit) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
assert(!exits.empty());
|
||||
const auto &enters = exits.front().open;
|
||||
|
||||
if (!open_jumps.empty() && enters != open_jumps) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (auto it = begin(exits) + 1; it != end(exits); ++it) {
|
||||
if (it->open != enters) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
void setRegion(const ue2::unordered_set<NFAVertex> &r, u32 rid,
|
||||
ue2::unordered_map<NFAVertex, u32> ®ions) {
|
||||
for (auto v : r) {
|
||||
regions[v] = rid;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void buildInitialCandidate(const AcyclicGraph &g,
|
||||
vector<NFAVertex>::const_reverse_iterator &it,
|
||||
const vector<NFAVertex>::const_reverse_iterator &ite,
|
||||
ue2::unordered_set<NFAVertex> *candidate,
|
||||
/* in exits of prev region;
|
||||
* out exits from candidate */
|
||||
vector<exit_info> *exits,
|
||||
ue2::unordered_set<NFAVertex> *open_jumps) {
|
||||
if (it == ite) {
|
||||
candidate->clear();
|
||||
exits->clear();
|
||||
return;
|
||||
}
|
||||
|
||||
if (exits->empty()) {
|
||||
DEBUG_PRINTF("odd\n");
|
||||
candidate->clear();
|
||||
DEBUG_PRINTF("adding %u to initial\n", g[*it].index);
|
||||
candidate->insert(*it);
|
||||
open_jumps->erase(*it);
|
||||
checkAndAddExitCandidate(g, *candidate, *it, exits);
|
||||
++it;
|
||||
return;
|
||||
}
|
||||
|
||||
ue2::unordered_set<NFAVertex> enters = (*exits)[0].open;
|
||||
candidate->clear();
|
||||
|
||||
for (; it != ite; ++it) {
|
||||
DEBUG_PRINTF("adding %u to initial\n", g[*it].index);
|
||||
candidate->insert(*it);
|
||||
if (contains(enters, *it)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (it != ite) {
|
||||
enters.erase(*it);
|
||||
open_jumps->swap(enters);
|
||||
DEBUG_PRINTF("oj size = %zu\n", open_jumps->size());
|
||||
++it;
|
||||
} else {
|
||||
open_jumps->clear();
|
||||
}
|
||||
|
||||
findExits(g, *candidate, exits);
|
||||
}
|
||||
|
||||
static
|
||||
void findDagLeaders(const NGHolder &h, const AcyclicGraph &g,
|
||||
const vector<NFAVertex> &topo,
|
||||
ue2::unordered_map<NFAVertex, u32> ®ions) {
|
||||
assert(!topo.empty());
|
||||
u32 curr_id = 0;
|
||||
vector<NFAVertex>::const_reverse_iterator t_it = topo.rbegin();
|
||||
vector<exit_info> exits;
|
||||
ue2::unordered_set<NFAVertex> candidate;
|
||||
ue2::unordered_set<NFAVertex> open_jumps;
|
||||
DEBUG_PRINTF("adding %u to current\n", g[*t_it].index);
|
||||
assert(t_it != topo.rend());
|
||||
candidate.insert(*t_it++);
|
||||
DEBUG_PRINTF("adding %u to current\n", g[*t_it].index);
|
||||
assert(t_it != topo.rend());
|
||||
candidate.insert(*t_it++);
|
||||
findExits(g, candidate, &exits);
|
||||
|
||||
while (t_it != topo.rend()) {
|
||||
assert(!candidate.empty());
|
||||
|
||||
if (exitValid(g, exits, open_jumps)) {
|
||||
if (contains(candidate, h.accept) && !open_jumps.empty()) {
|
||||
/* we have tried to make an optional region containing accept as
|
||||
* we have an open jump to eod. This candidate region needs to
|
||||
* be put in with the previous region. */
|
||||
curr_id--;
|
||||
DEBUG_PRINTF("merging in with region %u\n", curr_id);
|
||||
} else {
|
||||
DEBUG_PRINTF("setting region %u\n", curr_id);
|
||||
}
|
||||
setRegion(candidate, curr_id++, regions);
|
||||
buildInitialCandidate(g, t_it, topo.rend(), &candidate, &exits,
|
||||
&open_jumps);
|
||||
} else {
|
||||
NFAVertex curr = *t_it;
|
||||
DEBUG_PRINTF("adding %u to current\n", g[curr].index);
|
||||
candidate.insert(curr);
|
||||
open_jumps.erase(curr);
|
||||
refineExits(g, candidate, *t_it, &exits);
|
||||
DEBUG_PRINTF(" open jumps %zu exits %zu\n", open_jumps.size(),
|
||||
exits.size());
|
||||
++t_it;
|
||||
}
|
||||
}
|
||||
/* assert exits valid */
|
||||
setRegion(candidate, curr_id, regions);
|
||||
}
|
||||
|
||||
static
|
||||
void mergeUnderBackEdges(const NGHolder &g, const vector<NFAVertex> &topo,
|
||||
const BackEdgeSet &backEdges,
|
||||
ue2::unordered_map<NFAVertex, u32> ®ions) {
|
||||
for (const auto &e : backEdges) {
|
||||
NFAVertex u = source(e, g);
|
||||
NFAVertex v = target(e, g);
|
||||
|
||||
u32 ru = regions[u];
|
||||
u32 rv = regions[v];
|
||||
if (ru == rv) {
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("merging v = %u(%u), u = %u(%u)\n", g[v].index, rv,
|
||||
g[u].index, ru);
|
||||
assert(rv < ru);
|
||||
|
||||
for (auto t : topo) {
|
||||
u32 r = regions[t];
|
||||
if (r <= ru && r > rv) {
|
||||
regions[t] = rv;
|
||||
} else if (r > ru) {
|
||||
regions[t] = rv + r - ru;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void reorderSpecials(const NGHolder &w, const AcyclicGraph &acyclic_g,
|
||||
vector<NFAVertex> &topoOrder) {
|
||||
// Start is last element of reverse topo ordering.
|
||||
auto it = find(topoOrder.begin(), topoOrder.end(), w.start);
|
||||
if (it != topoOrder.end() - 1) {
|
||||
DEBUG_PRINTF("repositioning start\n");
|
||||
assert(it != topoOrder.end());
|
||||
topoOrder.erase(it);
|
||||
topoOrder.insert(topoOrder.end(), w.start);
|
||||
}
|
||||
|
||||
// StartDs is second-to-last element of reverse topo ordering.
|
||||
it = find(topoOrder.begin(), topoOrder.end(), w.startDs);
|
||||
if (it != topoOrder.end() - 2) {
|
||||
DEBUG_PRINTF("repositioning start ds\n");
|
||||
assert(it != topoOrder.end());
|
||||
topoOrder.erase(it);
|
||||
topoOrder.insert(topoOrder.end() - 1, w.startDs);
|
||||
}
|
||||
|
||||
// AcceptEOD is first element of reverse topo ordering.
|
||||
it = find(topoOrder.begin(), topoOrder.end(), w.acceptEod);
|
||||
if (it != topoOrder.begin()) {
|
||||
DEBUG_PRINTF("repositioning accept\n");
|
||||
assert(it != topoOrder.end());
|
||||
topoOrder.erase(it);
|
||||
topoOrder.insert(topoOrder.begin(), w.acceptEod);
|
||||
}
|
||||
|
||||
// Accept is second element of reverse topo ordering, if it's connected.
|
||||
it = find(topoOrder.begin(), topoOrder.end(), w.accept);
|
||||
if (it != topoOrder.begin() + 1) {
|
||||
DEBUG_PRINTF("repositioning accept\n");
|
||||
assert(it != topoOrder.end());
|
||||
topoOrder.erase(it);
|
||||
if (in_degree(w.accept, acyclic_g) != 0) {
|
||||
topoOrder.insert(topoOrder.begin() + 1, w.accept);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void liftSinks(const AcyclicGraph &acyclic_g, vector<NFAVertex> &topoOrder) {
|
||||
ue2::unordered_set<NFAVertex> sinks;
|
||||
for (auto v : vertices_range(acyclic_g)) {
|
||||
if (is_special(v, acyclic_g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isLeafNode(v, acyclic_g)) {
|
||||
DEBUG_PRINTF("sink found %u\n", acyclic_g[v].index);
|
||||
sinks.insert(v);
|
||||
}
|
||||
}
|
||||
|
||||
if (sinks.empty()) {
|
||||
DEBUG_PRINTF("no sinks found\n");
|
||||
return;
|
||||
}
|
||||
|
||||
bool changed;
|
||||
do {
|
||||
DEBUG_PRINTF("look\n");
|
||||
changed = false;
|
||||
for (auto v : vertices_range(acyclic_g)) {
|
||||
if (is_special(v, acyclic_g) || contains(sinks, v)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (auto w : adjacent_vertices_range(v, acyclic_g)) {
|
||||
if (!contains(sinks, w)) {
|
||||
goto next;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("sink found %u\n", acyclic_g[v].index);
|
||||
sinks.insert(v);
|
||||
changed = true;
|
||||
next:;
|
||||
}
|
||||
} while (changed);
|
||||
|
||||
for (auto ri = topoOrder.rbegin() + 1; ri != topoOrder.rend(); ++ri) {
|
||||
if (!contains(sinks, *ri)) {
|
||||
continue;
|
||||
}
|
||||
NFAVertex s = *ri;
|
||||
DEBUG_PRINTF("handling sink %u\n", acyclic_g[s].index);
|
||||
ue2::unordered_set<NFAVertex> parents;
|
||||
for (const auto &e : in_edges_range(s, acyclic_g)) {
|
||||
parents.insert(source(e, acyclic_g));
|
||||
}
|
||||
|
||||
/* vertex has no children not reachable on a back edge, bubble the
|
||||
* vertex up the topo order to be near its parents */
|
||||
vector<NFAVertex>::reverse_iterator rj = ri;
|
||||
--rj;
|
||||
while (rj != topoOrder.rbegin() && !contains(parents, *rj)) {
|
||||
/* sink is in rj + 1 */
|
||||
assert(*(rj + 1) == s);
|
||||
DEBUG_PRINTF("lifting\n");
|
||||
using std::swap;
|
||||
swap(*rj, *(rj + 1));
|
||||
--rj;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Build a reverse topo ordering (with only the specials that are in use). We
|
||||
* also want to ensure vertices which only lead to back edges are placed near
|
||||
* their parents. */
|
||||
static
|
||||
vector<NFAVertex> buildTopoOrder(const NGHolder &w,
|
||||
const AcyclicGraph &acyclic_g,
|
||||
vector<boost::default_color_type> &colours) {
|
||||
vector<NFAVertex> topoOrder;
|
||||
|
||||
topological_sort(
|
||||
acyclic_g, back_inserter(topoOrder),
|
||||
color_map(make_iterator_property_map(
|
||||
colours.begin(), get(&NFAGraphVertexProps::index, acyclic_g))));
|
||||
|
||||
reorderSpecials(w, acyclic_g, topoOrder);
|
||||
|
||||
if (topoOrder.empty()) {
|
||||
return topoOrder;
|
||||
}
|
||||
|
||||
liftSinks(acyclic_g, topoOrder);
|
||||
|
||||
DEBUG_PRINTF("TOPO ORDER\n");
|
||||
for (auto ri = topoOrder.rbegin(); ri != topoOrder.rend(); ++ri) {
|
||||
DEBUG_PRINTF("[%u]\n", acyclic_g[*ri].index);
|
||||
}
|
||||
DEBUG_PRINTF("----------\n");
|
||||
|
||||
return topoOrder;
|
||||
}
|
||||
|
||||
/**
 * Computes a region id for every vertex of \p g.
 *
 * Steps: collect the back edges with a depth-first search from start, view the
 * graph with those edges filtered out, build the adjusted reverse topological
 * ordering, carve that ordering into regions and finally merge regions that
 * are connected by a back edge.
 */
ue2::unordered_map<NFAVertex, u32> assignRegions(const NGHolder &g) {
    assert(hasCorrectlyNumberedVertices(g));
    const u32 numVertices = num_vertices(g);
    DEBUG_PRINTF("assigning regions for %u vertices in holder\n", numVertices);

    vector<boost::default_color_type> colours(numVertices);

    // Build an acyclic graph for this NGHolder.
    BackEdgeSet deadEdges;
    auto colour_pmap = make_iterator_property_map(
        colours.begin(), get(&NFAGraphVertexProps::index, g.g));
    depth_first_search(g.g, visitor(BackEdges<BackEdgeSet>(deadEdges))
                                .root_vertex(g.start)
                                .color_map(colour_pmap));

    AcyclicFilter<BackEdgeSet> af(&deadEdges);
    AcyclicGraph acyclic_g(g.g, af);

    // Build a (reverse) topological ordering.
    vector<NFAVertex> topoOrder = buildTopoOrder(g, acyclic_g, colours);

    // Everybody starts in region 0.
    ue2::unordered_map<NFAVertex, u32> regions;
    regions.reserve(numVertices);
    for (auto v : vertices_range(g)) {
        regions.emplace(v, 0);
    }

    findDagLeaders(g, acyclic_g, topoOrder, regions);
    mergeUnderBackEdges(g, topoOrder, deadEdges, regions);

    return regions;
}
|
||||
|
||||
} // namespace ue2
|
||||
219
src/nfagraph/ng_region.h
Normal file
219
src/nfagraph/ng_region.h
Normal file
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Region analysis and utility functions.
|
||||
*/
|
||||
|
||||
#ifndef NG_REGION_H
|
||||
#define NG_REGION_H
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Assign a region ID to every vertex in the graph. */
|
||||
ue2::unordered_map<NFAVertex, u32> assignRegions(const NGHolder &g);
|
||||
|
||||
/** \brief True if vertices \p a and \p b are in the same region. */
|
||||
template <class Graph>
|
||||
bool inSameRegion(const Graph &g, NFAVertex a, NFAVertex b,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map) {
|
||||
assert(contains(region_map, a) && contains(region_map, b));
|
||||
|
||||
return region_map.at(a) == region_map.at(b) &&
|
||||
is_special(a, g) == is_special(b, g);
|
||||
}
|
||||
|
||||
/** \brief True if vertex \p b is in a later region than vertex \p a. */
|
||||
template <class Graph>
|
||||
bool inLaterRegion(const Graph &g, NFAVertex a, NFAVertex b,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map) {
|
||||
assert(contains(region_map, a) && contains(region_map, b));
|
||||
|
||||
u32 aa = g[a].index;
|
||||
u32 bb = g[b].index;
|
||||
|
||||
if (bb == NODE_START || bb == NODE_START_DOTSTAR) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (aa == NODE_START || aa == NODE_START_DOTSTAR) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (bb == NODE_ACCEPT || bb == NODE_ACCEPT_EOD) {
|
||||
return true;
|
||||
}
|
||||
if (aa == NODE_ACCEPT || aa == NODE_ACCEPT_EOD) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return region_map.at(a) < region_map.at(b);
|
||||
}
|
||||
|
||||
/** \brief True if vertex \p b is in an earlier region than vertex \p a. */
|
||||
template <class Graph>
|
||||
bool inEarlierRegion(const Graph &g, NFAVertex a, NFAVertex b,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map) {
|
||||
assert(contains(region_map, a) && contains(region_map, b));
|
||||
|
||||
u32 aa = g[a].index;
|
||||
u32 bb = g[b].index;
|
||||
|
||||
if (bb == NODE_START || bb == NODE_START_DOTSTAR) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (aa == NODE_START || aa == NODE_START_DOTSTAR) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (bb == NODE_ACCEPT || bb == NODE_ACCEPT_EOD) {
|
||||
return false;
|
||||
}
|
||||
if (aa == NODE_ACCEPT || aa == NODE_ACCEPT_EOD) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return region_map.at(b) < region_map.at(a);
|
||||
}
|
||||
|
||||
/** \brief True if vertex \p v is an entry vertex for its region. */
|
||||
template <class Graph>
|
||||
bool isRegionEntry(const Graph &g, NFAVertex v,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map) {
|
||||
// Note that some graph types do not have inv_adjacent_vertices, so we must
|
||||
// use in_edges here.
|
||||
for (const auto &e : in_edges_range(v, g)) {
|
||||
if (!inSameRegion(g, v, source(e, g), region_map)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/** \brief True if vertex \p v is an exit vertex for its region. */
|
||||
template <class Graph>
|
||||
bool isRegionExit(const Graph &g, NFAVertex v,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map) {
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (!inSameRegion(g, v, w, region_map)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/** \brief True if vertex \p v is in a region all on its own. */
|
||||
template <class Graph>
|
||||
bool isSingletonRegion(const Graph &g, NFAVertex v,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map) {
|
||||
for (const auto &e : in_edges_range(v, g)) {
|
||||
auto u = source(e, g);
|
||||
if (u != v && inSameRegion(g, v, u, region_map)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (auto w : ue2::adjacent_vertices_range(u, g)) {
|
||||
if (w != v && inSameRegion(g, v, w, region_map)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (w != v && inSameRegion(g, v, w, region_map)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const auto &e : in_edges_range(w, g)) {
|
||||
auto u = source(e, g);
|
||||
if (u != v && inSameRegion(g, v, u, region_map)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief True if the region containing vertex \p v is optional. The vertex \p v
|
||||
* should be a region leader.
|
||||
*/
|
||||
template <class Graph>
|
||||
bool isOptionalRegion(const Graph &g, NFAVertex v,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map) {
|
||||
assert(isRegionEntry(g, v, region_map));
|
||||
|
||||
DEBUG_PRINTF("check if r%u is optional (inspecting v%u)\n",
|
||||
region_map.at(v), g[v].index);
|
||||
|
||||
// Region zero is never optional.
|
||||
assert(contains(region_map, v));
|
||||
if (region_map.at(v) == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Optional if v has a predecessor in an earlier region that has a
|
||||
// successor in a later one.
|
||||
|
||||
for (const auto &e : in_edges_range(v, g)) {
|
||||
auto u = source(e, g);
|
||||
if (inSameRegion(g, v, u, region_map)) {
|
||||
continue;
|
||||
}
|
||||
DEBUG_PRINTF(" searching from u=%u\n", g[u].index);
|
||||
|
||||
assert(inEarlierRegion(g, v, u, region_map));
|
||||
|
||||
for (auto w : adjacent_vertices_range(u, g)) {
|
||||
DEBUG_PRINTF(" searching to w=%u\n", g[w].index);
|
||||
if (inLaterRegion(g, v, w, region_map)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
270
src/nfagraph/ng_region_redundancy.cpp
Normal file
270
src/nfagraph/ng_region_redundancy.cpp
Normal file
@@ -0,0 +1,270 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Region Redundancy optimisation pass.
|
||||
*
|
||||
* Identifies and removes entire regions that are adjacent to a cyclic state
|
||||
* with a superset of their character reachability.
|
||||
*/
|
||||
#include "ng_region_redundancy.h"
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ng_region.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include <set>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
|
||||
/** Precalculated information about a region. */
|
||||
struct RegionInfo {
|
||||
NFAVertex entry; //!< arbitrary entry vertex
|
||||
CharReach cr; //!< union of the reach of all vertices in region
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
static
|
||||
bool regionHasUnexpectedAccept(const NGHolder &g, const u32 region,
|
||||
const flat_set<ReportID> &expected_reports,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map) {
|
||||
/* TODO: only check vertices connected to accept/acceptEOD */
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (region != region_map.at(v)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (is_any_accept(v, g)) {
|
||||
return true; /* encountering an actual special in the region is
|
||||
* possible but definitely unexpected */
|
||||
}
|
||||
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (is_any_accept(w, g) && g[v].reports != expected_reports) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static
|
||||
void processCyclicStateForward(NGHolder &h, NFAVertex cyc,
|
||||
const map<u32, RegionInfo> &info,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map,
|
||||
set<u32> &deadRegions) {
|
||||
u32 region = region_map.at(cyc);
|
||||
CharReach cr = h[cyc].char_reach;
|
||||
auto reports = h[cyc].reports;
|
||||
|
||||
DEBUG_PRINTF("going forward from %u/%u\n", h[cyc].index,
|
||||
region);
|
||||
|
||||
map<u32, RegionInfo>::const_iterator it;
|
||||
while ((it = info.find(++region)) != info.end()) {
|
||||
NFAVertex v = it->second.entry;
|
||||
const CharReach ®ion_cr = it->second.cr;
|
||||
assert(isRegionEntry(h, v, region_map) && !is_special(v, h));
|
||||
DEBUG_PRINTF("checking %u\n", h[v].index);
|
||||
|
||||
if (!region_cr.isSubsetOf(cr)) {
|
||||
DEBUG_PRINTF("doesn't cover the reach of region %u\n", region);
|
||||
break;
|
||||
}
|
||||
|
||||
if (isOptionalRegion(h, v, region_map)
|
||||
&& !regionHasUnexpectedAccept(h, region, reports, region_map)) {
|
||||
DEBUG_PRINTF("cyclic state %u leads to optional region leader %u\n",
|
||||
h[cyc].index, h[v].index);
|
||||
deadRegions.insert(region);
|
||||
} else if (isSingletonRegion(h, v, region_map)) {
|
||||
/* we can use this region as straw and suck in optional regions on
|
||||
* the other side. This allows us to transform /a{n,m}/ to /a{n}/ */
|
||||
cr = h[v].char_reach;
|
||||
reports = h[v].reports;
|
||||
DEBUG_PRINTF("%u is straw\n", region);
|
||||
assert(cr.isSubsetOf(h[cyc].char_reach));
|
||||
if (hasSelfLoop(v, h)) {
|
||||
DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region);
|
||||
remove_edge(v, v, h);
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void processCyclicStateReverse(NGHolder &h, NFAVertex cyc,
|
||||
const map<u32, RegionInfo> &info,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map,
|
||||
set<u32> &deadRegions) {
|
||||
u32 region = region_map.at(cyc);
|
||||
CharReach cr = h[cyc].char_reach;
|
||||
auto reports = h[cyc].reports;
|
||||
|
||||
DEBUG_PRINTF("going back from %u/%u\n", h[cyc].index, region);
|
||||
|
||||
map<u32, RegionInfo>::const_iterator it;
|
||||
while ((it = info.find(--region)) != info.end()) {
|
||||
NFAVertex v = it->second.entry;
|
||||
const CharReach ®ion_cr = it->second.cr;
|
||||
assert(isRegionEntry(h, v, region_map) && !is_special(v, h));
|
||||
DEBUG_PRINTF("checking %u\n", h[v].index);
|
||||
|
||||
if (!region_cr.isSubsetOf(cr)) {
|
||||
DEBUG_PRINTF("doesn't cover the reach of region %u\n", region);
|
||||
break;
|
||||
}
|
||||
|
||||
if (isOptionalRegion(h, v, region_map)
|
||||
&& !regionHasUnexpectedAccept(h, region, reports, region_map)) {
|
||||
DEBUG_PRINTF("cyclic state %u trails optional region leader %u\n",
|
||||
h[cyc].index, h[v].index);
|
||||
deadRegions.insert(region);
|
||||
} else if (isSingletonRegion(h, v, region_map)) {
|
||||
/* we can use this region as a reverse straw and suck in optional
|
||||
* regions on the other side. This allows us to transform
|
||||
* /^a?a{n}.*b/ to /^a{n}.*b/ */
|
||||
cr = h[v].char_reach;
|
||||
reports = h[v].reports;
|
||||
DEBUG_PRINTF("%u is straw\n", region);
|
||||
assert(cr.isSubsetOf(h[cyc].char_reach));
|
||||
if (hasSelfLoop(v, h)) {
|
||||
DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region);
|
||||
remove_edge(v, v, h);
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
if (!region) { // No wrapping
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
map<u32, RegionInfo> buildRegionInfoMap(const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map) {
|
||||
map<u32, RegionInfo> info;
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
u32 region = region_map.at(v);
|
||||
if (is_special(v, g) || region == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
RegionInfo &ri = info[region];
|
||||
ri.cr |= g[v].char_reach;
|
||||
if (isRegionEntry(g, v, region_map)) {
|
||||
ri.entry = v;
|
||||
}
|
||||
}
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
static
|
||||
bool hasNoStartAnchoring(const NGHolder &h) {
|
||||
for (auto v : adjacent_vertices_range(h.start, h)) {
|
||||
if (!edge(h.startDs, v, h).second) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Finds regions that are made redundant by a neighbouring cyclic state and
 * removes all of their (non-special) vertices from \p g.
 */
void removeRegionRedundancy(NGHolder &g, som_type som) {
    const auto region_map = assignRegions(g);
    const map<u32, RegionInfo> info = buildRegionInfoMap(g, region_map);

    set<u32> deadRegions;

    /* When som is not being tracked and nothing is anchored, sds itself can
     * play the role of a cyclic region. */
    if (!som && hasNoStartAnchoring(g)) {
        processCyclicStateForward(g, g.startDs, info, region_map, deadRegions);
    }

    // Look at every region; the interesting ones consist of exactly one
    // vertex that carries a self-loop.
    for (const auto &entry : info) {
        const u32 region = entry.first;
        if (contains(deadRegions, region)) {
            continue; // already scheduled for removal
        }

        NFAVertex leader = entry.second.entry;

        /* singleton cyclic regions only */
        if (!(hasSelfLoop(leader, g)
              && isSingletonRegion(g, leader, region_map))) {
            continue;
        }

        if (som && is_virtual_start(leader, g)) {
            continue;
        }

        processCyclicStateForward(g, leader, info, region_map, deadRegions);
        processCyclicStateReverse(g, leader, info, region_map, deadRegions);
    }

    if (deadRegions.empty()) {
        return;
    }

    // Gather every non-special vertex that lives in a dead region.
    vector<NFAVertex> dead;
    for (auto v : vertices_range(g)) {
        if (is_special(v, g)) {
            continue;
        }
        if (contains(deadRegions, region_map.at(v))) {
            dead.push_back(v);
        }
    }

    if (dead.empty()) {
        return;
    }

    DEBUG_PRINTF("removing %zu vertices from %zu dead regions\n",
                 dead.size(), deadRegions.size());
    remove_vertices(dead, g);
}
|
||||
|
||||
} // namespace ue2
|
||||
49
src/nfagraph/ng_region_redundancy.h
Normal file
49
src/nfagraph/ng_region_redundancy.h
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Region Redundancy optimisation pass.
|
||||
*
|
||||
* Identifies and removes entire regions that are adjacent to a cyclic state
|
||||
* with a superset of their character reachability.
|
||||
*/
|
||||
|
||||
#ifndef NG_REGION_REDUNDANCY_H
|
||||
#define NG_REGION_REDUNDANCY_H
|
||||
|
||||
#include "som/som.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
|
||||
/**
 * \brief Remove regions that are adjacent to a cyclic state whose character
 * reachability is a superset of theirs.
 *
 * \param g
 *      Graph to operate on; vertices of redundant regions are removed from it.
 * \param som
 *      When non-zero (SOM is being tracked), startDs is not treated as a
 *      cyclic region and virtual-start vertices are not used as cyclic states.
 */
void removeRegionRedundancy(NGHolder &g, som_type som);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
2531
src/nfagraph/ng_repeat.cpp
Normal file
2531
src/nfagraph/ng_repeat.cpp
Normal file
File diff suppressed because it is too large
Load Diff
160
src/nfagraph/ng_repeat.h
Normal file
160
src/nfagraph/ng_repeat.h
Normal file
@@ -0,0 +1,160 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Bounded repeat analysis.
|
||||
*/
|
||||
|
||||
#ifndef NG_REPEAT_H
|
||||
#define NG_REPEAT_H
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/repeat_internal.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
class ReportManager;
|
||||
struct Grey;
|
||||
|
||||
/**
|
||||
* \brief Everything you need to know about a bounded repeat that we have
|
||||
* transformed.
|
||||
*/
|
||||
struct BoundedRepeatData {
|
||||
BoundedRepeatData(enum RepeatType type_in, const depth &a, const depth &z,
|
||||
u32 minPeriod_in, NFAVertex cyc, NFAVertex pos,
|
||||
const std::vector<NFAVertex> &tug_in)
|
||||
: type(type_in), repeatMin(a), repeatMax(z), minPeriod(minPeriod_in),
|
||||
cyclic(cyc), pos_trigger(pos), tug_triggers(tug_in) {}
|
||||
|
||||
BoundedRepeatData() = delete; // no default construction allowed.
|
||||
|
||||
enum RepeatType type; //!< selected type based on bounds and structure
|
||||
depth repeatMin; //!< minimum repeat bound
|
||||
depth repeatMax; //!< maximum repeat bound
|
||||
u32 minPeriod; //!< min trigger period
|
||||
NFAVertex cyclic; //!< cyclic vertex representing repeat in graph
|
||||
NFAVertex pos_trigger; //!< positive trigger vertex
|
||||
std::vector<NFAVertex> tug_triggers; //!< list of tug trigger vertices
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Run the bounded repeat analysis and transform the graph where
|
||||
* bounded repeats are found.
|
||||
*
|
||||
* \param h
|
||||
* Graph to operate on.
|
||||
* \param rm
|
||||
* ReportManager, or nullptr if the graph's reports are internal (e.g. for
|
||||
* Rose use).
|
||||
* \param fixed_depth_tops
|
||||
* Map of top to possible trigger depth.
|
||||
* \param triggers
|
||||
* Map of top to the vector of triggers (i.e. preceding literals/masks)
|
||||
* \param repeats
|
||||
* Repeat info is filled in for caller here.
|
||||
* \param streaming
|
||||
* True if we're in streaming mode.
|
||||
* \param simple_model_selection
|
||||
* Don't perform complex (and slow) model selection analysis, e.g.
|
||||
* determining whether the repeat is sole entry.
|
||||
* \param grey
|
||||
* Grey box object.
|
||||
* \param reformed_start_ds
|
||||
* If supplied, this will be set to true if the graph was optimised for a
|
||||
* leading first repeat, resulting in the output graph having no self-loop
|
||||
* on startDs.
|
||||
*/
|
||||
void analyseRepeats(NGHolder &h, const ReportManager *rm,
|
||||
const std::map<u32, u32> &fixed_depth_tops,
|
||||
const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
|
||||
std::vector<BoundedRepeatData> *repeats, bool streaming,
|
||||
bool simple_model_selection, const Grey &grey,
|
||||
bool *reformed_start_ds = nullptr);
|
||||
|
||||
/**
|
||||
* \brief Information on repeats in a holder, returned from \ref findRepeats.
|
||||
*/
|
||||
struct GraphRepeatInfo {
|
||||
depth repeatMin; /**< minimum bound */
|
||||
depth repeatMax; /**< effective max bound */
|
||||
std::vector<NFAVertex> vertices; /**< vertices involved in repeat */
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Provides information on repeats in the graph.
|
||||
*/
|
||||
void findRepeats(const NGHolder &h, u32 minRepeatVertices,
|
||||
std::vector<GraphRepeatInfo> *repeats_out);
|
||||
|
||||
struct PureRepeat {
|
||||
CharReach reach;
|
||||
DepthMinMax bounds;
|
||||
ue2::flat_set<ReportID> reports;
|
||||
|
||||
bool operator==(const PureRepeat &a) const {
|
||||
return reach == a.reach && bounds == a.bounds && reports == a.reports;
|
||||
}
|
||||
|
||||
bool operator!=(const PureRepeat &a) const { return !(*this == a); }
|
||||
|
||||
bool operator<(const PureRepeat &a) const {
|
||||
if (reach != a.reach) {
|
||||
return reach < a.reach;
|
||||
}
|
||||
if (bounds != a.bounds) {
|
||||
return bounds < a.bounds;
|
||||
}
|
||||
return reports < a.reports;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Returns true and fills the given PureRepeat structure if the graph is
|
||||
* wholly a repeat over a single character class.
|
||||
*
|
||||
* For example, something like:
|
||||
*
|
||||
* /^[a-z]{10,20}/
|
||||
*
|
||||
* - Note: graph must not use SDS or EOD.
|
||||
* - Note: \p PureRepeat::bounds::max is set to infinity if there is no upper
|
||||
* bound on the repeat.
|
||||
*/
|
||||
bool isPureRepeat(const NGHolder &h, PureRepeat &r);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_REPEAT_H
|
||||
86
src/nfagraph/ng_reports.cpp
Normal file
86
src/nfagraph/ng_reports.cpp
Normal file
@@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Utility functions for working with Report ID sets.
|
||||
*/
|
||||
#include "ng_reports.h"
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "util/container.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/report_manager.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** Returns the set of all reports in the graph. */
|
||||
set<ReportID> all_reports(const NGHolder &g) {
|
||||
set<ReportID> rv;
|
||||
for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
|
||||
insert(&rv, g[v].reports);
|
||||
}
|
||||
for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
|
||||
insert(&rv, g[v].reports);
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
/** True if *all* reports in the graph are exhaustible. */
|
||||
bool can_exhaust(const NGHolder &g, const ReportManager &rm) {
|
||||
for (ReportID report_id : all_reports(g)) {
|
||||
if (rm.getReport(report_id).ekey == INVALID_EKEY) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Derive a maximum offset for the graph from the max_offset values of its
|
||||
* reports. Returns MAX_OFFSET for inf. */
|
||||
u64a findMaxOffset(const NGHolder &g, const ReportManager &rm) {
|
||||
u64a maxOffset = 0;
|
||||
set<ReportID> reports = all_reports(g);
|
||||
assert(!reports.empty());
|
||||
|
||||
for (ReportID report_id : all_reports(g)) {
|
||||
const Report &ir = rm.getReport(report_id);
|
||||
if (ir.hasBounds()) {
|
||||
maxOffset = max(maxOffset, ir.maxOffset);
|
||||
} else {
|
||||
return MAX_OFFSET;
|
||||
}
|
||||
}
|
||||
return maxOffset;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
57
src/nfagraph/ng_reports.h
Normal file
57
src/nfagraph/ng_reports.h
Normal file
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Utility functions for working with Report ID sets.
|
||||
*/
|
||||
|
||||
#ifndef NG_REPORTS_H
|
||||
#define NG_REPORTS_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <set>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
class ReportManager;
|
||||
|
||||
/** Returns the set of all reports in the graph. */
|
||||
std::set<ReportID> all_reports(const NGHolder &g);
|
||||
|
||||
/** True if *all* reports in the graph are exhaustible. */
|
||||
bool can_exhaust(const NGHolder &g, const ReportManager &rm);
|
||||
|
||||
/** Derive a maximum offset for the graph from the max_offset values of its
|
||||
* reports. Returns MAX_OFFSET for inf. */
|
||||
u64a findMaxOffset(const NGHolder &g, const ReportManager &rm);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_REPORTS_H
|
||||
340
src/nfagraph/ng_restructuring.cpp
Normal file
340
src/nfagraph/ng_restructuring.cpp
Normal file
@@ -0,0 +1,340 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief State numbering and late graph restructuring code.
|
||||
*/
|
||||
#include "ng_restructuring.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
|
||||
#include <boost/graph/transpose_graph.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** Connect the start vertex to each of the vertices in \p tops. This is useful
|
||||
* temporarily for when we need to run a graph algorithm that expects a single
|
||||
* source vertex. */
|
||||
void wireStartToTops(NGHolder &g, const map<u32, NFAVertex> &tops,
|
||||
vector<NFAEdge> &topEdges) {
|
||||
for (const auto &top : tops) {
|
||||
NFAVertex v = top.second;
|
||||
assert(!isLeafNode(v, g));
|
||||
|
||||
const NFAEdge &e = add_edge(g.start, v, g).first;
|
||||
topEdges.push_back(e);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void getStateOrdering(NGHolder &g, const map<u32, NFAVertex> &tops,
|
||||
vector<NFAVertex> &ordering) {
|
||||
// First, wire up our "tops" to start so that we have a single source,
|
||||
// which will give a nicer topo order.
|
||||
vector<NFAEdge> topEdges;
|
||||
wireStartToTops(g, tops, topEdges);
|
||||
|
||||
renumberGraphVertices(g);
|
||||
|
||||
vector<NFAVertex> temp = getTopoOrdering(g);
|
||||
|
||||
remove_edges(topEdges, g);
|
||||
|
||||
// Move {start, startDs} to the end, so they'll be first when we reverse
|
||||
// the ordering.
|
||||
temp.erase(remove(temp.begin(), temp.end(), g.startDs));
|
||||
temp.erase(remove(temp.begin(), temp.end(), g.start));
|
||||
temp.push_back(g.startDs);
|
||||
temp.push_back(g.start);
|
||||
|
||||
// Walk ordering, remove vertices that shouldn't be participating in state
|
||||
// numbering, such as accepts.
|
||||
for (auto v : temp) {
|
||||
if (is_any_accept(v, g)) {
|
||||
continue; // accepts don't need states
|
||||
}
|
||||
|
||||
ordering.push_back(v);
|
||||
}
|
||||
|
||||
// Output of topo order was in reverse.
|
||||
reverse(ordering.begin(), ordering.end());
|
||||
}
|
||||
|
||||
// Returns the number of states.
|
||||
static
|
||||
ue2::unordered_map<NFAVertex, u32>
|
||||
getStateIndices(const NGHolder &h, const vector<NFAVertex> &ordering) {
|
||||
ue2::unordered_map<NFAVertex, u32> states;
|
||||
for (const auto &v : vertices_range(h)) {
|
||||
states[v] = NO_STATE;
|
||||
}
|
||||
|
||||
u32 stateNum = 0;
|
||||
for (auto v : ordering) {
|
||||
DEBUG_PRINTF("assigning state num %u to vertex %u\n", stateNum,
|
||||
h[v].index);
|
||||
states[v] = stateNum++;
|
||||
}
|
||||
return states;
|
||||
}
|
||||
|
||||
/** UE-1648: A state with a single successor that happens to be a predecessor
|
||||
* can be given any ol' state ID by the topological ordering, so we sink it
|
||||
* next to its pred. This enables better merging. */
|
||||
static
|
||||
void optimiseTightLoops(const NGHolder &g, vector<NFAVertex> &ordering) {
|
||||
deque<pair<NFAVertex, NFAVertex>> candidates;
|
||||
|
||||
auto start = ordering.begin();
|
||||
for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) {
|
||||
NFAVertex v = *it;
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (out_degree(v, g) == 1) {
|
||||
NFAVertex t = *(adjacent_vertices(v, g).first);
|
||||
if (v == t) {
|
||||
continue;
|
||||
}
|
||||
if (edge(t, v, g).second && find(start, it, t) != ite) {
|
||||
candidates.push_back(make_pair(v, t));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &cand : candidates) {
|
||||
NFAVertex v = cand.first, u = cand.second;
|
||||
auto u_it = find(ordering.begin(), ordering.end(), u);
|
||||
auto v_it = find(ordering.begin(), ordering.end(), v);
|
||||
|
||||
// Only move candidates backwards in the ordering, and only move them
|
||||
// when necessary.
|
||||
if (u_it >= v_it || distance(u_it, v_it) == 1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("moving vertex %u next to %u\n",
|
||||
g[v].index, g[u].index);
|
||||
|
||||
ordering.erase(v_it);
|
||||
ordering.insert(++u_it, v);
|
||||
}
|
||||
}
|
||||
|
||||
/** Computes a state ordering for \p h (using \p tops as extra sources), tunes
 * it for tight loops and returns the resulting vertex -> state index map. */
ue2::unordered_map<NFAVertex, u32>
numberStates(NGHolder &h, const map<u32, NFAVertex> &tops) {
    DEBUG_PRINTF("numbering states for holder %p\n", &h);

    vector<NFAVertex> order;
    getStateOrdering(h, tops, order);
    optimiseTightLoops(h, order);

    return getStateIndices(h, order);
}
|
||||
|
||||
/** Returns the number of states implied by \p state_ids: zero for an empty
 * map, otherwise one more than the largest assigned index. If \p addTops is
 * set and start has a state, start's own state is swapped for one state per
 * distinct top found on start's out-edges. */
u32 countStates(const NGHolder &g,
                const ue2::unordered_map<NFAVertex, u32> &state_ids,
                bool addTops) {
    if (state_ids.empty()) {
        return 0;
    }

    // Largest index that has actually been handed out.
    u32 highest = 0;
    for (const auto &entry : state_ids) {
        const u32 id = entry.second;
        if (id == NO_STATE) {
            continue;
        }
        highest = max(id, highest);
    }

    u32 total = highest + 1;

    assert(contains(state_ids, g.start));
    if (!addTops || state_ids.at(g.start) == NO_STATE) {
        return total;
    }

    set<u32> distinct_tops;
    for (auto e : out_edges_range(g.start, g)) {
        distinct_tops.insert(g[e].top);
    }

    --total;
    total += distinct_tops.size();
    return total;
}
|
||||
|
||||
/**
|
||||
* Returns true if start leads to all of startDs's proper successors or if
|
||||
* start has no successors other than startDs.
|
||||
*/
|
||||
static
|
||||
bool startIsRedundant(const NGHolder &g) {
|
||||
set<NFAVertex> start, startDs;
|
||||
|
||||
for (const auto &e : out_edges_range(g.start, g)) {
|
||||
NFAVertex v = target(e, g);
|
||||
if (v == g.startDs) {
|
||||
continue;
|
||||
}
|
||||
start.insert(v);
|
||||
}
|
||||
|
||||
for (const auto &e : out_edges_range(g.startDs, g)) {
|
||||
NFAVertex v = target(e, g);
|
||||
if (v == g.startDs) {
|
||||
continue;
|
||||
}
|
||||
startDs.insert(v);
|
||||
}
|
||||
|
||||
// Trivial case: start has no successors other than startDs.
|
||||
if (start.empty()) {
|
||||
DEBUG_PRINTF("start has no out-edges other than to startDs\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
if (start != startDs) {
|
||||
DEBUG_PRINTF("out-edges of start and startDs aren't equivalent\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** One final, FINAL optimisation. Drop either start or startDs if it's unused
|
||||
* in this graph. We leave this until this late because having both vertices in
|
||||
* the graph, with fixed state indices, is useful for merging and other
|
||||
* analyses. */
|
||||
void dropUnusedStarts(NGHolder &g, ue2::unordered_map<NFAVertex, u32> &states) {
|
||||
u32 adj = 0;
|
||||
|
||||
if (startIsRedundant(g)) {
|
||||
DEBUG_PRINTF("dropping unused start\n");
|
||||
states[g.start] = NO_STATE;
|
||||
adj++;
|
||||
}
|
||||
|
||||
if (proper_out_degree(g.startDs, g) == 0) {
|
||||
DEBUG_PRINTF("dropping unused startDs\n");
|
||||
states[g.startDs] = NO_STATE;
|
||||
adj++;
|
||||
}
|
||||
|
||||
if (!adj) {
|
||||
DEBUG_PRINTF("both start and startDs must remain\n");
|
||||
return;
|
||||
}
|
||||
|
||||
// We have removed one or both of the starts. Walk the non-special vertices
|
||||
// in the graph with state indices assigned to them and subtract
|
||||
// adj from all of them.
|
||||
for (auto v : vertices_range(g)) {
|
||||
u32 &state = states[v]; // note ref
|
||||
if (state == NO_STATE) {
|
||||
continue;
|
||||
}
|
||||
if (is_any_start(v, g)) {
|
||||
assert(state <= 1);
|
||||
state = 0; // one start remains
|
||||
} else {
|
||||
assert(!is_special(v, g));
|
||||
assert(state >= adj);
|
||||
state -= adj;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to
|
||||
* accepts. */
|
||||
void reverseHolder(const NGHolder &g_in, NGHolder &g) {
|
||||
// Make the BGL do the grunt work.
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> vertexMap;
|
||||
boost::transpose_graph(g_in.g, g.g,
|
||||
orig_to_copy(boost::make_assoc_property_map(vertexMap)).
|
||||
vertex_index_map(get(&NFAGraphVertexProps::index, g_in.g)));
|
||||
|
||||
// The transpose_graph operation will have created extra copies of our
|
||||
// specials. We have to rewire their neighbours to the 'real' specials and
|
||||
// delete them.
|
||||
NFAVertex start = vertexMap[g_in.acceptEod];
|
||||
NFAVertex startDs = vertexMap[g_in.accept];
|
||||
NFAVertex accept = vertexMap[g_in.startDs];
|
||||
NFAVertex acceptEod = vertexMap[g_in.start];
|
||||
|
||||
// Successors of starts.
|
||||
for (const auto &e : out_edges_range(start, g)) {
|
||||
NFAVertex v = target(e, g);
|
||||
add_edge(g.start, v, g[e], g);
|
||||
}
|
||||
for (const auto &e : out_edges_range(startDs, g)) {
|
||||
NFAVertex v = target(e, g);
|
||||
add_edge(g.startDs, v, g[e], g);
|
||||
}
|
||||
|
||||
// Predecessors of accepts.
|
||||
for (const auto &e : in_edges_range(accept, g)) {
|
||||
NFAVertex u = source(e, g);
|
||||
add_edge(u, g.accept, g[e], g);
|
||||
}
|
||||
for (const auto &e : in_edges_range(acceptEod, g)) {
|
||||
NFAVertex u = source(e, g);
|
||||
add_edge(u, g.acceptEod, g[e], g);
|
||||
}
|
||||
|
||||
// Remove our impostors.
|
||||
clear_vertex(start, g);
|
||||
remove_vertex(start, g);
|
||||
clear_vertex(startDs, g);
|
||||
remove_vertex(startDs, g);
|
||||
clear_vertex(accept, g);
|
||||
remove_vertex(accept, g);
|
||||
clear_vertex(acceptEod, g);
|
||||
remove_vertex(acceptEod, g);
|
||||
|
||||
// Renumber so that g's properties (number of vertices, edges) are
|
||||
// accurate.
|
||||
g.renumberVertices();
|
||||
g.renumberEdges();
|
||||
|
||||
assert(num_vertices(g) == num_vertices(g_in));
|
||||
assert(num_edges(g) == num_edges(g_in));
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
86
src/nfagraph/ng_restructuring.h
Normal file
86
src/nfagraph/ng_restructuring.h
Normal file
@@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief State numbering and late graph restructuring code.
|
||||
|
||||
*/
|
||||
#ifndef NG_RESTRUCTURING_H
|
||||
#define NG_RESTRUCTURING_H
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
|
||||
/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to
|
||||
* accepts. */
|
||||
void reverseHolder(const NGHolder &g, NGHolder &out);
|
||||
|
||||
/** Connect the start vertex to each of the vertices in \p tops. This is useful
|
||||
* temporarily for when we need to run a graph algorithm that expects a single
|
||||
* source vertex. */
|
||||
void wireStartToTops(NGHolder &g, const std::map<u32, NFAVertex> &tops,
|
||||
std::vector<NFAEdge> &topEdges);
|
||||
|
||||
/**
|
||||
* \brief Special state index value meaning that the vertex will not
|
||||
* participate in an (NFA/DFA/etc) implementation.
|
||||
*/
|
||||
static constexpr u32 NO_STATE = ~0;
|
||||
|
||||
/**
|
||||
* \brief Gives each participating vertex in the graph a unique state index.
|
||||
*/
|
||||
ue2::unordered_map<NFAVertex, u32>
|
||||
numberStates(NGHolder &h,
|
||||
const std::map<u32, NFAVertex> &tops = std::map<u32, NFAVertex>{});
|
||||
|
||||
/**
|
||||
* \brief Counts the number of states (vertices with state indices) in the
|
||||
* graph.
|
||||
*
|
||||
* If addTops is true, also accounts for states that will be constructed for
|
||||
* each unique top.
|
||||
*/
|
||||
u32 countStates(const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
||||
bool addTops = true);
|
||||
|
||||
/** Optimisation: drop unnecessary start states. */
|
||||
void dropUnusedStarts(NGHolder &g, ue2::unordered_map<NFAVertex, u32> &states);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
297
src/nfagraph/ng_revacc.cpp
Normal file
297
src/nfagraph/ng_revacc.cpp
Normal file
@@ -0,0 +1,297 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Reverse acceleration analysis.
|
||||
*/
|
||||
#include "ng_revacc.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/accel.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
bool isPseudoNoCaseChar(const CharReach &cr) {
|
||||
return cr.count() == 2 && !(cr.find_first() & 32)
|
||||
&& cr.test(cr.find_first() | 32);
|
||||
}
|
||||
|
||||
static
|
||||
bool lookForEodSchemes(const RevAccInfo &rev_info, const u32 minWidth,
|
||||
NFA *nfa) {
|
||||
DEBUG_PRINTF("pure eod triggered pattern\n");
|
||||
|
||||
/* 2 char */
|
||||
for (u8 nocase = 0; nocase < 2; nocase++) {
|
||||
for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) {
|
||||
const CharReach &cr = rev_info.acceptEodReach[i];
|
||||
const CharReach &cr2 = rev_info.acceptEodReach[i - 1];
|
||||
|
||||
if (!nocase && cr.count() == 1 && cr2.count() == 1) {
|
||||
assert(i < minWidth);
|
||||
if (i >= minWidth) {
|
||||
goto single;
|
||||
}
|
||||
nfa->rAccelType = ACCEL_RDEOD;
|
||||
nfa->rAccelData.array[0] = (u8)cr.find_first();
|
||||
nfa->rAccelData.array[1] = (u8)cr2.find_first();
|
||||
nfa->rAccelOffset = i + 1;
|
||||
DEBUG_PRINTF("raccel eod x2 %u %04hx\n",
|
||||
nfa->rAccelOffset, nfa->rAccelData.dc);
|
||||
return true;
|
||||
} else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr))
|
||||
&& (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) {
|
||||
assert(i < minWidth);
|
||||
if (i >= minWidth) {
|
||||
goto single;
|
||||
}
|
||||
nfa->rAccelType = ACCEL_RDEOD_NOCASE;
|
||||
nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR; /* uppercase */
|
||||
nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR;
|
||||
nfa->rAccelOffset = i + 1;
|
||||
DEBUG_PRINTF("raccel nc eod x2 %u %04hx\n",
|
||||
nfa->rAccelOffset, nfa->rAccelData.dc);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
single:
|
||||
/* 1 char */
|
||||
for (u8 nocase = 0; nocase < 2; nocase++) {
|
||||
for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
|
||||
const CharReach &cr = rev_info.acceptEodReach[i];
|
||||
if (!nocase && cr.count() == 1) {
|
||||
assert(i < minWidth);
|
||||
if (i >= minWidth) {
|
||||
return false;
|
||||
}
|
||||
nfa->rAccelType = ACCEL_REOD;
|
||||
nfa->rAccelData.c = (u8) cr.find_first();
|
||||
nfa->rAccelOffset = i + 1;
|
||||
DEBUG_PRINTF("raccel eod %u %02hhx\n",
|
||||
nfa->rAccelOffset, nfa->rAccelData.c);
|
||||
return true;
|
||||
} else if (nocase && isPseudoNoCaseChar(cr)) {
|
||||
assert(i < minWidth);
|
||||
if (i >= minWidth) {
|
||||
return false;
|
||||
}
|
||||
nfa->rAccelType = ACCEL_REOD_NOCASE;
|
||||
nfa->rAccelData.c = (u8)cr.find_first(); /* uppercase */
|
||||
nfa->rAccelOffset = i + 1;
|
||||
DEBUG_PRINTF("raccel nc eod %u %02hhx\n",
|
||||
nfa->rAccelOffset, nfa->rAccelData.c);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static
|
||||
bool lookForFloatingSchemes(const RevAccInfo &rev_info,
|
||||
const u32 minWidth, NFA *nfa) {
|
||||
/* 2 char */
|
||||
for (u8 nocase = 0; nocase < 2; nocase++) {
|
||||
for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) {
|
||||
CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i];
|
||||
CharReach cr2 = rev_info.acceptEodReach[i - 1]
|
||||
| rev_info.acceptReach[i - 1];
|
||||
if (!nocase && cr.count() == 1 && cr2.count() == 1) {
|
||||
assert((u8)(i - 1) < minWidth);
|
||||
if (i > minWidth) {
|
||||
goto single;
|
||||
}
|
||||
nfa->rAccelType = ACCEL_RDVERM;
|
||||
nfa->rAccelData.array[0] = (u8)cr.find_first();
|
||||
nfa->rAccelData.array[1] = (u8)cr2.find_first();
|
||||
nfa->rAccelOffset = i;
|
||||
DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx\n",
|
||||
nfa->rAccelOffset, nfa->rAccelData.array[0],
|
||||
nfa->rAccelData.array[1]);
|
||||
return true;
|
||||
} else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr))
|
||||
&& (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) {
|
||||
assert((u8)(i - 1) < minWidth);
|
||||
if (i > minWidth) {
|
||||
goto single;
|
||||
}
|
||||
nfa->rAccelType = ACCEL_RDVERM_NOCASE;
|
||||
nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR;
|
||||
nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR;
|
||||
nfa->rAccelOffset = i;
|
||||
DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx nc\n",
|
||||
nfa->rAccelOffset, nfa->rAccelData.array[0],
|
||||
nfa->rAccelData.array[1]);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
single:
|
||||
/* 1 char */
|
||||
for (u8 nocase = 0; nocase < 2; nocase++) {
|
||||
for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
|
||||
CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i];
|
||||
if (!nocase && cr.count() == 1) {
|
||||
assert(i < minWidth);
|
||||
if (i >= minWidth) {
|
||||
return false;
|
||||
}
|
||||
nfa->rAccelType = ACCEL_RVERM;
|
||||
nfa->rAccelData.c = (u8)cr.find_first();
|
||||
nfa->rAccelOffset = i + 1;
|
||||
DEBUG_PRINTF("raccel verm %u %02hhx\n", nfa->rAccelOffset,
|
||||
nfa->rAccelData.c);
|
||||
return true;
|
||||
} else if (nocase && isPseudoNoCaseChar(cr)) {
|
||||
assert(i < minWidth);
|
||||
if (i >= minWidth) {
|
||||
return false;
|
||||
}
|
||||
nfa->rAccelType = ACCEL_RVERM_NOCASE;
|
||||
nfa->rAccelData.c = (u8)cr.find_first(); /* 'uppercase' char */
|
||||
nfa->rAccelOffset = i + 1;
|
||||
DEBUG_PRINTF("raccel nc verm %u %02hhx\n", nfa->rAccelOffset,
|
||||
nfa->rAccelData.c);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * \brief Select a reverse acceleration scheme for \p nfa from \p rev_info.
 *
 * Nothing is done when \p rev_info is not marked valid. Otherwise
 * rAccelOffset is first reset to 1. When nothing can reach the plain accept
 * at offset zero, the EOD-specific schemes are tried first. The floating
 * schemes are then tried unless \p eod_only is set.
 *
 * \param nfa       Engine whose reverse acceleration fields are written.
 * \param rev_info  Reach information gathered from the graph.
 * \param min_width Bound passed through to the scheme searches.
 * \param eod_only  If true, floating schemes are never attempted.
 */
void buildReverseAcceleration(NFA *nfa, const RevAccInfo &rev_info,
                              u32 min_width, bool eod_only) {
    assert(nfa);

    if (!rev_info.valid) {
        return;
    }

    nfa->rAccelOffset = 1;

    const CharReach &accept0 = rev_info.acceptReach[0];
    const CharReach &acceptEod0 = rev_info.acceptEodReach[0];

    assert(accept0.any() || acceptEod0.any());

    const bool no_plain_accept = accept0.none();
    if (no_plain_accept && acceptEod0.none()) {
        DEBUG_PRINTF("expected path to accept\n");
        return;
    }

    /* eod only */
    if (no_plain_accept && lookForEodSchemes(rev_info, min_width, nfa)) {
        assert(nfa->rAccelOffset <= min_width);
        return;
    }

    if (eod_only) {
        return;
    }

    const bool accelerated = lookForFloatingSchemes(rev_info, min_width, nfa);
    if (!accelerated) {
        DEBUG_PRINTF("failed to accelerate\n");
    }
}
|
||||
|
||||
static
|
||||
void populateRevAccelInfo(const NGHolder &g, NFAVertex terminal,
|
||||
vector<CharReach> *reach) {
|
||||
set<NFAVertex> vset;
|
||||
|
||||
for (auto v : inv_adjacent_vertices_range(terminal, g)) {
|
||||
if (!is_special(v, g)) {
|
||||
vset.insert(v);
|
||||
}
|
||||
}
|
||||
|
||||
for (u8 offset = 0; offset < MAX_RACCEL_OFFSET; offset++) {
|
||||
set<NFAVertex> next;
|
||||
|
||||
for (auto v : vset) {
|
||||
const CharReach &cr = g[v].char_reach;
|
||||
(*reach)[offset] |= cr;
|
||||
|
||||
DEBUG_PRINTF("off %u adding %zu to %zu\n", offset, cr.count(),
|
||||
(*reach)[offset].count());
|
||||
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (u == g.start || u == g.startDs) {
|
||||
/* kill all subsequent offsets by setting to dot, setting
|
||||
* to dot is in someways not accurate as there may be no
|
||||
* data at all but neither case can be accelerated */
|
||||
for (u8 i = offset + 1; i < MAX_RACCEL_OFFSET; i++) {
|
||||
(*reach)[i].setall();
|
||||
}
|
||||
break;
|
||||
} else if (!is_special(u, g)) {
|
||||
next.insert(u);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
swap(vset, next);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Fill \p rai with the reverse reach of \p g for both accept and
 * acceptEod, then mark it valid.
 */
void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g) {
    DEBUG_PRINTF("pop rev info\n");

    /* bytes leading up to an ordinary match */
    populateRevAccelInfo(g, g.accept, &rai.acceptReach);

    /* bytes leading up to an end-of-data match */
    populateRevAccelInfo(g, g.acceptEod, &rai.acceptEodReach);

    rai.valid = true;
}
|
||||
|
||||
/**
 * \brief Fold the reach information of \p vic into \p dest.
 *
 * The result is valid only if both inputs were valid; each per-offset reach
 * becomes the union of the two inputs.
 */
void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic) {
    DEBUG_PRINTF("merging ra\n");

    dest.valid = dest.valid && vic.valid;

    for (size_t off = 0; off < MAX_RACCEL_OFFSET; ++off) {
        dest.acceptReach[off] |= vic.acceptReach[off];
        dest.acceptEodReach[off] |= vic.acceptEodReach[off];
    }
}
|
||||
|
||||
/**
 * \brief Construct an invalid RevAccInfo whose two reach vectors each hold
 * MAX_RACCEL_OFFSET default-constructed entries.
 */
RevAccInfo::RevAccInfo()
    : valid(false),
      acceptReach(MAX_RACCEL_OFFSET),
      acceptEodReach(MAX_RACCEL_OFFSET) {
}
|
||||
|
||||
} // namespace ue2
|
||||
65
src/nfagraph/ng_revacc.h
Normal file
65
src/nfagraph/ng_revacc.h
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Reverse acceleration analysis.
|
||||
*/
|
||||
|
||||
#ifndef NG_REVACC_H
|
||||
#define NG_REVACC_H
|
||||
|
||||
#include "util/charreach.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
struct NFA;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
|
||||
#define MAX_RACCEL_OFFSET 16
|
||||
|
||||
struct RevAccInfo {
|
||||
RevAccInfo(void);
|
||||
bool valid;
|
||||
std::vector<CharReach> acceptReach; /**< bytes which can appear n
|
||||
* bytes before a match */
|
||||
std::vector<CharReach> acceptEodReach; /**< bytes which can appear n
|
||||
* bytes before eod match */
|
||||
};
|
||||
|
||||
void buildReverseAcceleration(struct NFA *nfa, const RevAccInfo &rev_info,
|
||||
u32 min_width, bool eod_only = false);
|
||||
|
||||
void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g);
|
||||
void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
3036
src/nfagraph/ng_rose.cpp
Normal file
3036
src/nfagraph/ng_rose.cpp
Normal file
File diff suppressed because it is too large
Load Diff
70
src/nfagraph/ng_rose.h
Normal file
70
src/nfagraph/ng_rose.h
Normal file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Rose construction from NGHolder.
|
||||
*/
|
||||
|
||||
#ifndef NG_ROSE_H
|
||||
#define NG_ROSE_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
class ReportManager;
|
||||
class RoseBuild;
|
||||
|
||||
struct CompileContext;
|
||||
struct ue2_literal;
|
||||
|
||||
/** \brief Attempt to consume the entire pattern in graph \a h with Rose.
|
||||
* Returns true if successful. */
|
||||
bool splitOffRose(RoseBuild &rose, const NGHolder &h, bool prefilter,
|
||||
const CompileContext &cc);
|
||||
|
||||
/** \brief Attempt to consume the entire pattern in graph \a h with Rose.
|
||||
* This is the last attempt to handle a pattern before we resort to an outfix.
|
||||
* Returns true if successful. */
|
||||
bool finalChanceRose(RoseBuild &rose, const NGHolder &h, bool prefilter,
|
||||
const CompileContext &cc);
|
||||
|
||||
/** \brief True if the pattern in \a h is consumable by Rose. This function
|
||||
* may be conservative (return false even if supported) for efficiency. */
|
||||
bool checkRose(const ReportManager &rm, const NGHolder &h, bool prefilter,
|
||||
const CompileContext &cc);
|
||||
|
||||
/** \brief Returns the delay or MO_INVALID_IDX if the graph cannot match with
|
||||
* the trailing literal. */
|
||||
u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
|
||||
u32 max_delay, bool overhang_ok = true);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_ROSE_H
|
||||
93
src/nfagraph/ng_sep.cpp
Normal file
93
src/nfagraph/ng_sep.cpp
Normal file
@@ -0,0 +1,93 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Short Exhaustible Passthroughs.
|
||||
*
|
||||
* Analysis code for determining whether a graph should be treated specially
|
||||
* because it is short and contains exhaustible reports; typically we turn
|
||||
* these into outfixes rather than risk them becoming Rose literals.
|
||||
*
|
||||
* For example, the pattern:
|
||||
*
|
||||
* /[a-f]/H
|
||||
*
|
||||
* ... is far better suited to becoming a small outfix that generates one match
|
||||
* and goes dead than being split into six one-byte Rose literals that end up
|
||||
* in the literal matcher.
|
||||
*/
|
||||
#include "ng_sep.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_reports.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
bool checkFromVertex(const NGHolder &g, NFAVertex start) {
|
||||
for (auto v : adjacent_vertices_range(start, g)) {
|
||||
if (v == g.startDs) {
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(!is_special(v, g)); /* should not be vacuous */
|
||||
|
||||
if (!edge(g.startDs, v, g).second) { /* only floating starts */
|
||||
return false;
|
||||
} else if (out_degree(v, g) == 1
|
||||
&& edge(v, g.accept, g).second) { /* only floating end */
|
||||
; /* possible sep */
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isSEP(const NGHolder &g, const ReportManager &rm, const Grey &grey) {
|
||||
if (!grey.mergeSEP || !can_exhaust(g, rm)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!checkFromVertex(g, g.start) || !checkFromVertex(g, g.startDs)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(out_degree(g.start, g) || proper_out_degree(g.startDs, g));
|
||||
|
||||
DEBUG_PRINTF("graph is an SEP\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
46
src/nfagraph/ng_sep.h
Normal file
46
src/nfagraph/ng_sep.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Short Exhaustible Passthroughs.
|
||||
*/
|
||||
|
||||
#ifndef NG_SEP_H
|
||||
#define NG_SEP_H
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct Grey;
|
||||
class NGHolder;
|
||||
class ReportManager;
|
||||
|
||||
/**
 * \brief True if graph \p g is a Short Exhaustible Passthrough (SEP).
 *
 * Always false when \p grey has mergeSEP disabled.
 */
bool isSEP(const NGHolder &g, const ReportManager &rm, const Grey &grey);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
245
src/nfagraph/ng_small_literal_set.cpp
Normal file
245
src/nfagraph/ng_small_literal_set.cpp
Normal file
@@ -0,0 +1,245 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Rose construction from NGHolder for cases representing small literal
|
||||
* sets.
|
||||
*/
|
||||
#include "ng_small_literal_set.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "ng_util.h"
|
||||
#include "ng_holder.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/order_check.h"
|
||||
#include "util/ue2string.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::adaptors::map_keys;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Upper bound on the number of literals accepted per pattern. */
static constexpr size_t MAX_LITERAL_SET_SIZE = 30;

/**
 * \brief Upper bound on the number of literals accepted per pattern when at
 * least one of them is weak (has period < MIN_STRONG_PERIOD).
 */
static constexpr size_t MAX_WEAK_LITERAL_SET_SIZE = 20;

/**
 * \brief Smallest string period at which a literal counts as "strong" (and
 * so is not subject to the weak size limit).
 */
static constexpr size_t MIN_STRONG_PERIOD = 3;
|
||||
|
||||
namespace {
|
||||
|
||||
struct sls_literal {
|
||||
bool anchored;
|
||||
bool eod;
|
||||
ue2_literal s;
|
||||
|
||||
explicit sls_literal(bool a) : anchored(a), eod(false) {}
|
||||
|
||||
sls_literal append(char c, bool nocase) const {
|
||||
sls_literal rv(anchored);
|
||||
rv.s = s;
|
||||
rv.s.push_back(ue2_literal::elem(c, nocase));
|
||||
|
||||
return rv;
|
||||
}
|
||||
};
|
||||
|
||||
static
|
||||
bool operator<(const sls_literal &a, const sls_literal &b) {
|
||||
ORDER_CHECK(anchored);
|
||||
ORDER_CHECK(eod);
|
||||
ORDER_CHECK(s);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
static
|
||||
bool checkLongMixedSensitivityLiterals(
|
||||
const map<sls_literal, ue2::flat_set<ReportID>> &literals) {
|
||||
const size_t len = MAX_MASK2_WIDTH;
|
||||
|
||||
for (const sls_literal &lit : literals | map_keys) {
|
||||
if (mixed_sensitivity(lit.s) && lit.s.length() > len) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool findLiterals(const NGHolder &g,
|
||||
map<sls_literal, ue2::flat_set<ReportID>> *literals) {
|
||||
vector<NFAVertex> order = getTopoOrdering(g);
|
||||
|
||||
vector<set<sls_literal>> built(num_vertices(g));
|
||||
|
||||
for (auto it = order.rbegin(); it != order.rend(); ++it) {
|
||||
NFAVertex v = *it;
|
||||
set<sls_literal> &out = built[g[v].index];
|
||||
|
||||
assert(out.empty());
|
||||
if (v == g.start) {
|
||||
out.insert(sls_literal(true));
|
||||
continue;
|
||||
} else if (v == g.startDs) {
|
||||
out.insert(sls_literal(false));
|
||||
continue;
|
||||
}
|
||||
|
||||
bool eod = v == g.acceptEod;
|
||||
bool accept = v == g.accept || v == g.acceptEod;
|
||||
const CharReach &cr = g[v].char_reach;
|
||||
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (u == g.accept) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (u == g.start && edge(g.startDs, v, g).second) {
|
||||
/* floating start states may have connections to start and
|
||||
* startDs - don't create duplicate anchored literals */
|
||||
DEBUG_PRINTF("skipping as floating\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
set<sls_literal> &in = built[g[u].index];
|
||||
assert(!in.empty());
|
||||
|
||||
for (const sls_literal &lit : in) {
|
||||
if (accept) {
|
||||
sls_literal accept_lit = lit; // copy
|
||||
accept_lit.eod = eod;
|
||||
insert(&(*literals)[accept_lit], g[u].reports);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (size_t c = cr.find_first(); c != cr.npos;
|
||||
c = cr.find_next(c)) {
|
||||
bool nocase = ourisalpha(c) && cr.test(mytoupper(c))
|
||||
&& cr.test(mytolower(c));
|
||||
|
||||
if (nocase && (char)c == mytolower(c)) {
|
||||
continue; /* uppercase already handled us */
|
||||
}
|
||||
|
||||
out.insert(lit.append((u8)c, nocase));
|
||||
|
||||
if (out.size() + literals->size() > MAX_LITERAL_SET_SIZE) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
size_t min_period(const map<sls_literal, ue2::flat_set<ReportID>> &literals) {
|
||||
size_t rv = SIZE_MAX;
|
||||
|
||||
for (const sls_literal &lit : literals | map_keys) {
|
||||
rv = min(rv, minStringPeriod(lit.s));
|
||||
}
|
||||
DEBUG_PRINTF("min period %zu\n", rv);
|
||||
return rv;
|
||||
}
|
||||
|
||||
// If this component is just a small set of literals and can be handled by
|
||||
// Rose, feed it directly into rose.
|
||||
bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &g,
|
||||
const CompileContext &cc) {
|
||||
if (!cc.grey.allowSmallLiteralSet) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!isAcyclic(g)) {
|
||||
/* literal sets would typically be acyclic... */
|
||||
DEBUG_PRINTF("not acyclic\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
map<sls_literal, ue2::flat_set<ReportID>> literals;
|
||||
if (!findLiterals(g, &literals)) {
|
||||
DEBUG_PRINTF(":(\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(!literals.empty());
|
||||
|
||||
if (literals.size() > MAX_LITERAL_SET_SIZE) {
|
||||
/* try a mask instead */
|
||||
DEBUG_PRINTF("too many literals\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t period = min_period(literals);
|
||||
if (period < MIN_STRONG_PERIOD &&
|
||||
literals.size() > MAX_WEAK_LITERAL_SET_SIZE) {
|
||||
DEBUG_PRINTF("too many literals with weak period\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!checkLongMixedSensitivityLiterals(literals)) {
|
||||
DEBUG_PRINTF("long mixed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("adding %zu literals\n", literals.size());
|
||||
for (const auto &m : literals) {
|
||||
const sls_literal &lit = m.first;
|
||||
const auto &reports = m.second;
|
||||
rose.add(lit.anchored, lit.eod, lit.s, reports);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
50
src/nfagraph/ng_small_literal_set.h
Normal file
50
src/nfagraph/ng_small_literal_set.h
Normal file
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Rose construction from NGHolder for cases representing small literal
|
||||
* sets.
|
||||
*/
|
||||
|
||||
#ifndef NG_SMALL_LITERAL_SET_H
|
||||
#define NG_SMALL_LITERAL_SET_H
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class RoseBuild;
|
||||
class NGHolder;
|
||||
struct CompileContext;
|
||||
|
||||
/** \brief If the graph represents a small set of literals, feed them directly
|
||||
* to rose. Returns true if successful. */
|
||||
bool handleSmallLiteralSets(RoseBuild &rose, const NGHolder &h,
|
||||
const CompileContext &cc);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_SMALL_LITERAL_SET_H
|
||||
3108
src/nfagraph/ng_som.cpp
Normal file
3108
src/nfagraph/ng_som.cpp
Normal file
File diff suppressed because it is too large
Load Diff
77
src/nfagraph/ng_som.h
Normal file
77
src/nfagraph/ng_som.h
Normal file
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief SOM ("Start of Match") analysis.
|
||||
*/
|
||||
|
||||
#ifndef NG_SOM_H
|
||||
#define NG_SOM_H
|
||||
|
||||
#include "som/som.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NG;
|
||||
class NGHolder;
|
||||
class NGWrapper;
|
||||
struct Grey;
|
||||
|
||||
/** \brief Result of a SOM analysis attempt, as returned by doSom() and
 * doSomWithHaig(). */
enum sombe_rv {
    /** SOM could not be established. */
    SOMBE_FAIL = 0,

    /** SOM has been established, but the full graph still needs to be
     * handled by the caller (rose, etc). */
    SOMBE_HANDLED_INTERNAL = 1,

    /** Everything has been done; the pattern is fully handled. */
    SOMBE_HANDLED_ALL = 2
};
|
||||
|
||||
/** \brief Perform SOM analysis on the given graph.
|
||||
*
|
||||
* This function will replace report IDs and mutate the graph, then return
|
||||
* SOMBE_HANDLED_INTERNAL if SOM can be established and the full graph still
|
||||
* needs to be handled (rose, etc).
|
||||
*
|
||||
* Returns SOMBE_HANDLED_ALL if everything has been done and the pattern has
|
||||
* been handled in all its glory.
|
||||
*
|
||||
* Returns SOMBE_FAIL and does not mutate the graph if SOM cannot be
|
||||
* established.
|
||||
*
|
||||
* May throw a "Pattern too large" exception if prefixes of the
|
||||
* pattern are too large to compile.
|
||||
*/
|
||||
sombe_rv doSom(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id,
|
||||
som_type som);
|
||||
|
||||
/** Returns SOMBE_FAIL (and the original graph) if SOM cannot be established.
|
||||
* May also throw pattern too large if prefixes of the pattern are too large to
|
||||
* compile. */
|
||||
sombe_rv doSomWithHaig(NG &ng, NGHolder &h, const NGWrapper &w, u32 comp_id,
|
||||
som_type som);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_SOM_H
|
||||
198
src/nfagraph/ng_som_add_redundancy.cpp
Normal file
198
src/nfagraph/ng_som_add_redundancy.cpp
Normal file
@@ -0,0 +1,198 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Add redundancy to graph to assist in SOM analysis.
|
||||
*
|
||||
* Currently patterns of the form:
|
||||
*
|
||||
* /(GET|POST).*foo/
|
||||
*
|
||||
* baffle our SOM analysis as the T's get merged into one by our graph
|
||||
* reductions and they lose the fixed depth property. One way to solve this is
|
||||
* to tell the T vertex to go fork itself before we do the main SOM pass.
|
||||
*
|
||||
* Overall plan:
|
||||
*
|
||||
* 1. build a topo ordering
|
||||
* 2. walk vertices in topo order
|
||||
* 3. fix up vertices where possible
|
||||
* 4. go home
|
||||
*
|
||||
* Vertex fix up plan:
|
||||
*
|
||||
* 1. consider depth of vertex
|
||||
* - if vertex is at fixed depth continue to next vertex
|
||||
* - if vertex can be at an unbounded depth continue to next vertex
|
||||
* - if vertex has a pred which is not a fixed depth continue to next vertex
|
||||
* 2. group preds by their depth
|
||||
* 3. for each group:
|
||||
* - create a clone of the vertex (vertex props and out edges)
|
||||
* - create edges from each vertex in the group to the clone
|
||||
* - work out the depth for the clone
|
||||
* 4. blow away original vertex
|
||||
*
|
||||
* Originally in UE-1862.
|
||||
*/
|
||||
#include "ng_som_add_redundancy.h"
|
||||
|
||||
#include "ng_dump.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/container.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Upper bound on the total number of clone vertices that one
 * addSomRedundancy() call may create. */
static constexpr size_t MAX_NEW_VERTICES = 32;
|
||||
|
||||
static
|
||||
const DepthMinMax &getDepth(NFAVertex v, const NGHolder &g,
|
||||
const vector<DepthMinMax> &depths) {
|
||||
return depths.at(g[v].index);
|
||||
}
|
||||
|
||||
static
|
||||
bool hasFloatingPred(NFAVertex v, const NGHolder &g,
|
||||
const vector<DepthMinMax> &depths) {
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
const DepthMinMax &d = getDepth(u, g, depths);
|
||||
if (d.min != d.max) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static
|
||||
bool forkVertex(NFAVertex v, NGHolder &g, vector<DepthMinMax> &depths,
|
||||
set<NFAVertex> &dead, size_t *numNewVertices) {
|
||||
map<depth, vector<NFAEdge>> predGroups;
|
||||
for (const auto &e : in_edges_range(v, g)) {
|
||||
const DepthMinMax &d = getDepth(source(e, g), g, depths);
|
||||
assert(d.min == d.max);
|
||||
predGroups[d.min].push_back(e);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("forking vertex with %zu pred groups\n", predGroups.size());
|
||||
|
||||
if (*numNewVertices + predGroups.size() > MAX_NEW_VERTICES) {
|
||||
return false;
|
||||
}
|
||||
*numNewVertices += predGroups.size();
|
||||
|
||||
for (auto &group : predGroups) {
|
||||
const depth &predDepth = group.first;
|
||||
const vector<NFAEdge> &preds = group.second;
|
||||
|
||||
// Clone v for this depth with all its associated out-edges.
|
||||
u32 clone_idx = depths.size(); // next index to be used
|
||||
NFAVertex clone = add_vertex(g[v], g);
|
||||
depth clone_depth = predDepth + 1;
|
||||
g[clone].index = clone_idx;
|
||||
depths.push_back(DepthMinMax(clone_depth, clone_depth));
|
||||
DEBUG_PRINTF("cloned vertex %u with depth %s\n", clone_idx,
|
||||
clone_depth.str().c_str());
|
||||
|
||||
// Add copies of the out-edges from v.
|
||||
for (const auto &e : out_edges_range(v, g)) {
|
||||
add_edge(clone, target(e, g), g[e], g);
|
||||
}
|
||||
|
||||
// Add in-edges from preds in this group.
|
||||
for (const auto &e : preds) {
|
||||
add_edge(source(e, g), clone, g[e], g);
|
||||
}
|
||||
}
|
||||
|
||||
clear_vertex(v, g);
|
||||
dead.insert(v);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * \brief Fork vertices with several fixed-depth predecessor groups so that
 * each resulting clone sits at a single fixed depth.
 *
 * Walks the result of getTopoOrdering() in reverse and forks every
 * non-special, reachable vertex whose depth is bounded but not fixed and
 * whose predecessors are all at fixed depths. Stops early once the
 * MAX_NEW_VERTICES budget would be exceeded.
 *
 * \param depths per-vertex depths indexed by vertex index; entries for new
 *        clones are appended.
 * \return true if the graph was modified, false otherwise.
 */
bool addSomRedundancy(NGHolder &g, vector<DepthMinMax> &depths) {
    DEBUG_PRINTF("entry\n");

    const vector<NFAVertex> ordering = getTopoOrdering(g);

    set<NFAVertex> dead;
    size_t numNewVertices = 0;

    // Visit the ordering back to front.
    for (size_t i = ordering.size(); i > 0; --i) {
        NFAVertex v = ordering[i - 1];

        if (is_special(v, g)) {
            continue;
        }
        if (!hasGreaterInDegree(0, v, g)) {
            continue; // unreachable, probably killed
        }

        // Note: this reference is only used before forkVertex() below, which
        // may grow (and hence reallocate) depths.
        const DepthMinMax &d = getDepth(v, g, depths);

        DEBUG_PRINTF("vertex %u has depths %s\n", g[v].index,
                     d.str().c_str());

        if (d.min == d.max) {
            DEBUG_PRINTF("fixed depth\n");
            continue;
        }

        if (d.max.is_unreachable()) {
            DEBUG_PRINTF("unbounded depth\n");
            continue;
        }

        if (hasFloatingPred(v, g, depths)) {
            DEBUG_PRINTF("has floating pred\n");
            continue;
        }

        if (!forkVertex(v, g, depths, dead, &numNewVertices)) {
            DEBUG_PRINTF("new vertex limit reached\n");
            break;
        }
    }

    assert(numNewVertices <= MAX_NEW_VERTICES);

    const bool changed = !dead.empty();
    if (changed) {
        remove_vertices(dead, g);
    }
    return changed; // false: no changes made to the graph
}
|
||||
|
||||
} // namespace ue2
|
||||
47
src/nfagraph/ng_som_add_redundancy.h
Normal file
47
src/nfagraph/ng_som_add_redundancy.h
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Add redundancy to graph to assist in SOM analysis.
|
||||
*/
|
||||
|
||||
#ifndef NG_SOM_ADD_REDUNDANCY_H
|
||||
#define NG_SOM_ADD_REDUNDANCY_H
|
||||
|
||||
#include "util/depth.h"
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
|
||||
bool addSomRedundancy(NGHolder &g, std::vector<DepthMinMax> &depths);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
358
src/nfagraph/ng_som_util.cpp
Normal file
358
src/nfagraph/ng_som_util.cpp
Normal file
@@ -0,0 +1,358 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Utility functions related to SOM ("Start of Match").
|
||||
*/
|
||||
#include "ng_som_util.h"
|
||||
|
||||
#include "ng_depth.h"
|
||||
#include "ng_execute.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_util.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
void wireSuccessorsToStart(NGHolder &g, NFAVertex u) {
|
||||
for (auto v : adjacent_vertices_range(u, g)) {
|
||||
add_edge_if_not_present(g.start, v, g);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Compute min/max depths for every vertex of \a g_orig.
 *
 * The work is done on a clone of the graph in which the successors of
 * startDs and of every virtual start are wired directly to start, and the
 * in-edges of those start-like vertices are removed. Depths are then
 * calculated from start on the clone and copied back.
 *
 * \return a vector indexed by vertex index in \a g_orig. StartDs and virtual
 *         starts are always given depth (0, 0).
 */
vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g_orig) {
    // We operate on a temporary copy of the original graph here, so we don't
    // have to mutate the original.
    NGHolder g;
    ue2::unordered_map<NFAVertex, NFAVertex> vmap; // g_orig vertex -> g vertex
    cloneHolder(g, g_orig, &vmap);

    // Collect the virtual starts of the clone, followed by startDs.
    vector<NFAVertex> start_like;
    for (auto v : vertices_range(g)) {
        if (!is_virtual_start(v, g)) {
            continue;
        }
        start_like.push_back(v);
    }
    start_like.push_back(g.startDs);

    // Pass 1: wire the successors of every virtual start or startDs to
    // g.start.
    for (auto s : start_like) {
        wireSuccessorsToStart(g, s);
    }

    // Pass 2: drop the in-edges of these vertices so that they don't
    // participate in the depth calculation. This is kept as a separate pass:
    // it also removes any start -> start-like edge that pass 1 added.
    for (auto s : start_like) {
        clear_in_edges(s, g);
    }

    //dumpGraph("som_depth.dot", g.g);

    vector<DepthMinMax> clone_depths; // numbered by vertex index in g
    calcDepthsFrom(g, g.start, clone_depths);

    // Transfer depths, indexed by vertex index in g_orig.
    vector<DepthMinMax> depths(num_vertices(g_orig));

    for (auto v_orig : vertices_range(g_orig)) {
        assert(contains(vmap, v_orig));
        NFAVertex v_clone = vmap[v_orig];

        const u32 orig_idx = g_orig[v_orig].index;
        DepthMinMax &slot = depths.at(orig_idx);

        const bool zero_depth =
            v_orig == g_orig.startDs || is_virtual_start(v_orig, g_orig);
        if (zero_depth) {
            // StartDs and virtual starts always have zero depth.
            slot = DepthMinMax(0, 0);
            continue;
        }

        const u32 clone_idx = g[v_clone].index;
        slot = clone_depths.at(clone_idx);
    }

    return depths;
}
|
||||
|
||||
bool firstMatchIsFirst(const NGHolder &p) {
|
||||
/* If the first match (by end offset) is not the first match (by start
|
||||
* offset) then we can't create a lock after it.
|
||||
*
|
||||
* Consider: 4009:/(foobar|ob).*bugger/s
|
||||
*
|
||||
* We don't care about races on the last byte as they can be resolved easily
|
||||
* at runtime /(foobar|obar).*hi/
|
||||
*
|
||||
* It should be obvious we don't care about one match being a prefix
|
||||
* of another as they share the same start offset.
|
||||
*
|
||||
* Therefore, the case were we cannot establish that the som does not
|
||||
* regress is when there exists s1 and s2 in the language of p and s2 is a
|
||||
* proper infix of s1.
|
||||
*
|
||||
* It is tempting to add the further restriction that there does not exist a
|
||||
* prefix of s1 that is in the language of p (as in which case we would
|
||||
* presume, the lock has already been set). However, we have no way of
|
||||
* knowing if the lock can be cleared by some characters, and if so, if it
|
||||
* is still set. TODO: if we knew the lock's escapes where we could verify
|
||||
* that the rest of s1 does not clear the lock. (1)
|
||||
*/
|
||||
|
||||
DEBUG_PRINTF("entry\n");
|
||||
|
||||
/* If there are any big cycles throw up our hands in despair */
|
||||
if (hasBigCycles(p)) {
|
||||
DEBUG_PRINTF("fail, big cycles\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
set<NFAVertex> states;
|
||||
/* turn on all states (except starts - avoid suffix matches) */
|
||||
/* If we were doing (1) we would also except states leading to accepts -
|
||||
avoid prefix matches */
|
||||
for (auto v : vertices_range(p)) {
|
||||
assert(!is_virtual_start(v, p));
|
||||
if (!is_special(v, p)) {
|
||||
DEBUG_PRINTF("turning on %u\n", p[v].index);
|
||||
states.insert(v);
|
||||
}
|
||||
}
|
||||
|
||||
/* run the prefix the main graph */
|
||||
execute_graph(p, p, &states);
|
||||
|
||||
for (auto v : states) {
|
||||
/* need to check if this vertex may represent an infix match - ie
|
||||
* it does not have an edge to accept. */
|
||||
DEBUG_PRINTF("check %u\n", p[v].index);
|
||||
if (!edge(v, p.accept, p).second) {
|
||||
DEBUG_PRINTF("fail %u\n", p[v].index);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("done first is first check\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map,
|
||||
smgb_cache &cache) {
|
||||
/* Need to ensure all matches of the graph g up to u contain no infixes
|
||||
* which are also matches of the graph to u.
|
||||
*
|
||||
* This is basically the same as firstMatchIsFirst except we g is not
|
||||
* always a dag. As we haven't gotten around to writing an execute_graph
|
||||
* that operates on general graphs, we take some (hopefully) conservative
|
||||
* short cuts.
|
||||
*
|
||||
* Note: if the u can be jumped we will take jump edges
|
||||
* into account as a possibility of som going backwards
|
||||
*
|
||||
* TODO: write a generalised ng_execute_graph/make this less hacky
|
||||
*/
|
||||
assert(&g == &cache.g);
|
||||
if (contains(cache.smgb, u)) {
|
||||
return cache.smgb[u];
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("checking if som can go backwards on %u\n",
|
||||
g[u].index);
|
||||
|
||||
set<NFAEdge> be;
|
||||
BackEdges<set<NFAEdge>> backEdgeVisitor(be);
|
||||
depth_first_search(
|
||||
g.g, visitor(backEdgeVisitor)
|
||||
.root_vertex(g.start)
|
||||
.vertex_index_map(get(&NFAGraphVertexProps::index, g.g)));
|
||||
|
||||
bool rv;
|
||||
if (0) {
|
||||
exit:
|
||||
DEBUG_PRINTF("using cached result\n");
|
||||
cache.smgb[u] = rv;
|
||||
return rv;
|
||||
}
|
||||
|
||||
assert(contains(region_map, u));
|
||||
const u32 u_region = region_map.at(u);
|
||||
|
||||
for (const auto &e : be) {
|
||||
NFAVertex s = source(e, g);
|
||||
NFAVertex t = target(e, g);
|
||||
/* only need to worry about big cycles including/before u */
|
||||
DEBUG_PRINTF("back edge %u %u\n", g[s].index,
|
||||
g[t].index);
|
||||
if (s != t && region_map.at(s) <= u_region) {
|
||||
DEBUG_PRINTF("eek big cycle\n");
|
||||
rv = true; /* big cycle -> eek */
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> orig_to_copy;
|
||||
NGHolder c_g;
|
||||
cloneHolder(c_g, g, &orig_to_copy);
|
||||
|
||||
for (NFAVertex v : vertices_range(g)) {
|
||||
if (!is_virtual_start(v, g)) {
|
||||
continue;
|
||||
}
|
||||
NFAVertex c_v = orig_to_copy[v];
|
||||
orig_to_copy[v] = c_g.startDs;
|
||||
for (NFAVertex c_w : adjacent_vertices_range(c_v, c_g)) {
|
||||
add_edge_if_not_present(c_g.startDs, c_w, c_g);
|
||||
}
|
||||
clear_vertex(c_v, c_g);
|
||||
}
|
||||
|
||||
NFAVertex c_u = orig_to_copy[u];
|
||||
clear_in_edges(c_g.acceptEod, c_g);
|
||||
add_edge(c_g.accept, c_g.acceptEod, c_g);
|
||||
clear_in_edges(c_g.accept, c_g);
|
||||
clear_out_edges(c_u, c_g);
|
||||
if (hasSelfLoop(u, g)) {
|
||||
add_edge(c_u, c_u, c_g);
|
||||
}
|
||||
add_edge(c_u, c_g.accept, c_g);
|
||||
|
||||
set<NFAVertex> u_succ;
|
||||
insert(&u_succ, adjacent_vertices(u, g));
|
||||
u_succ.erase(u);
|
||||
|
||||
for (auto t : inv_adjacent_vertices_range(u, g)) {
|
||||
if (t == u) {
|
||||
continue;
|
||||
}
|
||||
for (auto v : adjacent_vertices_range(t, g)) {
|
||||
if (contains(u_succ, v)) {
|
||||
add_edge(orig_to_copy[t], c_g.accept, c_g);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pruneUseless(c_g);
|
||||
|
||||
be.clear();
|
||||
depth_first_search(c_g.g, visitor(backEdgeVisitor).root_vertex(c_g.start).
|
||||
vertex_index_map(get(&NFAGraphVertexProps::index, c_g.g)));
|
||||
|
||||
for (const auto &e : be) {
|
||||
NFAVertex s = source(e, c_g);
|
||||
NFAVertex t = target(e, c_g);
|
||||
DEBUG_PRINTF("back edge %u %u\n", c_g[s].index, c_g[t].index);
|
||||
if (s != t) {
|
||||
assert(0);
|
||||
DEBUG_PRINTF("eek big cycle\n");
|
||||
rv = true; /* big cycle -> eek */
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("checking acyclic+selfloop graph\n");
|
||||
|
||||
rv = !firstMatchIsFirst(c_g);
|
||||
DEBUG_PRINTF("som may regress? %d\n", (int)rv);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
bool sentClearsTail(const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map,
|
||||
const NGHolder &sent, u32 last_head_region,
|
||||
u32 *bad_region) {
|
||||
/* if a subsequent match from the prefix clears the rest of the pattern
|
||||
* we can just keep track of the last match of the prefix.
|
||||
* To see if this property holds, we could:
|
||||
*
|
||||
* 1A: turn on all states in the tail and run all strings that may
|
||||
* match the prefix past the tail, if we are still in any states then
|
||||
* this property does not hold.
|
||||
*
|
||||
* 1B: we turn on the initial states of the tail and run any strings which
|
||||
* may finish any partial matches in the prefix and see if we end up with
|
||||
* anything which would also imply that this property does not hold.
|
||||
*
|
||||
* OR
|
||||
*
|
||||
* 2: we just turn everything and run the prefix inputs past it and see what
|
||||
* we are left with. I think that is equivalent to scheme 1 and is easier to
|
||||
* implement. TODO: ponder
|
||||
*
|
||||
* Anyway, we are going with scheme 2 until further notice.
|
||||
*/
|
||||
|
||||
u32 first_bad_region = ~0U;
|
||||
set<NFAVertex> states;
|
||||
/* turn on all states */
|
||||
DEBUG_PRINTF("region %u is cutover\n", last_head_region);
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (v != g.accept && v != g.acceptEod) {
|
||||
states.insert(v);
|
||||
}
|
||||
}
|
||||
|
||||
for (UNUSED auto v : states) {
|
||||
DEBUG_PRINTF("start state: %u\n", g[v].index);
|
||||
}
|
||||
|
||||
/* run the prefix the main graph */
|
||||
execute_graph(g, sent, &states);
|
||||
|
||||
/* .. and check if we are left with anything in the tail region */
|
||||
for (auto v : states) {
|
||||
if (v == g.start || v == g.startDs) {
|
||||
continue; /* not in tail */
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("v %u is still on\n", g[v].index);
|
||||
assert(v != g.accept && v != g.acceptEod); /* no cr */
|
||||
|
||||
assert(contains(region_map, v));
|
||||
const u32 v_region = region_map.at(v);
|
||||
if (v_region > last_head_region) {
|
||||
DEBUG_PRINTF("bailing, %u > %u\n", v_region, last_head_region);
|
||||
first_bad_region = min(first_bad_region, v_region);
|
||||
}
|
||||
}
|
||||
|
||||
if (first_bad_region != ~0U) {
|
||||
DEBUG_PRINTF("first bad region is %u\n", first_bad_region);
|
||||
*bad_region = first_bad_region;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
84
src/nfagraph/ng_som_util.h
Normal file
84
src/nfagraph/ng_som_util.h
Normal file
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Utility functions related to SOM ("Start of Match").
|
||||
*/
|
||||
|
||||
#ifndef NG_SOM_UTIL_H
|
||||
#define NG_SOM_UTIL_H
|
||||
|
||||
#include "ng_util.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
|
||||
/**
|
||||
 * Returns min/max distance from start of match, indexed by vertex_id.
|
||||
*/
|
||||
std::vector<DepthMinMax> getDistancesFromSOM(const NGHolder &g);
|
||||
|
||||
/**
|
||||
* Returns true if the first match by end-offset must always be the first match
|
||||
* by start-offset.
|
||||
*/
|
||||
bool firstMatchIsFirst(const NGHolder &p);
|
||||
|
||||
struct smgb_cache : public mbsb_cache {
|
||||
explicit smgb_cache(const NGHolder &gg) : mbsb_cache(gg) {}
|
||||
std::map<NFAVertex, bool> smgb;
|
||||
};
|
||||
|
||||
bool somMayGoBackwards(NFAVertex u, const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map,
|
||||
smgb_cache &cache);
|
||||
|
||||
/**
|
||||
* Returns true if matching 'sent' causes all tail states in the main graph \a
|
||||
* g to go dead. A tail state is any state with a region greater than
|
||||
* \a last_head_region.
|
||||
*
|
||||
* - The graph \a sent must be a "kinda-DAG", where the only back-edges present
|
||||
* are self-loops.
|
||||
* - If the result is false, \a bad_region will be updated with the smallest
|
||||
* region ID associated with a tail state that is still on.
|
||||
*/
|
||||
bool sentClearsTail(const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map,
|
||||
const NGHolder &sent, u32 last_head_region,
|
||||
u32 *bad_region);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_SOM_UTIL_H
|
||||
216
src/nfagraph/ng_split.cpp
Normal file
216
src/nfagraph/ng_split.cpp
Normal file
@@ -0,0 +1,216 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Functions for splitting NFAGraphs into LHS and RHS.
|
||||
*/
|
||||
#include "ng_split.h"
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_util.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
void clearAccepts(NGHolder &g) {
|
||||
for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
|
||||
g[v].reports.clear();
|
||||
}
|
||||
|
||||
for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
|
||||
g[v].reports.clear();
|
||||
}
|
||||
|
||||
clear_in_edges(g.accept, g);
|
||||
clear_in_edges(g.acceptEod, g);
|
||||
add_edge(g.accept, g.acceptEod, g);
|
||||
}
|
||||
|
||||
static
|
||||
void filterSplitMap(const NGHolder &g, ue2::unordered_map<NFAVertex, NFAVertex> *out_map) {
|
||||
ue2::unordered_set<NFAVertex> verts;
|
||||
insert(&verts, vertices(g));
|
||||
ue2::unordered_map<NFAVertex, NFAVertex>::iterator it = out_map->begin();
|
||||
while (it != out_map->end()) {
|
||||
ue2::unordered_map<NFAVertex, NFAVertex>::iterator jt = it;
|
||||
++it;
|
||||
if (!contains(verts, jt->second)) {
|
||||
out_map->erase(jt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void splitLHS(const NGHolder &base, const vector<NFAVertex> &pivots,
|
||||
const vector<NFAVertex> &rhs_pivots,
|
||||
NGHolder *lhs, ue2::unordered_map<NFAVertex, NFAVertex> *lhs_map) {
|
||||
assert(lhs && lhs_map);
|
||||
|
||||
cloneHolder(*lhs, base, lhs_map);
|
||||
|
||||
clearAccepts(*lhs);
|
||||
|
||||
for (auto pivot : pivots) {
|
||||
DEBUG_PRINTF("pivot is %u lv %zu lm %zu\n", base[pivot].index,
|
||||
num_vertices(*lhs), lhs_map->size());
|
||||
assert(contains(*lhs_map, pivot));
|
||||
|
||||
for (auto v : rhs_pivots) {
|
||||
assert(contains(*lhs_map, v));
|
||||
remove_edge((*lhs_map)[pivot], (*lhs_map)[v], *lhs);
|
||||
}
|
||||
|
||||
(*lhs)[(*lhs_map)[pivot]].reports.insert(0);
|
||||
add_edge((*lhs_map)[pivot], lhs->accept, *lhs);
|
||||
}
|
||||
|
||||
pruneUseless(*lhs);
|
||||
filterSplitMap(*lhs, lhs_map);
|
||||
|
||||
switch (base.kind) {
|
||||
case NFA_PREFIX:
|
||||
case NFA_OUTFIX:
|
||||
lhs->kind = NFA_PREFIX;
|
||||
break;
|
||||
case NFA_INFIX:
|
||||
case NFA_SUFFIX:
|
||||
lhs->kind = NFA_INFIX;
|
||||
break;
|
||||
case NFA_REV_PREFIX:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void splitLHS(const NGHolder &base, NFAVertex pivot,
|
||||
NGHolder *lhs, ue2::unordered_map<NFAVertex, NFAVertex> *lhs_map) {
|
||||
vector<NFAVertex> pivots(1, pivot);
|
||||
vector<NFAVertex> rhs_pivots;
|
||||
insert(&rhs_pivots, rhs_pivots.end(), adjacent_vertices(pivot, base));
|
||||
splitLHS(base, pivots, rhs_pivots, lhs, lhs_map);
|
||||
}
|
||||
|
||||
void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots,
|
||||
NGHolder *rhs, ue2::unordered_map<NFAVertex, NFAVertex> *rhs_map) {
|
||||
assert(rhs && rhs_map);
|
||||
|
||||
cloneHolder(*rhs, base, rhs_map);
|
||||
|
||||
clear_out_edges(rhs->start, *rhs);
|
||||
clear_out_edges(rhs->startDs, *rhs);
|
||||
add_edge(rhs->start, rhs->startDs, *rhs);
|
||||
add_edge(rhs->startDs, rhs->startDs, *rhs);
|
||||
|
||||
for (auto pivot : pivots) {
|
||||
assert(contains(*rhs_map, pivot));
|
||||
add_edge(rhs->start, (*rhs_map)[pivot], *rhs);
|
||||
}
|
||||
pruneUseless(*rhs);
|
||||
filterSplitMap(*rhs, rhs_map);
|
||||
|
||||
switch (base.kind) {
|
||||
case NFA_PREFIX:
|
||||
case NFA_INFIX:
|
||||
rhs->kind = NFA_INFIX;
|
||||
break;
|
||||
case NFA_SUFFIX:
|
||||
case NFA_OUTFIX:
|
||||
rhs->kind = NFA_SUFFIX;
|
||||
break;
|
||||
case NFA_REV_PREFIX:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/** \brief Fills \a succ with the common successors of the vertices in \a
|
||||
* pivots. */
|
||||
static
|
||||
void findCommonSuccessors(const NGHolder &g, const vector<NFAVertex> &pivots,
|
||||
vector<NFAVertex> &succ) {
|
||||
assert(!pivots.empty());
|
||||
|
||||
// Note: for determinism, we must sort our successor sets by vertex_index.
|
||||
set<NFAVertex, VertexIndexOrdering<NGHolder> > adj(g), adj_temp(g);
|
||||
|
||||
insert(&adj, adjacent_vertices(pivots.at(0), g));
|
||||
|
||||
for (auto it = pivots.begin() + 1, ite = pivots.end(); it != ite; ++it) {
|
||||
NFAVertex pivot = *it;
|
||||
adj_temp.clear();
|
||||
for (auto v : adjacent_vertices_range(pivot, g)) {
|
||||
if (contains(adj, v)) {
|
||||
adj_temp.insert(v);
|
||||
}
|
||||
}
|
||||
adj.swap(adj_temp);
|
||||
}
|
||||
|
||||
succ.insert(succ.end(), adj.begin(), adj.end());
|
||||
}
|
||||
|
||||
void splitGraph(const NGHolder &base, const vector<NFAVertex> &pivots,
|
||||
NGHolder *lhs, ue2::unordered_map<NFAVertex, NFAVertex> *lhs_map,
|
||||
NGHolder *rhs, ue2::unordered_map<NFAVertex, NFAVertex> *rhs_map) {
|
||||
DEBUG_PRINTF("splitting graph at %zu vertices\n", pivots.size());
|
||||
|
||||
assert(!has_parallel_edge(base));
|
||||
|
||||
/* RHS pivots are built from the common set of successors of pivots. */
|
||||
vector<NFAVertex> rhs_pivots;
|
||||
findCommonSuccessors(base, pivots, rhs_pivots);
|
||||
|
||||
/* generate lhs */
|
||||
splitLHS(base, pivots, rhs_pivots, lhs, lhs_map);
|
||||
|
||||
/* generate the rhs */
|
||||
splitRHS(base, rhs_pivots, rhs, rhs_map);
|
||||
|
||||
assert(!has_parallel_edge(*lhs));
|
||||
assert(!has_parallel_edge(*rhs));
|
||||
}
|
||||
|
||||
void splitGraph(const NGHolder &base, NFAVertex pivot,
|
||||
NGHolder *lhs, ue2::unordered_map<NFAVertex, NFAVertex> *lhs_map,
|
||||
NGHolder *rhs, ue2::unordered_map<NFAVertex, NFAVertex> *rhs_map) {
|
||||
vector<NFAVertex> pivots(1, pivot);
|
||||
splitGraph(base, pivots, lhs, lhs_map, rhs, rhs_map);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
74
src/nfagraph/ng_split.h
Normal file
74
src/nfagraph/ng_split.h
Normal file
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Functions for splitting NFAGraphs into LHS and RHS.
|
||||
*/
|
||||
|
||||
#ifndef NG_SPLIT_H
|
||||
#define NG_SPLIT_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
|
||||
/** Note: pivot should be a vertex that dominates acceptEod. Vertices are
|
||||
* allocated to rhs if they are reachable from the pivot. Conversely, a vertex
|
||||
* is in the lhs if it is reachable from start without going through the
|
||||
* pivot. The pivot ends up in the LHS and any adjacent vertices in the RHS.
|
||||
*
|
||||
* When multiple split vertices are provided:
|
||||
* - RHS contains all vertices reachable from every pivot
|
||||
* - LHS contains all vertices which are reachable from start ignoring any
|
||||
 *   vertices which have an edge from every pivot
|
||||
*/
|
||||
void splitGraph(const NGHolder &base, NFAVertex pivot, NGHolder *lhs,
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> *lhs_map,
|
||||
NGHolder *rhs,
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> *rhs_map);
|
||||
|
||||
void splitGraph(const NGHolder &base, const std::vector<NFAVertex> &pivots,
|
||||
NGHolder *lhs,
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> *lhs_map,
|
||||
NGHolder *rhs,
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> *rhs_map);
|
||||
|
||||
void splitLHS(const NGHolder &base, NFAVertex pivot, NGHolder *lhs,
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> *lhs_map);
|
||||
|
||||
void splitRHS(const NGHolder &base, const std::vector<NFAVertex> &pivots,
|
||||
NGHolder *rhs, ue2::unordered_map<NFAVertex, NFAVertex> *rhs_map);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_SPLIT_H
|
||||
655
src/nfagraph/ng_squash.cpp
Normal file
655
src/nfagraph/ng_squash.cpp
Normal file
@@ -0,0 +1,655 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief NFA graph state squashing analysis.
|
||||
*
|
||||
* The basic idea behind the state squashing is that when we are in a cyclic
|
||||
* state v there are certain other states which are completely irrelevant. This
|
||||
* is used primarily by the determinisation process to produce smaller DFAs by
|
||||
* not tracking irrelevant states. It's also used by the LimEx NFA model.
|
||||
*
|
||||
* Working out which states we can ignore mainly uses the post-dominator
|
||||
* analysis.
|
||||
*
|
||||
* ### Dot Squash Masks:
|
||||
*
|
||||
* The following vertices are added to the squash mask:
|
||||
* - (1) Any vertex post-dominated by the cyclic dot state
|
||||
* - (2) Any other vertex post-dominated by the cyclic dot state's successors
|
||||
* - (3) Any vertex post-dominated by a predecessor of the cyclic dot state -
|
||||
* provided the predecessor's successors are a subset of the cyclic state's
|
||||
* successors [For (3), the term successor also includes report information]
|
||||
*
|
||||
* (2) and (3) allow us to get squash masks from .* as well as .+
|
||||
*
|
||||
 * The squash masks are not optimal especially in the case where there are
|
||||
* alternations on both sides - for example in:
|
||||
*
|
||||
* /foo(bar|baz).*(abc|xyz)/s
|
||||
*
|
||||
* 'foo' is irrelevant once the dot star is hit, but it has no post-dominators
|
||||
* so isn't picked up ('bar' and 'baz' are picked up by (2)). We may be able to
|
||||
* do a more complete analysis based on cutting the graph and seeing which
|
||||
* vertices are unreachable but the current approach is quick and probably
|
||||
* adequate.
|
||||
*
|
||||
*
|
||||
* ### Non-Dot Squash Masks:
|
||||
*
|
||||
* As for dot states. However, if anything in a pdom tree falls outside the
|
||||
* character range of the cyclic state the whole pdom tree is ignored. Also when
|
||||
* considering the predecessor's pdom tree it is necessary to verify that the
|
||||
* predecessor's character reachability falls within that of the cyclic state.
|
||||
*
|
||||
* We could do better in this case by not throwing away the whole pdom tree -
|
||||
* however the bits which we can keep are not clear from the pdom tree of the
|
||||
* cyclic state - it probably can be based on the dom or pdom tree of the bad
|
||||
* vertex.
|
||||
*
|
||||
* An example of us doing badly is:
|
||||
*
|
||||
* /HTTP.*Referer[^\n]*google/s
|
||||
*
|
||||
 * as '[^\\n]*' doesn't get a squash mask at all due to .* but we should be able
|
||||
* to squash 'Referer'.
|
||||
*
|
||||
* ### Extension:
|
||||
*
|
||||
* If a state leads solely to a squashable state (or its immediate successors)
|
||||
* with the same reachability we can make this state a squash state of any of
|
||||
* the original states squashees which we postdominate. Could probably tighten
|
||||
* this up but it would require thought. May not need to keep the original
|
||||
* squasher around but that would also require thought.
|
||||
*
|
||||
* ### SOM Notes:
|
||||
*
|
||||
* If (left) start of match is required, it is illegal to squash any state which
|
||||
* may result in an early start of match reaching the squashing state.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "ng_squash.h"
|
||||
|
||||
#include "ng_dominators.h"
|
||||
#include "ng_dump.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_region.h"
|
||||
#include "ng_restructuring.h"
|
||||
#include "ng_som_util.h"
|
||||
#include "ng_util.h"
|
||||
#include "ng_util.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/report_manager.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <deque>
|
||||
#include <map>
|
||||
|
||||
#include <boost/graph/depth_first_search.hpp>
|
||||
#include <boost/graph/reverse_graph.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** Maps a vertex to the set of vertices recorded as its children in the
 * post-dominator tree. */
using PostDomTree =
    ue2::unordered_map<NFAVertex, ue2::unordered_set<NFAVertex>>;
|
||||
|
||||
static
|
||||
void buildPDomTree(const NGHolder &g, PostDomTree &tree) {
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> postdominators =
|
||||
findPostDominators(g);
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
NFAVertex pdom = postdominators[v];
|
||||
if (pdom) {
|
||||
DEBUG_PRINTF("vertex %u -> %u\n", g[pdom].index,
|
||||
g[v].index);
|
||||
tree[pdom].insert(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a squash mask based on the pdom tree of v and the given char reach.
|
||||
* The built squash mask is a bit conservative for non-dot cases and could
|
||||
* be improved with a bit of thought.
|
||||
*/
|
||||
static
|
||||
void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v,
|
||||
const CharReach &cr, const NFAStateSet &init,
|
||||
const vector<NFAVertex> &vByIndex, const PostDomTree &tree,
|
||||
som_type som, const vector<DepthMinMax> &som_depths,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map,
|
||||
smgb_cache &cache) {
|
||||
DEBUG_PRINTF("build base squash mask for vertex %u)\n",
|
||||
g[v].index);
|
||||
|
||||
vector<NFAVertex> q;
|
||||
|
||||
PostDomTree::const_iterator it = tree.find(v);
|
||||
if (it != tree.end()) {
|
||||
q.insert(q.end(), it->second.begin(), it->second.end());
|
||||
}
|
||||
|
||||
const u32 v_index = g[v].index;
|
||||
|
||||
while (!q.empty()) {
|
||||
NFAVertex u = q.back();
|
||||
q.pop_back();
|
||||
const CharReach &cru = g[u].char_reach;
|
||||
|
||||
if ((cru & ~cr).any()) {
|
||||
/* bail: bad cr on vertex u */
|
||||
/* TODO: this could be better
|
||||
*
|
||||
* we still need to ensure that we record any paths leading to u.
|
||||
* Hence all vertices R which can reach u must be excluded from the
|
||||
* squash mask. Note: R != pdom(u) and there may exist an x in (R -
|
||||
* pdom(u)) which is in pdom(y) where y is in q. Clear ?
|
||||
*/
|
||||
mask.set();
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 u_index = g[u].index;
|
||||
|
||||
if (som) {
|
||||
/* We cannot add a state u to the squash mask of v if it may have an
|
||||
* earlier start of match offset. ie for us to add a state u to v
|
||||
* maxSomDist(u) <= minSomDist(v)
|
||||
*/
|
||||
const depth &max_som_dist_u = som_depths[u_index].max;
|
||||
const depth &min_som_dist_v = som_depths[v_index].min;
|
||||
|
||||
if (max_som_dist_u.is_infinite()) {
|
||||
/* it is hard to tell due to the INF if u can actually store an
|
||||
* earlier SOM than w (state we are building the squash mask
|
||||
* for) - need to think more deeply
|
||||
*/
|
||||
|
||||
if (mustBeSetBefore(u, v, g, cache)
|
||||
&& !somMayGoBackwards(u, g, region_map, cache)) {
|
||||
DEBUG_PRINTF("u %u v %u\n", u_index, v_index);
|
||||
goto squash_ok;
|
||||
}
|
||||
}
|
||||
|
||||
if (max_som_dist_u > min_som_dist_v) {
|
||||
/* u can't be squashed as it may be storing an earlier SOM */
|
||||
goto add_children_to_queue;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
squash_ok:
|
||||
mask.set(u_index);
|
||||
DEBUG_PRINTF("pdom'ed %u\n", u_index);
|
||||
add_children_to_queue:
|
||||
it = tree.find(u);
|
||||
if (it != tree.end()) {
|
||||
q.insert(q.end(), it->second.begin(), it->second.end());
|
||||
}
|
||||
}
|
||||
|
||||
if (cr.all()) {
|
||||
/* the init states aren't in the pdom tree. If all their succ states
|
||||
* are set (or v), we can consider them post dominated */
|
||||
|
||||
/* Note: init states will always result in a later som */
|
||||
for (size_t i = init.find_first(); i != init.npos;
|
||||
i = init.find_next(i)) {
|
||||
/* Yes vacuous patterns do exist */
|
||||
NFAVertex iv = vByIndex[i];
|
||||
for (auto w : adjacent_vertices_range(iv, g)) {
|
||||
if (w == g.accept || w == g.acceptEod) {
|
||||
DEBUG_PRINTF("skipping %zu due to vacuous accept\n", i);
|
||||
goto next_init_state;
|
||||
}
|
||||
|
||||
u32 vert_id = g[w].index;
|
||||
if (w != iv && w != v && !mask.test(vert_id)) {
|
||||
DEBUG_PRINTF("skipping %zu due to %u\n", i, vert_id);
|
||||
goto next_init_state;
|
||||
}
|
||||
}
|
||||
DEBUG_PRINTF("pdom'ed %zu\n", i);
|
||||
mask.set(i);
|
||||
next_init_state:;
|
||||
}
|
||||
}
|
||||
|
||||
mask.flip();
|
||||
}
|
||||
|
||||
static
|
||||
void buildSucc(NFAStateSet &succ, const NGHolder &g, NFAVertex v) {
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (!is_special(w, g)) {
|
||||
succ.set(g[w].index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void buildPred(NFAStateSet &pred, const NGHolder &g, NFAVertex v) {
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (!is_special(u, g)) {
|
||||
pred.set(g[u].index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex,
|
||||
const PostDomTree &pdom_tree, const NFAStateSet &init,
|
||||
map<NFAVertex, NFAStateSet> *squash, som_type som,
|
||||
const vector<DepthMinMax> &som_depths,
|
||||
const ue2::unordered_map<NFAVertex, u32> ®ion_map,
|
||||
smgb_cache &cache) {
|
||||
deque<NFAVertex> remaining;
|
||||
for (const auto &m : *squash) {
|
||||
remaining.push_back(m.first);
|
||||
}
|
||||
|
||||
while (!remaining.empty()) {
|
||||
NFAVertex v = remaining.back();
|
||||
remaining.pop_back();
|
||||
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (is_special(u, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (g[v].char_reach != g[u].char_reach) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (out_degree(u, g) != 1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
NFAStateSet u_squash(init.size());
|
||||
u32 u_index = g[u].index;
|
||||
|
||||
buildSquashMask(u_squash, g, u, g[u].char_reach, init, vByIndex,
|
||||
pdom_tree, som, som_depths, region_map, cache);
|
||||
|
||||
u_squash.set(u_index); /* never clear ourselves */
|
||||
|
||||
if ((~u_squash).any()) { // i.e. some bits unset in mask
|
||||
DEBUG_PRINTF("%u is an upstream squasher of %u\n", u_index,
|
||||
g[v].index);
|
||||
(*squash)[u] = u_squash;
|
||||
remaining.push_back(u);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Computes squash masks for the graph \a g.
 *
 * Every non-init vertex with a self-loop is a candidate squasher. Its mask is
 * built from its own pdom tree and then narrowed further using:
 *  - successors whose predecessor set equals the candidate's, and
 *  - predecessors whose successors (and, where specials are involved, reports)
 *    are covered by the candidate's and whose reach lies within the
 *    candidate's reach.
 * Candidates that squash at least one state are recorded; derived (upstream)
 * squashers are added at the end. In the returned masks a clear bit marks a
 * squashed state.
 *
 * \param g graph to analyse.
 * \param som if set, SOM restrictions are applied to what may be squashed.
 * \return map of squasher vertex -> squash mask.
 */
map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, som_type som) {
    map<NFAVertex, NFAStateSet> squash;

    // Width of every mask below. If we're a triggered graph, tops have
    // already been assigned, so we don't have to account for them.
    const u32 numStates = num_vertices(g);

    // Post-dominator tree.
    PostDomTree pdom_tree;
    buildPDomTree(g, pdom_tree);

    // Vertex lookup by state ID plus the set of init states.
    vector<NFAVertex> vByIndex(numStates, NFAGraph::null_vertex());
    NFAStateSet initStates(numStates);
    smgb_cache cache(g);

    // Only populated in SOM mode.
    unordered_map<NFAVertex, u32> region_map;
    vector<DepthMinMax> som_depths;
    if (som) {
        region_map = assignRegions(g);
        som_depths = getDistancesFromSOM(g);
    }

    for (auto v : vertices_range(g)) {
        const u32 vert_id = g[v].index;
        DEBUG_PRINTF("vertex %u/%u\n", vert_id, numStates);
        assert(vert_id < numStates);
        vByIndex[vert_id] = v;

        if (is_any_start(v, g) || !in_degree(v, g)) {
            initStates.set(vert_id);
        }
    }

    for (u32 i = 0; i < numStates; i++) {
        NFAVertex v = vByIndex[i];
        assert(v != NFAGraph::null_vertex());
        const CharReach &cr = g[v].char_reach;

        /* only non-init cyclics can be squashers */
        if (initStates.test(i) || !hasSelfLoop(v, g)) {
            continue;
        }

        DEBUG_PRINTF("state %u is cyclic\n", i);

        NFAStateSet mask(numStates), succ(numStates), pred(numStates);
        buildSquashMask(mask, g, v, cr, initStates, vByIndex, pdom_tree, som,
                        som_depths, region_map, cache);
        buildSucc(succ, g, v);
        buildPred(pred, g, v);
        const auto &reports = g[v].reports;

        /* successors that share exactly our predecessor set */
        for (size_t j = succ.find_first(); j != succ.npos;
             j = succ.find_next(j)) {
            NFAVertex succ_v = vByIndex[j];
            NFAStateSet succ_pred(numStates);
            buildPred(succ_pred, g, succ_v);
            if (succ_pred != pred) {
                continue;
            }

            DEBUG_PRINTF("adding the sm from %zu to %u's sm\n", j, i);
            NFAStateSet tmp(numStates);
            buildSquashMask(tmp, g, succ_v, cr, initStates, vByIndex,
                            pdom_tree, som, som_depths, region_map, cache);
            mask &= tmp;
        }

        /* predecessors that we can stand in for */
        for (size_t j = pred.find_first(); j != pred.npos;
             j = pred.find_next(j)) {
            NFAVertex pred_v = vByIndex[j];
            NFAStateSet pred_succ(numStates);
            buildSucc(pred_succ, g, pred_v);

            /* j is only a usable basis if its succs are a subset of ours */
            if ((pred_succ & ~succ).any()) {
                continue;
            }

            if (som) {
                /* We cannot use j to add to the squash mask of v if it may
                 * have an earlier start of match offset. ie for us j as a
                 * basis for the squash mask of v we require:
                 * maxSomDist(j) <= minSomDist(v)
                 */

                /* ** TODO ** */

                const depth &max_som_dist_j =
                    som_depths[g[pred_v].index].max;
                const depth &min_som_dist_v =
                    som_depths[g[v].index].min;
                if (max_som_dist_j > min_som_dist_v ||
                    max_som_dist_j.is_infinite()) {
                    /* j can't be used as it may be storing an earlier SOM */
                    continue;
                }
            }

            const CharReach &crv = g[pred_v].char_reach;

            /* every special that j leads to must also be reachable from v */
            bool seen_special = false;
            bool specials_covered = true;
            for (auto w : adjacent_vertices_range(pred_v, g)) {
                if (!is_special(w, g)) {
                    continue;
                }
                if (!edge(v, w, g).second) {
                    specials_covered = false;
                    break;
                }
                seen_special = true;
            }
            if (!specials_covered) {
                continue;
            }

            /* we also require j's report information to match ours */
            // FIXME: should be subset check?
            if (seen_special && g[pred_v].reports != reports) {
                continue;
            }

            /* ok we can use j */
            if ((crv & ~cr).none()) {
                NFAStateSet tmp(numStates);
                buildSquashMask(tmp, g, pred_v, cr, initStates, vByIndex,
                                pdom_tree, som, som_depths, region_map, cache);
                mask &= tmp;
                mask.reset(j);
            }
        }

        mask.set(i); /* never clear ourselves */

        if ((~mask).any()) { // i.e. some bits unset in mask
            DEBUG_PRINTF("%u squashes %zu other states\n", i, (~mask).count());
            squash.emplace(v, mask);
        }
    }

    findDerivedSquashers(g, vByIndex, pdom_tree, initStates, &squash, som,
                         som_depths, region_map, cache);

    return squash;
}
|
||||
|
||||
#define MIN_PURE_ACYCLIC_SQUASH 10 /** magic number */
|
||||
|
||||
/** Some squash states are clearly not advantageous in the NFA, as they do
|
||||
* incur the cost of an exception:
|
||||
* -# acyclic states
|
||||
* -# squash only a few acyclic states
|
||||
*/
|
||||
void filterSquashers(const NGHolder &g,
|
||||
map<NFAVertex, NFAStateSet> &squash) {
|
||||
DEBUG_PRINTF("filtering\n");
|
||||
map<u32, NFAVertex> rev; /* vertex_index -> vertex */
|
||||
for (auto v : vertices_range(g)) {
|
||||
rev[g[v].index] = v;
|
||||
}
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (!contains(squash, v)) {
|
||||
continue;
|
||||
}
|
||||
DEBUG_PRINTF("looking at squash set for vertex %u\n",
|
||||
g[v].index);
|
||||
|
||||
if (!hasSelfLoop(v, g)) {
|
||||
DEBUG_PRINTF("acyclic\n");
|
||||
squash.erase(v);
|
||||
continue;
|
||||
}
|
||||
|
||||
NFAStateSet squashed = squash[v];
|
||||
squashed.flip(); /* default sense for mask of survivors */
|
||||
for (NFAStateSet::size_type sq = squashed.find_first();
|
||||
sq != squashed.npos; sq = squashed.find_next(sq)) {
|
||||
NFAVertex u = rev[sq];
|
||||
if (hasSelfLoop(u, g)) {
|
||||
DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n", sq);
|
||||
goto next_vertex;
|
||||
}
|
||||
}
|
||||
|
||||
if (squashed.count() < MIN_PURE_ACYCLIC_SQUASH) {
|
||||
DEBUG_PRINTF("squash set too small\n");
|
||||
squash.erase(v);
|
||||
continue;
|
||||
}
|
||||
|
||||
next_vertex:;
|
||||
DEBUG_PRINTF("squash set ok\n");
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void getHighlanderReporters(const NGHolder &g, const NFAVertex accept,
|
||||
const ReportManager &rm,
|
||||
set<NFAVertex> &verts) {
|
||||
for (auto v : inv_adjacent_vertices_range(accept, g)) {
|
||||
if (v == g.accept) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto &reports = g[v].reports;
|
||||
if (reports.empty()) {
|
||||
assert(0);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Must be _all_ highlander callback reports.
|
||||
for (auto report : reports) {
|
||||
const Report &ir = rm.getReport(report);
|
||||
if (ir.ekey == INVALID_EKEY || ir.type != EXTERNAL_CALLBACK) {
|
||||
goto next_vertex;
|
||||
}
|
||||
|
||||
// If there's any bounds, these are handled outside the NFA and
|
||||
// probably shouldn't be pre-empted.
|
||||
if (ir.hasBounds()) {
|
||||
goto next_vertex;
|
||||
}
|
||||
}
|
||||
|
||||
verts.insert(v);
|
||||
next_vertex:
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void removeEdgesToAccept(NGHolder &g, NFAVertex v) {
|
||||
const auto &reports = g[v].reports;
|
||||
assert(!reports.empty());
|
||||
|
||||
// We remove any accept edge with a non-empty subset of the reports of v.
|
||||
|
||||
set<NFAEdge> dead;
|
||||
|
||||
for (const auto &e : in_edges_range(g.accept, g)) {
|
||||
NFAVertex u = source(e, g);
|
||||
const auto &r = g[u].reports;
|
||||
if (!r.empty() && is_subset_of(r, reports)) {
|
||||
DEBUG_PRINTF("vertex %u\n", g[u].index);
|
||||
dead.insert(e);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &e : in_edges_range(g.acceptEod, g)) {
|
||||
NFAVertex u = source(e, g);
|
||||
const auto &r = g[u].reports;
|
||||
if (!r.empty() && is_subset_of(r, reports)) {
|
||||
DEBUG_PRINTF("vertex %u\n", g[u].index);
|
||||
dead.insert(e);
|
||||
}
|
||||
}
|
||||
|
||||
assert(!dead.empty());
|
||||
remove_edges(dead, g);
|
||||
}
|
||||
|
||||
static
|
||||
vector<NFAVertex> findUnreachable(const NGHolder &g) {
|
||||
const boost::reverse_graph<NFAGraph, const NFAGraph &> revg(g.g);
|
||||
|
||||
ue2::unordered_map<NFAVertex, boost::default_color_type> colours;
|
||||
colours.reserve(num_vertices(g));
|
||||
|
||||
depth_first_visit(revg, g.acceptEod,
|
||||
make_dfs_visitor(boost::null_visitor()),
|
||||
make_assoc_property_map(colours));
|
||||
|
||||
// Unreachable vertices are not in the colour map.
|
||||
vector<NFAVertex> unreach;
|
||||
for (auto v : vertices_range(revg)) {
|
||||
if (!contains(colours, v)) {
|
||||
unreach.push_back(v);
|
||||
}
|
||||
}
|
||||
return unreach;
|
||||
}
|
||||
|
||||
/** Populates squash masks for states that can be switched off by highlander
|
||||
* (single match) reporters. */
|
||||
map<NFAVertex, NFAStateSet>
|
||||
findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) {
|
||||
map<NFAVertex, NFAStateSet> squash;
|
||||
|
||||
set<NFAVertex> verts;
|
||||
getHighlanderReporters(g, g.accept, rm, verts);
|
||||
getHighlanderReporters(g, g.acceptEod, rm, verts);
|
||||
if (verts.empty()) {
|
||||
DEBUG_PRINTF("no highlander reports\n");
|
||||
return squash;
|
||||
}
|
||||
|
||||
const u32 numStates = num_vertices(g);
|
||||
|
||||
for (auto v : verts) {
|
||||
DEBUG_PRINTF("vertex %u with %zu reports\n", g[v].index,
|
||||
g[v].reports.size());
|
||||
|
||||
// Find the set of vertices that lead to v or any other reporter with a
|
||||
// subset of v's reports. We do this by creating a copy of the graph,
|
||||
// cutting the appropriate out-edges to accept and seeing which
|
||||
// vertices become unreachable.
|
||||
|
||||
ue2::unordered_map<NFAVertex, NFAVertex> orig_to_copy;
|
||||
NGHolder h;
|
||||
cloneHolder(h, g, &orig_to_copy);
|
||||
removeEdgesToAccept(h, orig_to_copy[v]);
|
||||
|
||||
vector<NFAVertex> unreach = findUnreachable(h);
|
||||
DEBUG_PRINTF("can squash %zu vertices\n", unreach.size());
|
||||
if (unreach.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!contains(squash, v)) {
|
||||
squash[v] = NFAStateSet(numStates);
|
||||
squash[v].set();
|
||||
}
|
||||
|
||||
NFAStateSet &mask = squash[v];
|
||||
|
||||
for (auto uv : unreach) {
|
||||
DEBUG_PRINTF("squashes index %u\n", h[uv].index);
|
||||
mask.reset(h[uv].index);
|
||||
}
|
||||
}
|
||||
|
||||
return squash;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
71
src/nfagraph/ng_squash.h
Normal file
71
src/nfagraph/ng_squash.h
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief NFA graph state squashing analysis.
|
||||
*/
|
||||
#ifndef NG_SQUASH_H
|
||||
#define NG_SQUASH_H
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "som/som.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <map>
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
class ReportManager;
|
||||
|
||||
/** Dynamically-sized bitset, as an NFA can have an arbitrary number of states. */
|
||||
typedef boost::dynamic_bitset<> NFAStateSet;
|
||||
|
||||
/**
|
||||
* Populates the squash mask for each vertex (i.e. the set of states to be left
|
||||
* on during squashing).
|
||||
*
|
||||
* The NFAStateSet in the output map is indexed by vertex_index.
|
||||
*/
|
||||
std::map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g,
|
||||
som_type som = SOM_NONE);
|
||||
|
||||
/** Filters out squash states intended only for use in DFA construction. */
|
||||
void filterSquashers(const NGHolder &g,
|
||||
std::map<NFAVertex, NFAStateSet> &squash);
|
||||
|
||||
/** Populates squash masks for states that can be switched off by highlander
|
||||
* (single match) reporters. */
|
||||
std::map<NFAVertex, NFAStateSet>
|
||||
findHighlanderSquashers(const NGHolder &g, const ReportManager &rm);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NG_SQUASH_H
|
||||
190
src/nfagraph/ng_stop.cpp
Normal file
190
src/nfagraph/ng_stop.cpp
Normal file
@@ -0,0 +1,190 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Stop Alphabet calculation.
|
||||
*/
|
||||
#include "ng_stop.h"
|
||||
|
||||
#include "ng_depth.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_misc_opt.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/castlecompile.h"
|
||||
#include "som/som.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/container.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** Stop alphabet depth threshold. */
|
||||
// NOTE: findLeftOffsetStopAlphabet() packs one bit per depth into a u8 mask
// (1U << i for i < MAX_STOP_DEPTH), so this value must not exceed 8.
static const u32 MAX_STOP_DEPTH = 8;
|
||||
|
||||
namespace {
|
||||
|
||||
/** Depths from start, startDs for this graph. */
|
||||
struct InitDepths {
|
||||
explicit InitDepths(const NGHolder &g) {
|
||||
calcDepthsFrom(g, g.start, start);
|
||||
calcDepthsFrom(g, g.startDs, startDs);
|
||||
}
|
||||
|
||||
depth maxDist(const NGHolder &g, NFAVertex v) const {
|
||||
u32 idx = g[v].index;
|
||||
assert(idx < start.size() && idx < startDs.size());
|
||||
const depth &d_start = start.at(idx).max;
|
||||
const depth &d_startDs = startDs.at(idx).max;
|
||||
if (d_start.is_unreachable()) {
|
||||
return d_startDs;
|
||||
} else if (d_startDs.is_unreachable()) {
|
||||
return d_start;
|
||||
}
|
||||
return max(d_start, d_startDs);
|
||||
}
|
||||
|
||||
private:
|
||||
vector<DepthMinMax> start;
|
||||
vector<DepthMinMax> startDs;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/** Find the set of characters that are not present in the reachability of
|
||||
* graph \p g after a certain depth (currently 8). If a character in this set
|
||||
* is encountered, it means that the NFA is either dead or has not progressed
|
||||
* more than 8 characters from its start states. */
|
||||
CharReach findStopAlphabet(const NGHolder &g, som_type som) {
|
||||
const depth max_depth(MAX_STOP_DEPTH);
|
||||
const InitDepths depths(g);
|
||||
const map<NFAVertex, BoundedRepeatSummary> no_vertices;
|
||||
|
||||
CharReach stopcr;
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (depths.maxDist(g, v) >= max_depth) {
|
||||
if (som == SOM_NONE) {
|
||||
stopcr |= reduced_cr(v, g, no_vertices);
|
||||
} else {
|
||||
stopcr |= g[v].char_reach;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Turn alphabet into stops.
|
||||
stopcr.flip();
|
||||
|
||||
return stopcr;
|
||||
}
|
||||
|
||||
/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then
|
||||
* build an eight-bit mask per character C, with each bit representing the
|
||||
* depth before the location of character C (if encountered) that the NFA would
|
||||
* be in a predictable start state. */
|
||||
vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som) {
|
||||
const depth max_depth(MAX_STOP_DEPTH);
|
||||
const InitDepths depths(g);
|
||||
const map<NFAVertex, BoundedRepeatSummary> no_vertices;
|
||||
|
||||
vector<CharReach> reach(MAX_STOP_DEPTH);
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
CharReach v_cr;
|
||||
if (som == SOM_NONE) {
|
||||
v_cr = reduced_cr(v, g, no_vertices);
|
||||
} else {
|
||||
v_cr = g[v].char_reach;
|
||||
}
|
||||
|
||||
u32 d = min(max_depth, depths.maxDist(g, v));
|
||||
for (u32 i = 0; i < d; i++) {
|
||||
reach[i] |= v_cr;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
for (u32 i = 0; i < MAX_STOP_DEPTH; i++) {
|
||||
DEBUG_PRINTF("depth %u, stop chars: ", i);
|
||||
describeClass(stdout, ~reach[i], 20, CC_OUT_TEXT);
|
||||
printf("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
vector<u8> stop(N_CHARS, 0);
|
||||
|
||||
for (u32 i = 0; i < MAX_STOP_DEPTH; i++) {
|
||||
CharReach cr = ~reach[i]; // invert reach for stop chars.
|
||||
const u8 mask = 1U << i;
|
||||
for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) {
|
||||
stop[c] |= mask;
|
||||
}
|
||||
}
|
||||
|
||||
return stop;
|
||||
}
|
||||
|
||||
/**
 * Castle variant of the left-offset stop alphabet. Every character outside
 * the castle's reach gets the low d bits set, where d is the castle's maximum
 * width capped at MAX_STOP_DEPTH; all other characters get zero.
 */
vector<u8> findLeftOffsetStopAlphabet(const CastleProto &castle,
                                      UNUSED som_type som) {
    const depth max_width = findMaxWidth(castle);
    DEBUG_PRINTF("castle has reach %s and max width %s\n",
                 describeClass(castle.reach()).c_str(),
                 max_width.str().c_str());

    // Stop characters are exactly those the castle does not accept.
    const CharReach escape = ~castle.reach();

    u32 d = min(max_width, depth(MAX_STOP_DEPTH));
    const u8 mask = verify_u8((1U << d) - 1);

    vector<u8> stop(N_CHARS, 0);

    // Entries start at zero, so assigning the mask equals OR-ing it in.
    size_t c = escape.find_first();
    while (c != escape.npos) {
        stop[c] = mask;
        c = escape.find_next(c);
    }

    return stop;
}
|
||||
|
||||
} // namespace ue2
|
||||
62
src/nfagraph/ng_stop.h
Normal file
62
src/nfagraph/ng_stop.h
Normal file
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Stop Alphabet calculation.
|
||||
*/
|
||||
|
||||
#ifndef NG_STOP_H
|
||||
#define NG_STOP_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "som/som.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct CastleProto;
|
||||
class CharReach;
|
||||
class NGHolder;
|
||||
|
||||
/** Find the set of characters that are not present in the reachability of
|
||||
* graph \p g after a certain depth (currently 8). If a character in this set
|
||||
* is encountered, it means that the NFA is either dead or has not progressed
|
||||
* more than 8 characters from its start states. */
|
||||
CharReach findStopAlphabet(const NGHolder &g, som_type som);
|
||||
|
||||
/** Calculate the stop alphabet for each depth from 0 to MAX_STOP_DEPTH. Then
|
||||
* build an eight-bit mask per character C, with each bit representing the
|
||||
* depth before the location of character C (if encountered) that the NFA would
|
||||
* be in a predictable start state. */
|
||||
std::vector<u8> findLeftOffsetStopAlphabet(const NGHolder &g, som_type som);
|
||||
std::vector<u8> findLeftOffsetStopAlphabet(const CastleProto &c, som_type som);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
614
src/nfagraph/ng_uncalc_components.cpp
Normal file
614
src/nfagraph/ng_uncalc_components.cpp
Normal file
@@ -0,0 +1,614 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief NFA graph merging ("uncalc")
|
||||
*
|
||||
* The file contains our collection of NFA graph merging strategies.
|
||||
*
|
||||
* NFAGraph merging is generally guided by the length of the common prefix
|
||||
* between NFAGraph pairs.
|
||||
*/
|
||||
#include "grey.h"
|
||||
#include "ng_holder.h"
|
||||
#include "ng_limex.h"
|
||||
#include "ng_redundancy.h"
|
||||
#include "ng_region.h"
|
||||
#include "ng_restructuring.h"
|
||||
#include "ng_uncalc_components.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/ue2string.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <deque>
|
||||
#include <map>
|
||||
#include <queue>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** Largest possible desirable NFA, in states. shouldMerge() only runs its
 * full implementability check when the combined state count of two graphs
 * exceeds this, and rejects the merge if the merged NFA still needs more
 * states than this. */
static const u32 FAST_STATE_LIMIT = 256;
|
||||
|
||||
/** Sentinel value meaning no component has yet been selected. */
|
||||
// buildNfaMergeQueue() asserts that the cluster size is below this value, so
// it can never collide with a real component index.
static const u32 NO_COMPONENT = 0xffffffffu;
|
||||
|
||||
static
|
||||
vector<NFAVertex> getSortedVA(const NGHolder &g,
|
||||
const ue2::unordered_map<NFAVertex, u32> &state_ids) {
|
||||
vector<NFAVertex> out;
|
||||
out.reserve(num_vertices(g));
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
assert(contains(state_ids, v));
|
||||
if (state_ids.at(v) == NO_STATE) {
|
||||
continue;
|
||||
}
|
||||
out.push_back(v);
|
||||
}
|
||||
|
||||
// Order vertices by their state indices.
|
||||
sort(begin(out), end(out), [&state_ids](NFAVertex a, NFAVertex b) {
|
||||
return state_ids.at(a) < state_ids.at(b);
|
||||
});
|
||||
|
||||
#ifndef NDEBUG
|
||||
// State indices should match vector indices.
|
||||
for (u32 i = 0; i < out.size(); i++) {
|
||||
assert(state_ids.at(out.at(i)) == i);
|
||||
}
|
||||
#endif
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
 * Local compatibility test for a pair of vertices considered for the common
 * prefix: same reachability, same vertex index if either is a start vertex,
 * and the same presence of edges to accept and to acceptEod.
 */
static never_inline
bool cplVerticesMatch(const NGHolder &ga, NFAVertex va,
                      const NGHolder &gb, NFAVertex vb) {
    // Reachability has to be identical.
    if (ga[va].char_reach != gb[vb].char_reach) {
        return false;
    }

    // A start vertex only matches the same start vertex in the other graph.
    const bool involves_start = is_any_start(va, ga) || is_any_start(vb, gb);
    if (involves_start && ga[va].index != gb[vb].index) {
        return false;
    }

    // Both vertices must agree on having an accept / acceptEod edge.
    const bool same_accept =
        edge(va, ga.accept, ga).second == edge(vb, gb.accept, gb).second;
    const bool same_acceptEod =
        edge(va, ga.acceptEod, ga).second == edge(vb, gb.acceptEod, gb).second;

    return same_accept && same_acceptEod;
}
|
||||
|
||||
/**
 * Returns the length of the longest run of leading positions at which the
 * vertices of \p a and \p b match according to cplVerticesMatch() (same
 * reachability, same start vertex where applicable, same accept edges).
 *
 * The result is an upper bound for the common prefix length and never
 * exceeds 65535: buildNfaMergeQueue() stores these lengths in u16 slots.
 */
static never_inline
u32 cplCommonReachAndSimple(const NGHolder &ga, const vector<NFAVertex> &a,
                            const NGHolder &gb, const vector<NFAVertex> &b) {
    // Clamp while still in size_t, then narrow: narrowing first could wrap a
    // very large size to a small value before the limit is applied.
    const size_t cpl_limit = 65535;
    const u32 ml = static_cast<u32>(min(min(a.size(), b.size()), cpl_limit));

    // Count leading positions whose vertices are locally compatible.
    u32 len = 0;
    while (len < ml && cplVerticesMatch(ga, a[len], gb, b[len])) {
        len++;
    }

    return len;
}
|
||||
|
||||
/**
 * Computes the common prefix length of two graphs: the largest n such that
 * the first n states of both graphs (in state-index order) match locally,
 * have no in-edges from states outside the region, and have the same
 * out-edges (with the same tops) to states inside the region. Returns 0 if
 * there is no such region.
 */
u32 commonPrefixLength(const NGHolder &ga,
                       const ue2::unordered_map<NFAVertex, u32> &a_state_ids,
                       const NGHolder &gb,
                       const ue2::unordered_map<NFAVertex, u32> &b_state_ids) {
    vector<NFAVertex> a = getSortedVA(ga, a_state_ids);
    vector<NFAVertex> b = getSortedVA(gb, b_state_ids);

    /* upper bound on the common region based on local properties */
    u32 max = cplCommonReachAndSimple(ga, a, gb, b);
    DEBUG_PRINTF("cpl upper bound %u\n", max);

    // Each failed round lowers max, so the loop terminates.
    while (max > 0) {
        /* shrink max region based on in-edges from outside the region */
        for (size_t j = max; j > 0; j--) {
            bool lowered = false;

            for (auto u : inv_adjacent_vertices_range(a[j - 1], ga)) {
                u32 state_id = a_state_ids.at(u);
                if (state_id != NO_STATE && state_id >= max) {
                    max = j - 1;
                    DEBUG_PRINTF("lowering max to %u\n", max);
                    lowered = true;
                    break;
                }
            }

            // Once this position is excluded there is no need to look at
            // the other graph's vertex.
            if (lowered) {
                continue;
            }

            for (auto u : inv_adjacent_vertices_range(b[j - 1], gb)) {
                u32 state_id = b_state_ids.at(u);
                if (state_id != NO_STATE && state_id >= max) {
                    max = j - 1;
                    DEBUG_PRINTF("lowering max to %u\n", max);
                    break;
                }
            }
        }

        bool ok = true;

        /* Ensure that every pair of vertices has same out-edges to vertices in
           the region. */
        for (size_t i = 0; ok && i < max; i++) {
            size_t a_count = 0;
            size_t b_count = 0;

            NFAGraph::out_edge_iterator ei, ee;
            for (tie(ei, ee) = out_edges(a[i], ga); ok && ei != ee; ++ei) {
                u32 sid = a_state_ids.at(target(*ei, ga));
                if (sid == NO_STATE || sid >= max) {
                    continue; // target lies outside the region
                }

                a_count++;

                // The other graph must have the corresponding edge.
                const auto b_lookup = edge(b[i], b[sid], gb);
                if (!b_lookup.second) {
                    max = i;
                    ok = false;
                    DEBUG_PRINTF("lowering max to %u due to edge %zu->%u\n",
                                 max, i, sid);
                    break;
                }

                if (ga[*ei].top != gb[b_lookup.first].top) {
                    max = i;
                    ok = false;
                    DEBUG_PRINTF("tops don't match on edge %zu->%u\n",
                                 i, sid);
                }
            }

            // Count the other graph's in-region successors. This is skipped
            // if the edge comparison above already failed.
            NFAGraph::adjacency_iterator ai, ae;
            for (tie(ai, ae) = adjacent_vertices(b[i], gb); ok && ai != ae;
                 ++ai) {
                u32 sid = b_state_ids.at(*ai);
                if (sid != NO_STATE && sid < max) {
                    b_count++;
                }
            }

            if (a_count != b_count) {
                max = i;
                DEBUG_PRINTF("lowering max to %u due to a,b count "
                             "(a_count=%zu, b_count=%zu)\n", max, a_count,
                             b_count);
                ok = false;
            }
        }

        if (ok) {
            DEBUG_PRINTF("survived checks, returning cpl %u\n", max);
            return max;
        }
    }

    DEBUG_PRINTF("failed to find any common region\n");
    return 0;
}
|
||||
|
||||
/**
 * Merges graph \p vic into \p dest. The first \p common_len states of both
 * graphs (per \p vicStateMap / \p destStateMap) are shared; the remaining
 * vic vertices are copied into dest with new state numbers, and vic's edges
 * are added where dest does not already have them. Dest's edges and vertices
 * are renumbered at the end.
 */
static never_inline
void mergeNfa(NGHolder &dest, vector<NFAVertex> &destStateMap,
              ue2::unordered_map<NFAVertex, u32> &dest_state_ids,
              NGHolder &vic, vector<NFAVertex> &vicStateMap,
              size_t common_len) {
    // Translation table from vic vertices to their counterparts in dest.
    map<NFAVertex, NFAVertex> vmap;
    vmap[vic.start] = dest.start;
    vmap[vic.startDs] = dest.startDs;
    vmap[vic.accept] = dest.accept;
    vmap[vic.acceptEod] = dest.acceptEod;
    vmap[nullptr] = nullptr;

    // New states are numbered after the states dest already has.
    u32 next_state = countStates(dest, dest_state_ids);

    // Unions the reports of a vic vertex into a dest vertex.
    const auto absorb_reports = [&dest, &vic](NFAVertex from, NFAVertex to) {
        const auto &reports = vic[from].reports;
        dest[to].reports.insert(reports.begin(), reports.end());
    };

    // Vertices in the common region map onto the dest vertex with the same
    // state index; their reports (if any) are merged in.
    for (u32 i = 0; i < common_len; i++) {
        NFAVertex from = vicStateMap[i];
        NFAVertex to = destStateMap[i];
        vmap[from] = to;
        absorb_reports(from, to);
    }

    // Vertices beyond the common region are added to dest, taking state
    // numbers from next_state onwards.
    for (u32 i = common_len; i < vicStateMap.size(); i++) {
        NFAVertex from = vicStateMap[i];

        if (is_special(from, vic)) {
            // Dest already has start vertices, just merge the reports.
            NFAVertex to = dest.getSpecialVertex(vic[from].index);
            absorb_reports(from, to);
            continue;
        }

        NFAVertex to = add_vertex(vic[from], dest);
        dest_state_ids[to] = next_state++;
        vmap[from] = to;
    }

    /* add edges */
    DEBUG_PRINTF("common_len=%zu\n", common_len);
    for (const auto &e : edges_range(vic)) {
        NFAVertex u = vmap[source(e, vic)];
        NFAVertex v = vmap[target(e, vic)];
        bool uspecial = is_special(u, dest);
        bool vspecial = is_special(v, dest);

        // Skip stylised edges that are already present.
        if (uspecial && vspecial && edge(u, v, dest).second) {
            continue;
        }

        // We're in the common region if v's state ID is low enough, unless v
        // is a special (an accept), in which case we use u's state ID.
        assert(contains(dest_state_ids, v));
        bool in_common_region = dest_state_ids.at(v) < common_len;
        if (vspecial && dest_state_ids.at(u) < common_len) {
            in_common_region = true;
        }

        DEBUG_PRINTF("adding idx=%u (state %u) -> idx=%u (state %u)%s\n",
                     dest[u].index, dest_state_ids.at(u),
                     dest[v].index, dest_state_ids.at(v),
                     in_common_region ? " [common]" : "");

        if (in_common_region && !vspecial) {
            DEBUG_PRINTF("skipping common edge\n");
            assert(edge(u, v, dest).second);
            // Should never merge edges with different top values.
            assert(vic[e].top == dest[edge(u, v, dest).first].top);
            continue;
        }

        if (in_common_region) {
            assert(is_any_accept(v, dest));
            // If the edge exists in both graphs, skip it.
            if (edge(u, v, dest).second) {
                DEBUG_PRINTF("skipping common edge to accept\n");
                continue;
            }
        }

        assert(!edge(u, v, dest).second);
        add_edge(u, v, vic[e], dest);
    }

    dest.renumberEdges();
    dest.renumberVertices();
}
|
||||
|
||||
/**
 * Merges \p vholder into \p pholder, sharing the first \p cpl states. State
 * numbers and state-ordered vertex lists are computed here for both graphs
 * and handed to mergeNfa().
 */
static never_inline
void mergeNfaComponent(NGHolder &pholder, NGHolder &vholder, size_t cpl) {
    assert(&pholder != &vholder);

    // Number the victim first, then the destination.
    auto victim_ids = numberStates(vholder);
    auto dest_ids = numberStates(pholder);

    auto victim_order = getSortedVA(vholder, victim_ids);
    auto dest_order = getSortedVA(pholder, dest_ids);

    mergeNfa(pholder, dest_order, dest_ids, vholder, victim_order, cpl);
}
|
||||
|
||||
namespace {
|
||||
struct NfaMergeCandidateH {
|
||||
NfaMergeCandidateH(size_t cpl_in, NGHolder *first_in, NGHolder *second_in,
|
||||
u32 tb_in)
|
||||
: cpl(cpl_in), first(first_in), second(second_in), tie_breaker(tb_in) {}
|
||||
|
||||
size_t cpl; //!< common prefix length
|
||||
NGHolder *first; //!< first component to merge
|
||||
NGHolder *second; //!< second component to merge
|
||||
u32 tie_breaker; //!< for determinism
|
||||
|
||||
bool operator<(const NfaMergeCandidateH &other) const {
|
||||
if (cpl != other.cpl) {
|
||||
return cpl < other.cpl;
|
||||
} else {
|
||||
return tie_breaker < other.tie_breaker;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace
|
||||
|
||||
/** Returns true if graphs \p h1 and \p h2 can (and should) be merged. */
|
||||
static
|
||||
bool shouldMerge(NGHolder &ha,
|
||||
const ue2::unordered_map<NFAVertex, u32> &a_state_ids,
|
||||
NGHolder &hb,
|
||||
const ue2::unordered_map<NFAVertex, u32> &b_state_ids,
|
||||
size_t cpl, const ReportManager *rm,
|
||||
const CompileContext &cc) {
|
||||
size_t combinedStateCount =
|
||||
countStates(ha, a_state_ids) + countStates(hb, b_state_ids) - cpl;
|
||||
|
||||
if (combinedStateCount > FAST_STATE_LIMIT) {
|
||||
// More complex implementability check.
|
||||
NGHolder h_temp;
|
||||
cloneHolder(h_temp, ha);
|
||||
assert(h_temp.kind == hb.kind);
|
||||
mergeNfaComponent(h_temp, hb, cpl);
|
||||
reduceImplementableGraph(h_temp, SOM_NONE, rm, cc);
|
||||
u32 numStates = isImplementableNFA(h_temp, rm, cc);
|
||||
DEBUG_PRINTF("isImplementableNFA returned %u states\n", numStates);
|
||||
if (!numStates) {
|
||||
DEBUG_PRINTF("not implementable\n");
|
||||
return false;
|
||||
} else if (numStates > FAST_STATE_LIMIT) {
|
||||
DEBUG_PRINTF("too many states to merge\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Returns true if the graph has start vertices that are compatible for
|
||||
* merging. Rose may generate all sorts of wacky vacuous cases, and the merge
|
||||
* code isn't currently up to handling them. */
|
||||
static
|
||||
bool compatibleStarts(const NGHolder &ga, const NGHolder &gb) {
|
||||
// Start and startDs must have the same self-loops.
|
||||
return (edge(ga.startDs, ga.startDs, ga).second ==
|
||||
edge(gb.startDs, gb.startDs, gb).second) &&
|
||||
(edge(ga.start, ga.start, ga).second ==
|
||||
edge(gb.start, gb.start, gb).second);
|
||||
}
|
||||
|
||||
/**
 * Fills \p pq with one merge candidate for every pair of graphs in
 * \p cluster that has compatible starts, keyed by the pair's common prefix
 * length (cpl). A previously computed cpl is reused where the cache allows.
 */
static never_inline
void buildNfaMergeQueue(const vector<NGHolder *> &cluster,
                        priority_queue<NfaMergeCandidateH> *pq) {
    const size_t cs = cluster.size();
    assert(cs < NO_COMPONENT);

    // Number the states of every holder up front.
    vector<ue2::unordered_map<NFAVertex, u32>> states_map(cs);
    for (size_t i = 0; i < cs; i++) {
        assert(cluster[i]);
        states_map[i] = numberStates(*(cluster[i]));
    }

    // seen_cpl is a flattened cs x cs table of the cpls recorded so far,
    // written symmetrically; best_comp[c] is the component chosen so far as
    // the best partner for c.
    vector<u16> seen_cpl(cs * cs, 0);
    vector<u32> best_comp(cs, NO_COMPONENT);
    const auto slot = [cs](size_t x, size_t y) { return x + cs * y; };

    /* TODO: understand, explain */
    // NOTE(review): the two cache lookups below look like a shortcut: if the
    // best partner bc of one component shares a shorter prefix with the
    // other component, that shorter length is reused as the pair's cpl
    // instead of calling commonPrefixLength() -- confirm.
    for (u32 ci = 0; ci < cs; ci++) {
        for (u32 cj = ci + 1; cj < cs; cj++) {
            u16 cpl = 0;
            bool have_cpl = false;

            if (best_comp[ci] != NO_COMPONENT) {
                u32 bc = best_comp[ci];
                if (seen_cpl[slot(bc, cj)] < seen_cpl[slot(bc, ci)]) {
                    cpl = seen_cpl[slot(bc, cj)];
                    DEBUG_PRINTF("using cached cpl from %u %u\n", bc, cpl);
                    have_cpl = true;
                }
            }

            if (!have_cpl && best_comp[cj] != NO_COMPONENT) {
                u32 bc = best_comp[cj];
                if (seen_cpl[slot(bc, ci)] < seen_cpl[slot(bc, cj)]) {
                    cpl = seen_cpl[slot(bc, ci)];
                    DEBUG_PRINTF("using cached cpl from %u %u\n", bc, cpl);
                    have_cpl = true;
                }
            }

            NGHolder &g_i = *(cluster[ci]);
            NGHolder &g_j = *(cluster[cj]);

            // Pairs with incompatible starts are never queued or recorded.
            if (!compatibleStarts(g_i, g_j)) {
                continue;
            }

            if (!have_cpl) {
                cpl = commonPrefixLength(g_i, states_map[ci],
                                         g_j, states_map[cj]);
            }

            seen_cpl[slot(ci, cj)] = cpl;
            seen_cpl[slot(cj, ci)] = cpl;

            // Remember ci as cj's best partner if it beats the current one.
            if (best_comp[cj] == NO_COMPONENT
                || seen_cpl[slot(best_comp[cj], cj)] < cpl) {
                best_comp[cj] = ci;
            }

            DEBUG_PRINTF("cpl %u %u = %u\n", ci, cj, cpl);

            // The tie breaker is unique per (ci, cj) pair.
            pq->push(NfaMergeCandidateH(cpl, cluster[ci], cluster[cj],
                                        ci * cs + cj));
        }
    }
}
|
||||
|
||||
/** True if the graphs have compatible starts for merging, i.e. they are NOT
|
||||
* both vacuous with different reports on the starts. */
|
||||
static
|
||||
bool mergeableStarts(const NGHolder &h1, const NGHolder &h2) {
|
||||
bool vac1 = isVacuous(h1), vac2 = isVacuous(h2);
|
||||
|
||||
// Safety tests: reports should be empty on non-vacuous graphs.
|
||||
if (!vac1) {
|
||||
assert(h1[h1.start].reports.empty());
|
||||
assert(h1[h1.startDs].reports.empty());
|
||||
}
|
||||
if (!vac2) {
|
||||
assert(h2[h2.start].reports.empty());
|
||||
assert(h2[h2.startDs].reports.empty());
|
||||
}
|
||||
|
||||
if (vac1 && vac2) {
|
||||
// Graphs must have the same reports on their starts to be mergeable
|
||||
// (and top on start->accept).
|
||||
if (h1[h1.start].reports
|
||||
!= h2[h2.start].reports) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (h1[h1.startDs].reports
|
||||
!= h2[h2.startDs].reports) {
|
||||
return false;
|
||||
}
|
||||
|
||||
pair<NFAEdge, bool> e1, e2;
|
||||
e1 = edge(h1.start, h1.accept, h1);
|
||||
e2 = edge(h2.start, h2.accept, h2);
|
||||
if (e1.second || e2.second) {
|
||||
if (e1.second && e2.second &&
|
||||
h1[e1.first].top != h2[e2.first].top) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
e1 = edge(h1.start, h1.acceptEod, h1);
|
||||
e2 = edge(h2.start, h2.acceptEod, h2);
|
||||
if (e1.second || e2.second) {
|
||||
if (e1.second && e2.second &&
|
||||
h1[e1.first].top != h2[e2.first].top) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Merge graph \p ga into graph \p gb. Returns false on failure. */
|
||||
bool mergeNfaPair(NGHolder &ga, NGHolder &gb, const ReportManager *rm,
|
||||
const CompileContext &cc) {
|
||||
assert(ga.kind == gb.kind);
|
||||
auto a_state_ids = numberStates(ga);
|
||||
auto b_state_ids = numberStates(gb);
|
||||
|
||||
// At the moment, since our vertices can only have one report ID each,
|
||||
// we must ensure that our start vertices have the same report ID,
|
||||
// otherwise they can't be merged. This happens in vacuous NFAs, used
|
||||
// by Rose.
|
||||
// XXX: the multi-top code has this limitation, too.
|
||||
if (!mergeableStarts(ga, gb)) {
|
||||
DEBUG_PRINTF("starts aren't mergeable\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// NOTE: states must be numbered already.
|
||||
|
||||
u32 cpl = commonPrefixLength(ga, a_state_ids, gb, b_state_ids);
|
||||
|
||||
if (!shouldMerge(gb, b_state_ids, ga, a_state_ids, cpl, rm, cc)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
mergeNfaComponent(gb, ga, cpl);
|
||||
reduceImplementableGraph(gb, SOM_NONE, rm, cc);
|
||||
b_state_ids = numberStates(gb);
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Merge the group of graphs in \p cluster where possible. The (from, to)
|
||||
* mapping of merged graphs is returned in \p merged. */
|
||||
void mergeNfaCluster(const vector<NGHolder *> &cluster,
|
||||
const ReportManager *rm,
|
||||
map<NGHolder *, NGHolder *> &merged,
|
||||
const CompileContext &cc) {
|
||||
if (cluster.size() < 2) {
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("new cluster, size %zu\n", cluster.size());
|
||||
merged.clear();
|
||||
|
||||
priority_queue<NfaMergeCandidateH> pq;
|
||||
buildNfaMergeQueue(cluster, &pq);
|
||||
|
||||
while (!pq.empty()) {
|
||||
NGHolder &pholder = *pq.top().first;
|
||||
NGHolder &vholder = *pq.top().second;
|
||||
pq.pop();
|
||||
|
||||
if (contains(merged, &pholder) || contains(merged, &vholder)) {
|
||||
DEBUG_PRINTF("dead\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!mergeNfaPair(vholder, pholder, rm, cc)) {
|
||||
DEBUG_PRINTF("merge failed\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
merged.emplace(&vholder, &pholder);
|
||||
|
||||
// Seek closure.
|
||||
for (auto &m : merged) {
|
||||
if (m.second == &vholder) {
|
||||
m.second = &pholder;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
81
src/nfagraph/ng_uncalc_components.h
Normal file
81
src/nfagraph/ng_uncalc_components.h
Normal file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief NFA graph merging ("uncalc")
|
||||
*/
|
||||
|
||||
#ifndef NG_UNCALC_COMPONENTS_H
|
||||
#define NG_UNCALC_COMPONENTS_H
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "nfagraph/ng_graph.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct CompileContext;
|
||||
struct Grey;
|
||||
class NGHolder;
|
||||
class ReportManager;
|
||||
|
||||
/**
|
||||
* \brief Returns the common prefix length for a pair of graphs.
|
||||
*
|
||||
 * The CPL is calculated based on the topological ordering given by the state
|
||||
* indices for each graph.
|
||||
*/
|
||||
u32 commonPrefixLength(const NGHolder &ga,
|
||||
const ue2::unordered_map<NFAVertex, u32> &a_state_ids,
|
||||
const NGHolder &gb,
|
||||
const ue2::unordered_map<NFAVertex, u32> &b_state_ids);
|
||||
|
||||
/**
|
||||
* \brief Merge the group of graphs in \p cluster where possible.
|
||||
*
|
||||
* The (from, to) mapping of merged graphs is returned in \p merged.
|
||||
*/
|
||||
void mergeNfaCluster(const std::vector<NGHolder *> &cluster,
|
||||
const ReportManager *rm,
|
||||
std::map<NGHolder *, NGHolder *> &merged,
|
||||
const CompileContext &cc);
|
||||
|
||||
/**
|
||||
* \brief Merge graph \p ga into graph \p gb.
|
||||
*
|
||||
* Returns false on failure. On success, \p gb is reduced via \ref
|
||||
* reduceImplementableGraph and renumbered.
|
||||
*/
|
||||
bool mergeNfaPair(NGHolder &ga, NGHolder &gb, const ReportManager *rm,
|
||||
const CompileContext &cc);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
114
src/nfagraph/ng_undirected.h
Normal file
114
src/nfagraph/ng_undirected.h
Normal file
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Create an undirected graph from an NFAGraph.
|
||||
*/
|
||||
|
||||
#ifndef NG_UNDIRECTED_H_CB42C71CF38E3D
|
||||
#define NG_UNDIRECTED_H_CB42C71CF38E3D
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "ng_util.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/**
|
||||
* \brief BGL graph type for the undirected NFA graph.
|
||||
*
|
||||
* Note that we use a set for the out-edge lists: this avoids the construction
|
||||
* of parallel edges. The only vertex property constructed is \a
|
||||
* vertex_index_t.
|
||||
*/
|
||||
typedef boost::adjacency_list<boost::setS, // out edges
|
||||
boost::listS, // vertices
|
||||
boost::undirectedS, // graph is undirected
|
||||
boost::property<boost::vertex_index_t, u32> >
|
||||
NFAUndirectedGraph;
|
||||
|
||||
typedef NFAUndirectedGraph::vertex_descriptor NFAUndirectedVertex;
|
||||
|
||||
/**
|
||||
* Make a copy of an NFAGraph with undirected edges, optionally without start
|
||||
* vertices. Mappings from the original graph to the new one are provided.
|
||||
*
|
||||
* Note that new vertex indices are assigned contiguously in \a vertices(g) order.
|
||||
*/
|
||||
template <typename GraphT>
|
||||
void createUnGraph(const GraphT &g,
|
||||
bool excludeStarts,
|
||||
bool excludeAccepts,
|
||||
NFAUndirectedGraph &ug,
|
||||
ue2::unordered_map<NFAVertex, NFAUndirectedVertex> &old2new,
|
||||
ue2::unordered_map<u32, NFAVertex> &newIdx2old) {
|
||||
u32 idx = 0;
|
||||
|
||||
for (auto v : ue2::vertices_range(g)) {
|
||||
// skip all accept nodes
|
||||
if (excludeAccepts && is_any_accept(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// skip starts if required
|
||||
if (excludeStarts && is_any_start(v, g)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
NFAUndirectedVertex nuv = boost::add_vertex(ug);
|
||||
old2new[v] = nuv;
|
||||
newIdx2old[idx] = v;
|
||||
boost::put(boost::vertex_index, ug, nuv, idx++);
|
||||
}
|
||||
|
||||
for (const auto &e : ue2::edges_range(g)) {
|
||||
NFAVertex src = source(e, g);
|
||||
NFAVertex targ = target(e, g);
|
||||
|
||||
if ((excludeAccepts && is_any_accept(src, g))
|
||||
|| (excludeStarts && is_any_start(src, g))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((excludeAccepts && is_any_accept(targ, g))
|
||||
|| (excludeStarts && is_any_start(targ, g))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
NFAUndirectedVertex new_src = old2new[src];
|
||||
NFAUndirectedVertex new_targ = old2new[targ];
|
||||
|
||||
boost::add_edge(new_src, new_targ, ug);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif /* NG_UNDIRECTED_H_CB42C71CF38E3D */
|
||||
305
src/nfagraph/ng_utf8.cpp
Normal file
305
src/nfagraph/ng_utf8.cpp
Normal file
@@ -0,0 +1,305 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief UTF-8 transforms and operations.
|
||||
*/
|
||||
#include "ng_utf8.h"
|
||||
|
||||
#include "ng.h"
|
||||
#include "ng_prune.h"
|
||||
#include "ng_util.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/unicode_def.h"
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
void allowIllegal(NGWrapper &w, NFAVertex v, u8 pred_char) {
|
||||
if (in_degree(v, w) != 1) {
|
||||
DEBUG_PRINTF("unexpected pred\n");
|
||||
assert(0); /* should be true due to the early stage of this analysis */
|
||||
return;
|
||||
}
|
||||
|
||||
CharReach &cr = w[v].char_reach;
|
||||
if (pred_char == 0xe0) {
|
||||
assert(cr.isSubsetOf(CharReach(0xa0, 0xbf)));
|
||||
if (cr == CharReach(0xa0, 0xbf)) {
|
||||
cr |= CharReach(0x80, 0x9f);
|
||||
}
|
||||
} else if (pred_char == 0xf0) {
|
||||
assert(cr.isSubsetOf(CharReach(0x90, 0xbf)));
|
||||
if (cr == CharReach(0x90, 0xbf)) {
|
||||
cr |= CharReach(0x80, 0x8f);
|
||||
}
|
||||
} else if (pred_char == 0xf4) {
|
||||
assert(cr.isSubsetOf(CharReach(0x80, 0x8f)));
|
||||
if (cr == CharReach(0x80, 0x8f)) {
|
||||
cr |= CharReach(0x90, 0xbf);
|
||||
}
|
||||
} else {
|
||||
assert(0); /* unexpected pred */
|
||||
}
|
||||
}
|
||||
|
||||
/** \brief Relax forbidden UTF-8 sequences.
|
||||
*
|
||||
* Some byte sequences can not appear in valid UTF-8 as they encode code points
|
||||
* above \\x{10ffff} or they represent overlong encodings. As we require valid
|
||||
* UTF-8 input, we have no defined behaviour in these cases, as a result we can
|
||||
* accept them if it simplifies the graph. */
|
||||
void relaxForbiddenUtf8(NGWrapper &w) {
|
||||
if (!w.utf8) {
|
||||
return;
|
||||
}
|
||||
|
||||
const CharReach e0(0xe0);
|
||||
const CharReach f0(0xf0);
|
||||
const CharReach f4(0xf4);
|
||||
|
||||
for (auto v : vertices_range(w)) {
|
||||
const CharReach &cr = w[v].char_reach;
|
||||
if (cr == e0 || cr == f0 || cr == f4) {
|
||||
u8 pred_char = cr.find_first();
|
||||
for (auto t : adjacent_vertices_range(v, w)) {
|
||||
allowIllegal(w, t, pred_char);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool hasPredInSet(const NGHolder &g, NFAVertex v, const set<NFAVertex> &s) {
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (contains(s, u)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static
|
||||
bool hasSuccInSet(const NGHolder &g, NFAVertex v, const set<NFAVertex> &s) {
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (contains(s, w)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static
|
||||
void findSeeds(const NGHolder &h, const bool som, vector<NFAVertex> *seeds) {
|
||||
set<NFAVertex> bad; /* from zero-width asserts near accepts, etc */
|
||||
for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
|
||||
const CharReach &cr = h[v].char_reach;
|
||||
if (!isutf8ascii(cr) && !isutf8start(cr)) {
|
||||
bad.insert(v);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
|
||||
const CharReach &cr = h[v].char_reach;
|
||||
if (!isutf8ascii(cr) && !isutf8start(cr)) {
|
||||
bad.insert(v);
|
||||
}
|
||||
}
|
||||
|
||||
// we want to be careful with asserts connected to starts
|
||||
// as well as they may not finish a code point
|
||||
for (auto v : vertices_range(h)) {
|
||||
if (is_virtual_start(v, h)) {
|
||||
bad.insert(v);
|
||||
insert(&bad, adjacent_vertices(v, h));
|
||||
}
|
||||
}
|
||||
|
||||
/* we cannot handle vertices connected to accept as would report matches in
|
||||
* the middle of codepoints. acceptEod is not a problem as the input must
|
||||
* end at a codepoint boundary */
|
||||
bad.insert(h.accept);
|
||||
|
||||
// If we're in SOM mode, we don't want to mess with vertices that have a
|
||||
// direct edge from startDs.
|
||||
if (som) {
|
||||
insert(&bad, adjacent_vertices(h.startDs, h));
|
||||
}
|
||||
|
||||
set<NFAVertex> already_seeds; /* already marked as seeds */
|
||||
for (auto v : vertices_range(h)) {
|
||||
const CharReach &cr = h[v].char_reach;
|
||||
|
||||
if (!isutf8ascii(cr) || !hasSelfLoop(v, h)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (hasSuccInSet(h, v, bad)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip vertices that are directly connected to other vertices already
|
||||
// in the seeds list: we can't collapse two of these directly next to
|
||||
// each other.
|
||||
if (hasPredInSet(h, v, already_seeds) ||
|
||||
hasSuccInSet(h, v, already_seeds)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("%u is a seed\n", h[v].index);
|
||||
seeds->push_back(v);
|
||||
already_seeds.insert(v);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool expandCyclic(NGHolder &h, NFAVertex v) {
|
||||
DEBUG_PRINTF("inspecting %u\n", h[v].index);
|
||||
bool changes = false;
|
||||
|
||||
set<NFAVertex> v_preds;
|
||||
set<NFAVertex> v_succs;
|
||||
pred(h, v, &v_preds);
|
||||
succ(h, v, &v_succs);
|
||||
set<NFAVertex> start_siblings;
|
||||
set<NFAVertex> end_siblings;
|
||||
|
||||
CharReach &v_cr = h[v].char_reach;
|
||||
|
||||
/* We need to find start vertices which have all of our preds.
|
||||
* As we have a self loop, it must be one of our succs. */
|
||||
for (auto a : adjacent_vertices_range(v, h)) {
|
||||
set<NFAVertex> a_preds;
|
||||
pred(h, a, &a_preds);
|
||||
|
||||
if (a_preds == v_preds && isutf8start(h[a].char_reach)) {
|
||||
DEBUG_PRINTF("%u is a start v\n", h[a].index);
|
||||
start_siblings.insert(a);
|
||||
}
|
||||
}
|
||||
|
||||
/* We also need to find full cont vertices which have all our own succs;
|
||||
* As we have a self loop, it must be one of our preds. */
|
||||
for (auto a : inv_adjacent_vertices_range(v, h)) {
|
||||
set<NFAVertex> a_succs;
|
||||
succ(h, a, &a_succs);
|
||||
|
||||
if (a_succs == v_succs && h[a].char_reach == UTF_CONT_CR) {
|
||||
DEBUG_PRINTF("%u is a full tail cont\n", h[a].index);
|
||||
end_siblings.insert(a);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto s : start_siblings) {
|
||||
if (out_degree(s, h) != 1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const CharReach &cr = h[s].char_reach;
|
||||
if (cr.isSubsetOf(UTF_TWO_START_CR)) {
|
||||
if (end_siblings.find(*adjacent_vertices(s, h).first)
|
||||
== end_siblings.end()) {
|
||||
DEBUG_PRINTF("%u is odd\n", h[s].index);
|
||||
continue;
|
||||
}
|
||||
} else if (cr.isSubsetOf(UTF_THREE_START_CR)) {
|
||||
NFAVertex m = *adjacent_vertices(s, h).first;
|
||||
|
||||
if (h[m].char_reach != UTF_CONT_CR
|
||||
|| out_degree(m, h) != 1) {
|
||||
continue;
|
||||
}
|
||||
if (end_siblings.find(*adjacent_vertices(m, h).first)
|
||||
== end_siblings.end()) {
|
||||
DEBUG_PRINTF("%u is odd\n", h[s].index);
|
||||
continue;
|
||||
}
|
||||
} else if (cr.isSubsetOf(UTF_FOUR_START_CR)) {
|
||||
NFAVertex m1 = *adjacent_vertices(s, h).first;
|
||||
|
||||
if (h[m1].char_reach != UTF_CONT_CR
|
||||
|| out_degree(m1, h) != 1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
NFAVertex m2 = *adjacent_vertices(m1, h).first;
|
||||
|
||||
if (h[m2].char_reach != UTF_CONT_CR
|
||||
|| out_degree(m2, h) != 1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (end_siblings.find(*adjacent_vertices(m2, h).first)
|
||||
== end_siblings.end()) {
|
||||
DEBUG_PRINTF("%u is odd\n", h[s].index);
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
DEBUG_PRINTF("%u is bad\n", h[s].index);
|
||||
continue;
|
||||
}
|
||||
|
||||
v_cr |= cr;
|
||||
clear_vertex(s, h);
|
||||
changes = true;
|
||||
}
|
||||
|
||||
if (changes) {
|
||||
v_cr |= UTF_CONT_CR; /* we need to add in cont reach */
|
||||
v_cr.set(0xc0); /* we can also add in the forbidden bytes as we require
|
||||
* valid unicode data */
|
||||
v_cr.set(0xc1);
|
||||
v_cr |= CharReach(0xf5, 0xff);
|
||||
}
|
||||
|
||||
return changes;
|
||||
}
|
||||
|
||||
/** \brief Contract cycles of UTF-8 code points down to a single cyclic vertex
|
||||
* where possible, based on the assumption that we will always be matching
|
||||
* against well-formed input. */
|
||||
void utf8DotRestoration(NGHolder &h, bool som) {
|
||||
vector<NFAVertex> seeds; /* cyclic ascii vertices */
|
||||
findSeeds(h, som, &seeds);
|
||||
|
||||
bool changes = false;
|
||||
for (auto v : seeds) {
|
||||
changes |= expandCyclic(h, v);
|
||||
}
|
||||
|
||||
if (changes) {
|
||||
pruneUseless(h);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user