mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
smallwrite: prune overlong nfa graphs early
This commit is contained in:
parent
21a1b47637
commit
c67a361080
@ -36,10 +36,11 @@
|
|||||||
#include "nfa/rdfa_merge.h"
|
#include "nfa/rdfa_merge.h"
|
||||||
#include "nfa/shengcompile.h"
|
#include "nfa/shengcompile.h"
|
||||||
#include "nfagraph/ng.h"
|
#include "nfagraph/ng.h"
|
||||||
|
#include "nfagraph/ng_depth.h"
|
||||||
#include "nfagraph/ng_holder.h"
|
#include "nfagraph/ng_holder.h"
|
||||||
#include "nfagraph/ng_mcclellan.h"
|
#include "nfagraph/ng_mcclellan.h"
|
||||||
|
#include "nfagraph/ng_prune.h"
|
||||||
#include "nfagraph/ng_util.h"
|
#include "nfagraph/ng_util.h"
|
||||||
#include "nfagraph/ng_width.h"
|
|
||||||
#include "smallwrite/smallwrite_internal.h"
|
#include "smallwrite/smallwrite_internal.h"
|
||||||
#include "util/alloc.h"
|
#include "util/alloc.h"
|
||||||
#include "util/charreach.h"
|
#include "util/charreach.h"
|
||||||
@ -101,6 +102,74 @@ SmallWriteBuildImpl::SmallWriteBuildImpl(size_t num_patterns,
|
|||||||
|| num_patterns > cc.grey.smallWriteMaxPatterns) {
|
|| num_patterns > cc.grey.smallWriteMaxPatterns) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Remove any reports from the given vertex that cannot match within
|
||||||
|
* max_depth due to their constraints.
|
||||||
|
*/
|
||||||
|
static
|
||||||
|
bool pruneOverlongReports(NFAVertex v, NGHolder &g, const depth &max_depth,
|
||||||
|
const ReportManager &rm) {
|
||||||
|
assert(!g[v].reports.empty());
|
||||||
|
|
||||||
|
vector<ReportID> bad_reports;
|
||||||
|
|
||||||
|
for (ReportID id : g[v].reports) {
|
||||||
|
const auto &report = rm.getReport(id);
|
||||||
|
if (report.minOffset > max_depth) {
|
||||||
|
bad_reports.push_back(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (ReportID id : bad_reports) {
|
||||||
|
g[v].reports.erase(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (g[v].reports.empty()) {
|
||||||
|
DEBUG_PRINTF("none of vertex %u's reports can match, cut accepts\n",
|
||||||
|
g[v].index);
|
||||||
|
remove_edge(v, g.accept, g);
|
||||||
|
remove_edge(v, g.acceptEod, g);
|
||||||
|
}
|
||||||
|
|
||||||
|
return !bad_reports.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Prune vertices and reports from the graph that cannot match within
|
||||||
|
* max_depth.
|
||||||
|
*/
|
||||||
|
static
|
||||||
|
bool pruneOverlong(NGHolder &g, const depth &max_depth,
|
||||||
|
const ReportManager &rm) {
|
||||||
|
bool modified = false;
|
||||||
|
std::vector<NFAVertexDepth> depths;
|
||||||
|
calcDepths(g, depths);
|
||||||
|
|
||||||
|
for (auto v : vertices_range(g)) {
|
||||||
|
if (is_special(v, g)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const auto &d = depths.at(g[v].index);
|
||||||
|
depth min_depth = min(d.fromStart.min, d.fromStartDotStar.min);
|
||||||
|
if (min_depth > max_depth) {
|
||||||
|
clear_vertex(v, g);
|
||||||
|
modified = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_match_vertex(v, g)) {
|
||||||
|
modified |= pruneOverlongReports(v, g, max_depth, rm);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (modified) {
|
||||||
|
pruneUseless(g);
|
||||||
|
DEBUG_PRINTF("pruned graph down to %zu vertices\n", num_vertices(g));
|
||||||
|
}
|
||||||
|
|
||||||
|
return modified;
|
||||||
|
}
|
||||||
|
|
||||||
void SmallWriteBuildImpl::add(const NGWrapper &w) {
|
void SmallWriteBuildImpl::add(const NGWrapper &w) {
|
||||||
// If the graph is poisoned (i.e. we can't build a SmallWrite version),
|
// If the graph is poisoned (i.e. we can't build a SmallWrite version),
|
||||||
// we don't even try.
|
// we don't even try.
|
||||||
@ -118,13 +187,12 @@ void SmallWriteBuildImpl::add(const NGWrapper &w) {
|
|||||||
// make a copy of the graph so that we can modify it for our purposes
|
// make a copy of the graph so that we can modify it for our purposes
|
||||||
unique_ptr<NGHolder> h = cloneHolder(w);
|
unique_ptr<NGHolder> h = cloneHolder(w);
|
||||||
|
|
||||||
|
pruneOverlong(*h, depth(cc.grey.smallWriteLargestBuffer), rm);
|
||||||
|
|
||||||
reduceGraph(*h, SOM_NONE, w.utf8, cc);
|
reduceGraph(*h, SOM_NONE, w.utf8, cc);
|
||||||
|
|
||||||
// If the earliest match location is outside the small write region,
|
if (can_never_match(*h)) {
|
||||||
// then we don't need to build a SmallWrite version.
|
DEBUG_PRINTF("graph can never match in small block\n");
|
||||||
// However, we don't poison this case either, since it is simply a case,
|
|
||||||
// where we know the resulting graph won't match.
|
|
||||||
if (findMinWidth(*h) > depth(cc.grey.smallWriteLargestBuffer)) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user