mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
2003 lines
65 KiB
C++
2003 lines
65 KiB
C++
/*
|
|
* Copyright (c) 2015-2018, Intel Corporation
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "rose_build_add_internal.h"
|
|
#include "rose_build_impl.h"
|
|
|
|
#include "ue2common.h"
|
|
#include "grey.h"
|
|
#include "rose_build_anchored.h"
|
|
#include "rose_in_util.h"
|
|
#include "hwlm/hwlm_literal.h"
|
|
#include "nfa/goughcompile.h"
|
|
#include "nfa/nfa_api_queue.h"
|
|
#include "nfagraph/ng_depth.h"
|
|
#include "nfagraph/ng_dump.h"
|
|
#include "nfagraph/ng_holder.h"
|
|
#include "nfagraph/ng_limex.h"
|
|
#include "nfagraph/ng_mcclellan.h"
|
|
#include "nfagraph/ng_prefilter.h"
|
|
#include "nfagraph/ng_prune.h"
|
|
#include "nfagraph/ng_region.h"
|
|
#include "nfagraph/ng_repeat.h"
|
|
#include "nfagraph/ng_reports.h"
|
|
#include "nfagraph/ng_util.h"
|
|
#include "nfagraph/ng_width.h"
|
|
#include "util/charreach.h"
|
|
#include "util/charreach_util.h"
|
|
#include "util/compare.h"
|
|
#include "util/compile_context.h"
|
|
#include "util/container.h"
|
|
#include "util/dump_charclass.h"
|
|
#include "util/graph_range.h"
|
|
#include "util/insertion_ordered.h"
|
|
#include "util/noncopyable.h"
|
|
#include "util/order_check.h"
|
|
#include "util/report_manager.h"
|
|
#include "util/ue2string.h"
|
|
#include "util/verify_types.h"
|
|
|
|
#include <algorithm>
|
|
#include <map>
|
|
#include <set>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <utility>
|
|
|
|
using namespace std;
|
|
|
|
namespace ue2 {
|
|
|
|
/**
|
|
* \brief Data used by most of the construction code in this file.
|
|
*/
|
|
struct RoseBuildData : noncopyable {
|
|
RoseBuildData(const RoseInGraph &ig_in, bool som_in)
|
|
: ig(ig_in), som(som_in) {}
|
|
|
|
/** Input rose graph. */
|
|
const RoseInGraph &ig;
|
|
|
|
/** Edges we've transformed (in \ref transformAnchoredLiteralOverlap) which
|
|
* require ANCH history to prevent overlap. */
|
|
unordered_set<RoseInEdge> anch_history_edges;
|
|
|
|
/** True if we're tracking Start of Match. */
|
|
bool som;
|
|
};
|
|
|
|
static
|
|
ReportID findReportId(const NGHolder &g) {
|
|
/* prefix/infix always have an edge to accept and only 1 reportid initially
|
|
*/
|
|
assert(in_degree(g.accept, g));
|
|
const auto &rep = g[*inv_adjacent_vertices(g.accept, g).first].reports;
|
|
assert(!rep.empty());
|
|
return *rep.begin();
|
|
}
|
|
|
|
static
|
|
RoseVertex createVertex(RoseBuildImpl *build, u32 literalId, u32 min_offset,
|
|
u32 max_offset) {
|
|
RoseGraph &g = build->g;
|
|
// add to tree
|
|
RoseVertex v = add_vertex(g);
|
|
g[v].min_offset = min_offset;
|
|
g[v].max_offset = max_offset;
|
|
|
|
DEBUG_PRINTF("insert vertex %zu into literal %u's vertex set\n", g[v].index,
|
|
literalId);
|
|
g[v].literals.insert(literalId);
|
|
build->literal_info[literalId].vertices.insert(v);
|
|
|
|
return v;
|
|
}
|
|
|
|
RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent,
|
|
u32 minBound, u32 maxBound, u32 literalId,
|
|
size_t literalLength,
|
|
const flat_set<ReportID> &reports) {
|
|
assert(parent != RoseGraph::null_vertex());
|
|
|
|
RoseGraph &g = build->g;
|
|
// add to tree (offsets set latter)
|
|
RoseVertex v = createVertex(build, literalId, 0U, 0U);
|
|
|
|
/* fill in report information */
|
|
g[v].reports.insert(reports.begin(), reports.end());
|
|
|
|
RoseEdge e = add_edge(parent, v, g);
|
|
DEBUG_PRINTF("adding edge (%u, %u) to parent\n", minBound, maxBound);
|
|
|
|
g[e].minBound = minBound;
|
|
g[e].maxBound = maxBound;
|
|
g[e].rose_top = 0;
|
|
|
|
u32 min_offset = add_rose_depth(g[parent].min_offset, minBound);
|
|
u32 max_offset = add_rose_depth(g[parent].max_offset, maxBound);
|
|
|
|
/* take literal length into account for offsets */
|
|
const u32 lit_len = verify_u32(literalLength);
|
|
min_offset = add_rose_depth(min_offset, lit_len);
|
|
max_offset = add_rose_depth(max_offset, lit_len);
|
|
|
|
g[v].min_offset = min_offset;
|
|
g[v].max_offset = max_offset;
|
|
|
|
return v;
|
|
}
|
|
|
|
static
|
|
RoseVertex createAnchoredVertex(RoseBuildImpl *build, u32 literalId,
|
|
u32 min_offset, u32 max_offset) {
|
|
RoseGraph &g = build->g;
|
|
RoseVertex v = createVertex(build, literalId, min_offset, max_offset);
|
|
|
|
DEBUG_PRINTF("created anchored vertex %zu with lit id %u\n", g[v].index,
|
|
literalId);
|
|
|
|
RoseEdge e = add_edge(build->anchored_root, v, g);
|
|
g[e].minBound = min_offset;
|
|
g[e].maxBound = max_offset;
|
|
|
|
return v;
|
|
}
|
|
|
|
static
|
|
RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) {
|
|
RoseGraph &g = build->g;
|
|
RoseVertex w = add_vertex(g[v], g);
|
|
DEBUG_PRINTF("added vertex %zu\n", g[w].index);
|
|
|
|
for (auto lit_id : g[w].literals) {
|
|
build->literal_info[lit_id].vertices.insert(w);
|
|
}
|
|
|
|
for (const auto &e : in_edges_range(v, g)) {
|
|
RoseVertex s = source(e, g);
|
|
add_edge(s, w, g[e], g);
|
|
DEBUG_PRINTF("added edge (%zu,%zu)\n", g[s].index, g[w].index);
|
|
}
|
|
|
|
return w;
|
|
}
|
|
|
|
namespace {
|
|
struct created_key {
|
|
explicit created_key(const RoseInEdgeProps &trep)
|
|
: prefix(trep.graph.get()), lag(trep.graph_lag) {
|
|
}
|
|
bool operator<(const created_key &b) const {
|
|
const created_key &a = *this;
|
|
ORDER_CHECK(prefix);
|
|
ORDER_CHECK(lag);
|
|
return false;
|
|
}
|
|
NGHolder *prefix;
|
|
u32 lag;
|
|
};
|
|
}
|
|
|
|
static
|
|
bool isPureAnchored(const NGHolder &h) {
|
|
return !proper_out_degree(h.startDs, h);
|
|
}
|
|
|
|
static
|
|
RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd,
|
|
const RoseInEdge &rose_edge, const RoseEdge &e) {
|
|
const RoseGraph &g = tbi.g;
|
|
const RoseVertex u = source(e, g), v = target(e, g);
|
|
const bool fixed_offset_src = g[u].fixedOffset();
|
|
const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF);
|
|
|
|
/*DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n",
|
|
g[u].index, g[v].index, g[e].minBound, g[e].maxBound,
|
|
(int)g[u].fixedOffset(), (int)g[v].left);*/
|
|
|
|
if (g[v].left) {
|
|
// Roles with prefix engines have their history handled by that prefix.
|
|
assert(!contains(bd.anch_history_edges, rose_edge));
|
|
return ROSE_ROLE_HISTORY_NONE;
|
|
}
|
|
|
|
if (contains(bd.anch_history_edges, rose_edge)) {
|
|
DEBUG_PRINTF("needs anch history\n");
|
|
return ROSE_ROLE_HISTORY_ANCH;
|
|
}
|
|
|
|
if (fixed_offset_src && has_bounds) {
|
|
DEBUG_PRINTF("needs anch history\n");
|
|
return ROSE_ROLE_HISTORY_ANCH;
|
|
}
|
|
|
|
return ROSE_ROLE_HISTORY_NONE;
|
|
}
|
|
|
|
static
|
|
bool hasSuccessorLiterals(RoseInVertex iv, const RoseInGraph &ig) {
|
|
// cppcheck-suppress useStlAlgorithm
|
|
for (auto v : adjacent_vertices_range(iv, ig)) {
|
|
if (ig[v].type != RIV_ACCEPT) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static
|
|
void createVertices(RoseBuildImpl *tbi,
|
|
map<RoseInVertex, vector<RoseVertex> > &vertex_map,
|
|
const vector<pair<RoseVertex, RoseInEdge> > &parents,
|
|
RoseInVertex iv, u32 min_offset, u32 max_offset,
|
|
u32 literalId, u32 delay, const RoseBuildData &bd) {
|
|
RoseGraph &g = tbi->g;
|
|
|
|
DEBUG_PRINTF("vertex has %zu parents\n", parents.size());
|
|
|
|
map<created_key, RoseVertex> created;
|
|
|
|
for (const auto &pv : parents) {
|
|
RoseVertex w;
|
|
const RoseInEdgeProps &edge_props = bd.ig[pv.second];
|
|
shared_ptr<NGHolder> prefix_graph = edge_props.graph;
|
|
u32 prefix_lag = edge_props.graph_lag;
|
|
|
|
created_key key(edge_props);
|
|
|
|
if (!contains(created, key)) {
|
|
assert(prefix_graph || !edge_props.haig);
|
|
w = createVertex(tbi, literalId, min_offset, max_offset);
|
|
created[key] = w;
|
|
|
|
if (prefix_graph) {
|
|
g[w].left.graph = prefix_graph;
|
|
if (edge_props.dfa) {
|
|
g[w].left.dfa = edge_props.dfa;
|
|
}
|
|
g[w].left.haig = edge_props.haig;
|
|
g[w].left.lag = prefix_lag;
|
|
|
|
// The graph already has its report id allocated - find it.
|
|
g[w].left.leftfix_report = findReportId(*prefix_graph);
|
|
|
|
if (g[w].left.dfa || g[w].left.haig) {
|
|
assert(prefix_graph);
|
|
g[w].left.dfa_min_width = findMinWidth(*prefix_graph);
|
|
g[w].left.dfa_max_width = findMaxWidth(*prefix_graph);
|
|
}
|
|
}
|
|
|
|
if (bd.som && !g[w].left.haig) {
|
|
/* no prefix - som based on literal start */
|
|
assert(!prefix_graph);
|
|
g[w].som_adjust = tbi->literals.at(literalId).elength();
|
|
DEBUG_PRINTF("set som_adjust to %u\n", g[w].som_adjust);
|
|
}
|
|
|
|
DEBUG_PRINTF(" adding new vertex index=%zu\n", tbi->g[w].index);
|
|
vertex_map[iv].emplace_back(w);
|
|
} else {
|
|
w = created[key];
|
|
}
|
|
|
|
RoseVertex p = pv.first;
|
|
|
|
RoseEdge e = add_edge(p, w, g);
|
|
DEBUG_PRINTF("adding edge (%u,%u) to parent\n", edge_props.minBound,
|
|
edge_props.maxBound);
|
|
g[e].minBound = edge_props.minBound;
|
|
if (p != tbi->root && g[w].left.graph
|
|
&& (!tbi->isAnyStart(p) || isPureAnchored(*g[w].left.graph))) {
|
|
depth mw = findMaxWidth(*g[w].left.graph);
|
|
if (mw.is_infinite()) {
|
|
g[e].maxBound = ROSE_BOUND_INF;
|
|
} else {
|
|
DEBUG_PRINTF("setting max to %s + %u\n", mw.str().c_str(),
|
|
prefix_lag);
|
|
g[e].maxBound = prefix_lag + mw;
|
|
}
|
|
} else {
|
|
g[e].maxBound = edge_props.maxBound;
|
|
}
|
|
g[e].rose_top = 0;
|
|
g[e].history = selectHistory(*tbi, bd, pv.second, e);
|
|
}
|
|
|
|
if (delay && hasSuccessorLiterals(iv, bd.ig)) {
|
|
// Add an undelayed "ghost" vertex for this literal.
|
|
u32 ghostId = tbi->literal_info[literalId].undelayed_id;
|
|
DEBUG_PRINTF("creating delay ghost vertex, id=%u\n", ghostId);
|
|
assert(ghostId != literalId);
|
|
assert(tbi->literals.at(ghostId).delay == 0);
|
|
|
|
// Adjust offsets, removing delay.
|
|
u32 ghost_min = min_offset, ghost_max = max_offset;
|
|
assert(ghost_min < ROSE_BOUND_INF && ghost_min >= delay);
|
|
ghost_min -= delay;
|
|
ghost_max -= ghost_max == ROSE_BOUND_INF ? 0 : delay;
|
|
|
|
RoseVertex g_v = createVertex(tbi, ghostId, ghost_min, ghost_max);
|
|
|
|
for (const auto &pv : parents) {
|
|
const RoseInEdgeProps &edge_props = bd.ig[pv.second];
|
|
RoseEdge e = add_edge(pv.first, g_v, tbi->g);
|
|
g[e].minBound = edge_props.minBound;
|
|
g[e].maxBound = edge_props.maxBound;
|
|
g[e].history = selectHistory(*tbi, bd, pv.second, e);
|
|
DEBUG_PRINTF("parent edge has bounds [%u,%u]\n",
|
|
edge_props.minBound, edge_props.maxBound);
|
|
}
|
|
|
|
for (auto &m : created) {
|
|
tbi->ghost[m.second] = g_v;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* ensure the holder does not accept any paths which do not end with lit */
|
|
static
|
|
void removeFalsePaths(NGHolder &g, const ue2_literal &lit) {
|
|
DEBUG_PRINTF("strip '%s'\n", dumpString(lit).c_str());
|
|
set<NFAVertex> curr, next;
|
|
curr.insert(g.accept);
|
|
curr.insert(g.acceptEod);
|
|
|
|
for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
|
|
next.clear();
|
|
for (auto curr_v : curr) {
|
|
DEBUG_PRINTF("handling %zu\n", g[curr_v].index);
|
|
vector<NFAVertex> next_cand;
|
|
insert(&next_cand, next_cand.end(),
|
|
inv_adjacent_vertices(curr_v, g));
|
|
clear_in_edges(curr_v, g);
|
|
if (curr_v == g.acceptEod) {
|
|
add_edge(g.accept, g.acceptEod, g);
|
|
}
|
|
|
|
for (auto v : next_cand) {
|
|
assert(v != g.startDs);
|
|
if (v == g.start || v == g.startDs || v == g.accept) {
|
|
continue;
|
|
}
|
|
|
|
const CharReach &cr = g[v].char_reach;
|
|
|
|
if (!overlaps(*it, cr)) {
|
|
DEBUG_PRINTF("false edge %zu\n", g[v].index);
|
|
continue;
|
|
}
|
|
|
|
NFAVertex v2 = clone_vertex(g, v);
|
|
clone_in_edges(g, v, v2);
|
|
add_edge(v2, curr_v, g);
|
|
g[v2].char_reach &= *it;
|
|
DEBUG_PRINTF("next <- %zu\n", g[v2].index);
|
|
next.insert(v2);
|
|
}
|
|
}
|
|
|
|
curr.swap(next);
|
|
}
|
|
|
|
pruneUseless(g);
|
|
clearReports(g);
|
|
assert(in_degree(g.accept, g) || in_degree(g.acceptEod, g) > 1);
|
|
assert(allMatchStatesHaveReports(g));
|
|
|
|
DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g));
|
|
}
|
|
|
|
static
|
|
RoseVertex tryForAnchoredVertex(RoseBuildImpl *tbi,
|
|
const RoseInVertexProps &iv_info,
|
|
const RoseInEdgeProps &ep) {
|
|
if (ep.graph_lag && ep.graph_lag != iv_info.s.length()) {
|
|
DEBUG_PRINTF("bad lag %u != %zu\n", ep.graph_lag, iv_info.s.length());
|
|
return RoseGraph::null_vertex(); /* TODO: better */
|
|
}
|
|
|
|
const depth anchored_max_depth(tbi->cc.grey.maxAnchoredRegion);
|
|
depth min_width(0), max_width(0);
|
|
|
|
if (ep.graph.get()) {
|
|
const depth graph_lag(ep.graph_lag);
|
|
max_width = findMaxWidth(*ep.graph) + graph_lag;
|
|
min_width = findMinWidth(*ep.graph) + graph_lag;
|
|
if (proper_out_degree(ep.graph->startDs, *ep.graph)) {
|
|
max_width = depth::infinity();
|
|
}
|
|
}
|
|
|
|
DEBUG_PRINTF("mw = %s; lag = %u\n", max_width.str().c_str(), ep.graph_lag);
|
|
|
|
NGHolder h;
|
|
|
|
if (ep.graph.get() && max_width <= anchored_max_depth) {
|
|
cloneHolder(h, *ep.graph);
|
|
|
|
/* add literal/dots */
|
|
if (ep.graph_lag) {
|
|
assert(ep.graph_lag == iv_info.s.length());
|
|
appendLiteral(h, iv_info.s);
|
|
} else {
|
|
removeFalsePaths(h, iv_info.s);
|
|
}
|
|
} else if (!ep.graph.get() && ep.maxBound < ROSE_BOUND_INF
|
|
&& iv_info.s.length() + ep.maxBound
|
|
<= tbi->cc.grey.maxAnchoredRegion) {
|
|
if (ep.maxBound || ep.minBound) {
|
|
/* TODO: handle, however these cases are not generated currently by
|
|
ng_violet */
|
|
return RoseGraph::null_vertex();
|
|
}
|
|
max_width = depth(ep.maxBound + iv_info.s.length());
|
|
min_width = depth(ep.minBound + iv_info.s.length());
|
|
add_edge(h.start, h.accept, h);
|
|
appendLiteral(h, iv_info.s);
|
|
} else {
|
|
return RoseGraph::null_vertex();
|
|
}
|
|
|
|
u32 anchored_exit_id = tbi->getNewLiteralId();
|
|
u32 remap_id = 0;
|
|
DEBUG_PRINTF(" trying to add dfa stuff\n");
|
|
int rv = addToAnchoredMatcher(*tbi, h, anchored_exit_id, &remap_id);
|
|
|
|
if (rv == ANCHORED_FAIL) {
|
|
return RoseGraph::null_vertex();
|
|
} else if (rv == ANCHORED_REMAP) {
|
|
anchored_exit_id = remap_id;
|
|
} else {
|
|
assert(rv == ANCHORED_SUCCESS);
|
|
}
|
|
|
|
// Store the literal itself in a side structure so that we can use it for
|
|
// overlap calculations later. This may be obsolete when the old Rose
|
|
// construction path (and its history selection code) goes away.
|
|
rose_literal_id lit(iv_info.s, ROSE_ANCHORED, 0);
|
|
tbi->anchoredLitSuffix.insert(make_pair(anchored_exit_id, lit));
|
|
|
|
assert(min_width <= anchored_max_depth);
|
|
assert(max_width <= anchored_max_depth);
|
|
assert(min_width <= max_width);
|
|
|
|
/* Note: bounds are end-to-end as anchored lits are considered
|
|
* to have 0 length. */
|
|
RoseVertex v = createAnchoredVertex(tbi, anchored_exit_id, min_width,
|
|
max_width);
|
|
return v;
|
|
}
|
|
|
|
static
|
|
u32 findRoseAnchorFloatingOverlap(const RoseInEdgeProps &ep,
|
|
const RoseInVertexProps &succ_vp) {
|
|
/* we need to ensure there is enough history to find the successor literal
|
|
* when we enable its group.
|
|
*/
|
|
|
|
if (!ep.graph.get()) {
|
|
return 0; /* non overlapping */
|
|
}
|
|
depth graph_min_width = findMinWidth(*ep.graph);
|
|
u32 min_width = ep.graph_lag + graph_min_width;
|
|
u32 s_len = succ_vp.s.length();
|
|
|
|
if (s_len <= min_width) {
|
|
return 0; /* no overlap */
|
|
}
|
|
|
|
u32 overlap = s_len - min_width;
|
|
DEBUG_PRINTF("found overlap of %u\n", overlap);
|
|
return overlap;
|
|
}
|
|
|
|
static
|
|
void findRoseLiteralMask(const NGHolder &h, const u32 lag, vector<u8> &msk,
|
|
vector<u8> &cmp) {
|
|
if (lag >= HWLM_MASKLEN) {
|
|
msk.clear(); cmp.clear();
|
|
return;
|
|
}
|
|
|
|
assert(in_degree(h.acceptEod, h) == 1); // no eod reports
|
|
|
|
// Start with the set of reporter vertices for this rose.
|
|
set<NFAVertex> curr, next;
|
|
insert(&curr, inv_adjacent_vertices(h.accept, h));
|
|
assert(!curr.empty());
|
|
|
|
msk.assign(HWLM_MASKLEN, 0);
|
|
cmp.assign(HWLM_MASKLEN, 0);
|
|
size_t i = HWLM_MASKLEN - lag - 1;
|
|
do {
|
|
if (curr.empty() || contains(curr, h.start) ||
|
|
contains(curr, h.startDs)) {
|
|
DEBUG_PRINTF("end of the road\n");
|
|
break;
|
|
}
|
|
|
|
next.clear();
|
|
CharReach cr;
|
|
for (auto v : curr) {
|
|
DEBUG_PRINTF("vertex %zu, reach %s\n", h[v].index,
|
|
describeClass(h[v].char_reach).c_str());
|
|
cr |= h[v].char_reach;
|
|
insert(&next, inv_adjacent_vertices(v, h));
|
|
}
|
|
make_and_cmp_mask(cr, &msk[i], &cmp[i]);
|
|
DEBUG_PRINTF("%zu: reach=%s, msk=%u, cmp=%u\n", i,
|
|
describeClass(cr).c_str(), msk.at(i), cmp.at(i));
|
|
curr.swap(next);
|
|
} while (i-- > 0);
|
|
}
|
|
|
|
static
|
|
void doRoseLiteralVertex(RoseBuildImpl *tbi, bool use_eod_table,
|
|
map<RoseInVertex, vector<RoseVertex> > &vertex_map,
|
|
const vector<pair<RoseVertex, RoseInEdge> > &parents,
|
|
RoseInVertex iv, const RoseBuildData &bd) {
|
|
const RoseInGraph &ig = bd.ig;
|
|
const RoseInVertexProps &iv_info = ig[iv];
|
|
assert(iv_info.type == RIV_LITERAL);
|
|
assert(!parents.empty()); /* start vertices should not be here */
|
|
|
|
// ng_violet should have ensured that mixed-sensitivity literals are no
|
|
// longer than the benefits max width.
|
|
assert(iv_info.s.length() <= MAX_MASK2_WIDTH ||
|
|
!mixed_sensitivity(iv_info.s));
|
|
|
|
// Rose graph construction process should have given us a min_offset.
|
|
assert(iv_info.min_offset > 0);
|
|
|
|
if (use_eod_table) {
|
|
goto floating;
|
|
}
|
|
|
|
DEBUG_PRINTF("rose find vertex\n");
|
|
if (parents.size() == 1) {
|
|
const RoseVertex u = parents.front().first;
|
|
const RoseInEdgeProps &ep = ig[parents.front().second];
|
|
|
|
if (!tbi->isAnyStart(u)) {
|
|
goto floating;
|
|
}
|
|
|
|
if (!ep.graph && ep.maxBound == ROSE_BOUND_INF) {
|
|
goto floating;
|
|
}
|
|
if (ep.graph && !isAnchored(*ep.graph)) {
|
|
goto floating;
|
|
}
|
|
|
|
DEBUG_PRINTF("cand for anchored maxBound %u, %p (%d)\n", ep.maxBound,
|
|
ep.graph.get(), ep.graph ? (int)isAnchored(*ep.graph) : 3);
|
|
|
|
/* need to check if putting iv into the anchored table would create
|
|
* any bad_overlap relationships with its successor literals */
|
|
for (const auto &e : out_edges_range(iv, ig)) {
|
|
RoseInVertex t = target(e, ig);
|
|
u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[t]);
|
|
DEBUG_PRINTF("found overlap of %u\n", overlap);
|
|
if (overlap > tbi->cc.grey.maxHistoryAvailable + 1) {
|
|
goto floating;
|
|
}
|
|
}
|
|
|
|
RoseVertex v = tryForAnchoredVertex(tbi, iv_info, ep);
|
|
if (v != RoseGraph::null_vertex()) {
|
|
DEBUG_PRINTF("add anchored literal vertex\n");
|
|
vertex_map[iv].emplace_back(v);
|
|
return;
|
|
}
|
|
}
|
|
|
|
floating:
|
|
vector<u8> msk, cmp;
|
|
if (tbi->cc.grey.roseHamsterMasks && in_degree(iv, ig) == 1) {
|
|
RoseInEdge e = *in_edges(iv, ig).first;
|
|
if (ig[e].graph) {
|
|
findRoseLiteralMask(*ig[e].graph, ig[e].graph_lag, msk, cmp);
|
|
}
|
|
}
|
|
|
|
u32 delay = iv_info.delay;
|
|
rose_literal_table table = use_eod_table ? ROSE_EOD_ANCHORED : ROSE_FLOATING;
|
|
|
|
u32 literalId = tbi->getLiteralId(iv_info.s, msk, cmp, delay, table);
|
|
|
|
DEBUG_PRINTF("literal=%u (len=%zu, delay=%u, offsets=[%u,%u] '%s')\n",
|
|
literalId, iv_info.s.length(), delay, iv_info.min_offset,
|
|
iv_info.max_offset, dumpString(iv_info.s).c_str());
|
|
|
|
createVertices(tbi, vertex_map, parents, iv, iv_info.min_offset,
|
|
iv_info.max_offset, literalId, delay, bd);
|
|
}
|
|
|
|
static
|
|
unique_ptr<NGHolder> makeRoseEodPrefix(const NGHolder &h, RoseBuildImpl &build,
|
|
map<flat_set<ReportID>, ReportID> &remap) {
|
|
assert(generates_callbacks(h));
|
|
assert(!in_degree(h.accept, h));
|
|
auto gg = cloneHolder(h);
|
|
NGHolder &g = *gg;
|
|
g.kind = is_triggered(h) ? NFA_INFIX : NFA_PREFIX;
|
|
|
|
// Move acceptEod edges over to accept.
|
|
vector<NFAEdge> dead;
|
|
for (const auto &e : in_edges_range(g.acceptEod, g)) {
|
|
NFAVertex u = source(e, g);
|
|
if (u == g.accept) {
|
|
continue;
|
|
}
|
|
add_edge_if_not_present(u, g.accept, g);
|
|
dead.emplace_back(e);
|
|
|
|
if (!contains(remap, g[u].reports)) {
|
|
remap[g[u].reports] = build.getNewNfaReport();
|
|
}
|
|
|
|
g[u].reports = { remap[g[u].reports] };
|
|
}
|
|
|
|
remove_edges(dead, g);
|
|
return gg;
|
|
}
|
|
|
|
static
|
|
u32 getEodEventID(RoseBuildImpl &build) {
|
|
// Allocate the EOD event if it hasn't been already.
|
|
if (build.eod_event_literal_id == MO_INVALID_IDX) {
|
|
build.eod_event_literal_id = build.getLiteralId({}, 0, ROSE_EVENT);
|
|
}
|
|
|
|
return build.eod_event_literal_id;
|
|
}
|
|
|
|
static
|
|
void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u,
|
|
const NGHolder &h) {
|
|
assert(!build.isInETable(u));
|
|
|
|
RoseGraph &g = build.g;
|
|
map<flat_set<ReportID>, ReportID> report_remap;
|
|
shared_ptr<NGHolder> eod_leftfix
|
|
= makeRoseEodPrefix(h, build, report_remap);
|
|
|
|
u32 eod_event = getEodEventID(build);
|
|
|
|
for (const auto &report_mapping : report_remap) {
|
|
RoseVertex v = add_vertex(g);
|
|
g[v].literals.insert(eod_event);
|
|
build.literal_info[eod_event].vertices.insert(v);
|
|
|
|
g[v].left.graph = eod_leftfix;
|
|
g[v].left.leftfix_report = report_mapping.second;
|
|
g[v].left.lag = 0;
|
|
RoseEdge e1 = add_edge(u, v, g);
|
|
g[e1].minBound = 0;
|
|
g[e1].maxBound = ROSE_BOUND_INF;
|
|
g[v].min_offset = add_rose_depth(g[u].min_offset,
|
|
findMinWidth(*g[v].left.graph));
|
|
g[v].max_offset = ROSE_BOUND_INF;
|
|
|
|
depth max_width = findMaxWidth(*g[v].left.graph);
|
|
if (u != build.root && max_width.is_finite()
|
|
&& (!build.isAnyStart(u) || isPureAnchored(*g[v].left.graph))) {
|
|
g[e1].maxBound = max_width;
|
|
g[v].max_offset = add_rose_depth(g[u].max_offset, max_width);
|
|
}
|
|
|
|
g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix
|
|
RoseVertex w = add_vertex(g);
|
|
g[w].eod_accept = true;
|
|
g[w].reports = report_mapping.first;
|
|
g[w].min_offset = g[v].min_offset;
|
|
g[w].max_offset = g[v].max_offset;
|
|
RoseEdge e = add_edge(v, w, g);
|
|
g[e].minBound = 0;
|
|
g[e].maxBound = 0;
|
|
/* No need to set history as the event is only delivered at the last
|
|
* byte anyway - no need to invalidate stale entries. */
|
|
g[e].history = ROSE_ROLE_HISTORY_NONE;
|
|
DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index);
|
|
}
|
|
}
|
|
|
|
static
|
|
void doRoseAcceptVertex(RoseBuildImpl *tbi,
|
|
const vector<pair<RoseVertex, RoseInEdge> > &parents,
|
|
RoseInVertex iv, const RoseBuildData &bd) {
|
|
const RoseInGraph &ig = bd.ig;
|
|
assert(ig[iv].type == RIV_ACCEPT || ig[iv].type == RIV_ACCEPT_EOD);
|
|
|
|
RoseGraph &g = tbi->g;
|
|
|
|
for (const auto &pv : parents) {
|
|
RoseVertex u = pv.first;
|
|
const RoseInEdgeProps &edge_props = bd.ig[pv.second];
|
|
|
|
/* We need to duplicate the parent vertices if:
|
|
*
|
|
* 1) It already has a suffix, etc as we are going to add the specified
|
|
* suffix, etc to the parents and we do not want to overwrite the
|
|
* existing information.
|
|
*
|
|
* 2) We are making the an EOD accept and the vertex already has other
|
|
* out-edges - The LAST_BYTE history used for EOD accepts is
|
|
* incompatible with normal successors. As accepts are processed last we
|
|
* do not need to worry about other normal successors being added later.
|
|
*/
|
|
if (g[u].suffix || !g[u].reports.empty()
|
|
|| (ig[iv].type == RIV_ACCEPT_EOD && out_degree(u, g)
|
|
&& !edge_props.graph)
|
|
|| (!isLeafNode(u, g) && !tbi->isAnyStart(u))) {
|
|
DEBUG_PRINTF("duplicating for parent %zu\n", g[u].index);
|
|
assert(!tbi->isAnyStart(u));
|
|
u = duplicate(tbi, u);
|
|
g[u].suffix.reset();
|
|
g[u].eod_accept = false;
|
|
}
|
|
|
|
assert(!g[u].suffix);
|
|
if (ig[iv].type == RIV_ACCEPT) {
|
|
assert(!tbi->isAnyStart(u));
|
|
if (edge_props.dfa) {
|
|
DEBUG_PRINTF("adding early dfa suffix to i%zu\n", g[u].index);
|
|
g[u].suffix.rdfa = edge_props.dfa;
|
|
g[u].suffix.dfa_min_width = findMinWidth(*edge_props.graph);
|
|
g[u].suffix.dfa_max_width = findMaxWidth(*edge_props.graph);
|
|
} else if (edge_props.graph) {
|
|
DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index);
|
|
g[u].suffix.graph = edge_props.graph;
|
|
assert(g[u].suffix.graph->kind == NFA_SUFFIX);
|
|
/* TODO: set dfa_(min|max)_width */
|
|
} else if (edge_props.haig) {
|
|
DEBUG_PRINTF("adding suffaig to i%zu\n", g[u].index);
|
|
g[u].suffix.haig = edge_props.haig;
|
|
} else {
|
|
DEBUG_PRINTF("adding boring accept to i%zu\n", g[u].index);
|
|
assert(!g[u].eod_accept);
|
|
g[u].reports = ig[iv].reports;
|
|
}
|
|
} else {
|
|
assert(ig[iv].type == RIV_ACCEPT_EOD);
|
|
assert(!edge_props.haig);
|
|
|
|
if (!edge_props.graph) {
|
|
RoseVertex w = add_vertex(g);
|
|
g[w].eod_accept = true;
|
|
g[w].reports = ig[iv].reports;
|
|
g[w].min_offset = g[u].min_offset;
|
|
g[w].max_offset = g[u].max_offset;
|
|
RoseEdge e = add_edge(u, w, g);
|
|
g[e].minBound = 0;
|
|
g[e].maxBound = 0;
|
|
g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE;
|
|
DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index);
|
|
continue;
|
|
}
|
|
|
|
const NGHolder &h = *edge_props.graph;
|
|
assert(!in_degree(h.accept, h));
|
|
assert(generates_callbacks(h));
|
|
|
|
if (tbi->isInETable(u)) {
|
|
assert(h.kind == NFA_SUFFIX);
|
|
assert(!tbi->isAnyStart(u));
|
|
/* etable can't/shouldn't use eod event */
|
|
DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index);
|
|
g[u].suffix.graph = edge_props.graph;
|
|
continue;
|
|
}
|
|
|
|
makeEodEventLeftfix(*tbi, u, h);
|
|
}
|
|
}
|
|
}
|
|
|
|
static
|
|
bool suitableForEod(const RoseInGraph &ig, vector<RoseInVertex> topo,
|
|
u32 *max_len, const CompileContext &cc) {
|
|
map<RoseInVertex, u32> max_depth_from_eod;
|
|
*max_len = 0;
|
|
|
|
reverse(topo.begin(), topo.end()); /* we want to start at accept end */
|
|
|
|
for (auto v : topo) {
|
|
u32 v_depth = 0;
|
|
|
|
if (ig[v].type == RIV_ACCEPT) {
|
|
DEBUG_PRINTF("[ACCEPT]\n");
|
|
// cppcheck-suppress useStlAlgorithm
|
|
for (const auto &e : in_edges_range(v, ig)) {
|
|
if (!ig[e].graph || !can_only_match_at_eod(*ig[e].graph)) {
|
|
DEBUG_PRINTF("floating accept\n");
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
switch (ig[v].type) {
|
|
case RIV_LITERAL:
|
|
DEBUG_PRINTF("[LITERAL]\n");
|
|
break;
|
|
case RIV_START:
|
|
DEBUG_PRINTF("[START]\n");
|
|
break;
|
|
case RIV_ANCHORED_START:
|
|
DEBUG_PRINTF("[ANCHOR]\n");
|
|
break;
|
|
case RIV_ACCEPT:
|
|
break;
|
|
case RIV_ACCEPT_EOD:
|
|
DEBUG_PRINTF("[EOD]\n");
|
|
break;
|
|
default:
|
|
assert(0);
|
|
DEBUG_PRINTF("????\n");
|
|
return false;
|
|
}
|
|
|
|
for (const auto &e : out_edges_range(v, ig)) {
|
|
RoseInVertex t = target(e, ig);
|
|
|
|
assert(contains(max_depth_from_eod, t));
|
|
u64a max_width;
|
|
|
|
if (ig[v].type == RIV_START || ig[v].type == RIV_ANCHORED_START) {
|
|
/* start itself doesn't need to be in history buffer
|
|
* just need to make sure all succ literals are ok */
|
|
if (ig[t].type == RIV_LITERAL) {
|
|
max_width = ig[t].s.length();
|
|
} else {
|
|
max_width = 0;
|
|
}
|
|
if (ig[e].graph) {
|
|
depth graph_max_width = findMaxWidth(*ig[e].graph);
|
|
DEBUG_PRINTF("graph max width %s, lag %u\n",
|
|
graph_max_width.str().c_str(),
|
|
ig[e].graph_lag);
|
|
if (!graph_max_width.is_finite()) {
|
|
DEBUG_PRINTF("fail due to graph with inf max width\n");
|
|
return false;
|
|
}
|
|
max_width += graph_max_width;
|
|
}
|
|
} else if (ig[e].haig) {
|
|
DEBUG_PRINTF("fail due to haig\n");
|
|
return false;
|
|
} else if (ig[e].graph) {
|
|
depth graph_max_width = findMaxWidth(*ig[e].graph);
|
|
DEBUG_PRINTF("graph max width %s, lag %u\n",
|
|
graph_max_width.str().c_str(), ig[e].graph_lag);
|
|
if (!graph_max_width.is_finite()) {
|
|
DEBUG_PRINTF("fail due to graph with inf max width\n");
|
|
return false;
|
|
}
|
|
max_width = ig[e].graph_lag + graph_max_width;
|
|
} else {
|
|
max_width = ig[e].maxBound;
|
|
if (ig[t].type == RIV_LITERAL) {
|
|
max_width += ig[t].s.length();
|
|
}
|
|
}
|
|
|
|
max_width += max_depth_from_eod[t];
|
|
if (max_width > ROSE_BOUND_INF) {
|
|
max_width = ROSE_BOUND_INF;
|
|
}
|
|
|
|
DEBUG_PRINTF("max_width=%llu\n", max_width);
|
|
|
|
ENSURE_AT_LEAST(&v_depth, (u32)max_width);
|
|
}
|
|
|
|
if (v_depth == ROSE_BOUND_INF
|
|
|| v_depth > cc.grey.maxHistoryAvailable) {
|
|
DEBUG_PRINTF("not suitable for eod table %u\n", v_depth);
|
|
return false;
|
|
}
|
|
|
|
max_depth_from_eod[v] = v_depth;
|
|
ENSURE_AT_LEAST(max_len, v_depth);
|
|
}
|
|
|
|
DEBUG_PRINTF("to the eod table and beyond\n");
|
|
return true;
|
|
}
|
|
|
|
static
|
|
void shift_accepts_to_end(const RoseInGraph &ig,
|
|
vector<RoseInVertex> &topo_order) {
|
|
stable_partition(begin(topo_order), end(topo_order),
|
|
[&](RoseInVertex v){ return !is_any_accept(v, ig); });
|
|
}
|
|
|
|
static
|
|
void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) {
|
|
const RoseInGraph &ig = bd.ig;
|
|
|
|
/* add the pattern in to the main rose graph */
|
|
DEBUG_PRINTF("%srose pop\n", bd.som ? "som " : "");
|
|
|
|
/* Note: an input vertex may need to create several rose vertices. This is
|
|
* primarily because a RoseVertex can only have 1 one leftfix */
|
|
map<RoseInVertex, vector<RoseVertex> > vertex_map;
|
|
|
|
vector<RoseInVertex> v_order = topo_order(ig);
|
|
shift_accepts_to_end(ig, v_order);
|
|
|
|
u32 eod_space_required;
|
|
bool use_eod_table = suitableForEod(ig, v_order, &eod_space_required,
|
|
tbi->cc);
|
|
if (use_eod_table) {
|
|
ENSURE_AT_LEAST(&tbi->ematcher_region_size, eod_space_required);
|
|
}
|
|
|
|
assert(ig[v_order.front()].type == RIV_START
|
|
|| ig[v_order.front()].type == RIV_ANCHORED_START);
|
|
|
|
for (RoseInVertex iv : v_order) {
|
|
DEBUG_PRINTF("vertex %zu\n", ig[iv].index);
|
|
|
|
if (ig[iv].type == RIV_START) {
|
|
DEBUG_PRINTF("is root\n");
|
|
vertex_map[iv].emplace_back(tbi->root);
|
|
continue;
|
|
} else if (ig[iv].type == RIV_ANCHORED_START) {
|
|
DEBUG_PRINTF("is anchored root\n");
|
|
vertex_map[iv].emplace_back(tbi->anchored_root);
|
|
continue;
|
|
}
|
|
|
|
vector<pair<RoseVertex, RoseInEdge> > parents;
|
|
for (const auto &e : in_edges_range(iv, ig)) {
|
|
RoseInVertex u = source(e, ig);
|
|
assert(contains(vertex_map, u));
|
|
const vector<RoseVertex> &images = vertex_map[u];
|
|
|
|
// We should have no dupes.
|
|
assert(set<RoseVertex>(images.begin(), images.end()).size()
|
|
== images.size());
|
|
|
|
for (auto v_image : images) {
|
|
// v_image should NOT already be in our parents list.
|
|
assert(find_if(parents.begin(), parents.end(),
|
|
[&v_image](const pair<RoseVertex, RoseInEdge> &p) {
|
|
return p.first == v_image;
|
|
}) == parents.end());
|
|
|
|
parents.emplace_back(v_image, e);
|
|
|
|
if (tbi->isAnchored(v_image)) {
|
|
assert(!use_eod_table);
|
|
u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[iv]);
|
|
assert(overlap <= tbi->cc.grey.maxHistoryAvailable + 1);
|
|
ENSURE_AT_LEAST(&tbi->max_rose_anchored_floating_overlap,
|
|
overlap);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (ig[iv].type == RIV_LITERAL) {
|
|
DEBUG_PRINTF("LITERAL '%s'\n", dumpString(ig[iv].s).c_str());
|
|
assert(!isLeafNode(iv, ig));
|
|
doRoseLiteralVertex(tbi, use_eod_table, vertex_map, parents, iv,
|
|
bd);
|
|
} else {
|
|
if (ig[iv].type == RIV_ACCEPT) {
|
|
DEBUG_PRINTF("ACCEPT\n");
|
|
} else {
|
|
assert(ig[iv].type == RIV_ACCEPT_EOD);
|
|
DEBUG_PRINTF("ACCEPT_EOD\n");
|
|
}
|
|
assert(isLeafNode(iv, ig)); /* accepts are final */
|
|
doRoseAcceptVertex(tbi, parents, iv, bd);
|
|
}
|
|
}
|
|
DEBUG_PRINTF("done\n");
|
|
}
|
|
|
|
template<typename GraphT>
|
|
static
|
|
bool empty(const GraphT &g) {
|
|
typename GraphT::vertex_iterator vi, ve;
|
|
tie(vi, ve) = vertices(g);
|
|
return vi == ve;
|
|
}
|
|
|
|
static
|
|
bool canImplementGraph(NGHolder &h, bool prefilter, const ReportManager &rm,
|
|
const CompileContext &cc) {
|
|
if (isImplementableNFA(h, &rm, cc)) {
|
|
return true;
|
|
}
|
|
|
|
if (prefilter && cc.grey.prefilterReductions) {
|
|
// If we're prefiltering, we can have another go with a reduced graph.
|
|
UNUSED size_t numBefore = num_vertices(h);
|
|
prefilterReductions(h, cc);
|
|
UNUSED size_t numAfter = num_vertices(h);
|
|
DEBUG_PRINTF("reduced from %zu to %zu vertices\n", numBefore, numAfter);
|
|
|
|
if (isImplementableNFA(h, &rm, cc)) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
DEBUG_PRINTF("unable to build engine\n");
|
|
return false;
|
|
}
|
|
|
|
static
|
|
bool predsAreDelaySensitive(const RoseInGraph &ig, RoseInVertex v) {
|
|
assert(in_degree(v, ig));
|
|
|
|
// cppcheck-suppress useStlAlgorithm
|
|
for (const auto &e : in_edges_range(v, ig)) {
|
|
if (ig[e].graph || ig[e].haig) {
|
|
DEBUG_PRINTF("edge graph\n");
|
|
return true;
|
|
}
|
|
if (ig[e].minBound || ig[e].maxBound != ROSE_BOUND_INF) {
|
|
DEBUG_PRINTF("edge bounds\n");
|
|
return true;
|
|
}
|
|
|
|
RoseInVertex u = source(e, ig);
|
|
if (ig[u].type == RIV_START) {
|
|
continue;
|
|
}
|
|
if (ig[u].type != RIV_LITERAL) {
|
|
DEBUG_PRINTF("unsafe pred vertex\n");
|
|
return true;
|
|
}
|
|
if (ig[u].delay) {
|
|
DEBUG_PRINTF("pred has delay\n");
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static
|
|
u32 maxAvailableDelay(const ue2_literal &pred_key, const ue2_literal &lit_key) {
|
|
/* overly conservative if only part of the string is nocase */
|
|
string pred = pred_key.get_string();
|
|
string lit = lit_key.get_string();
|
|
|
|
if (pred_key.any_nocase() || lit_key.any_nocase()) {
|
|
upperString(pred);
|
|
upperString(lit);
|
|
}
|
|
|
|
string::size_type last = pred.rfind(lit);
|
|
if (last == string::npos) {
|
|
return MAX_DELAY;
|
|
}
|
|
|
|
u32 raw = pred.size() - last - 1;
|
|
return MIN(raw, MAX_DELAY);
|
|
}
|
|
|
|
static
|
|
u32 findMaxSafeDelay(const RoseInGraph &ig, RoseInVertex u, RoseInVertex v) {
|
|
// First, check the overlap constraints on (u,v).
|
|
size_t max_delay;
|
|
if (ig[v].type == RIV_LITERAL) {
|
|
DEBUG_PRINTF("lit->lit edge: '%s' -> '%s'\n",
|
|
escapeString(ig[u].s).c_str(),
|
|
escapeString(ig[v].s).c_str());
|
|
max_delay = maxAvailableDelay(ig[u].s, ig[v].s);
|
|
} else if (ig[v].type == RIV_ACCEPT) {
|
|
DEBUG_PRINTF("lit->accept edge: '%s' -> ACCEPT\n",
|
|
escapeString(ig[u].s).c_str());
|
|
max_delay = MAX_DELAY;
|
|
} else {
|
|
assert(0);
|
|
return 0;
|
|
}
|
|
|
|
DEBUG_PRINTF("max safe delay for this edge: %zu\n", max_delay);
|
|
|
|
// Now consider the predecessors of u.
|
|
for (const auto &e : in_edges_range(u, ig)) {
|
|
RoseInVertex w = source(e, ig);
|
|
if (ig[w].type == RIV_START) {
|
|
continue;
|
|
}
|
|
assert(ig[w].type == RIV_LITERAL);
|
|
assert(ig[w].delay == 0);
|
|
|
|
DEBUG_PRINTF("pred lit->lit edge: '%s' -> '%s'\n",
|
|
escapeString(ig[w].s).c_str(),
|
|
escapeString(ig[u].s).c_str());
|
|
|
|
// We cannot delay the literal on u so much that a predecessor literal
|
|
// could occur in the delayed region. For example, consider
|
|
// 'barman.*foobar': if we allow 'foobar' to be delayed by 3, then
|
|
// 'barman' could occur in the input string and race with 'foobar', as
|
|
// in 'foobarman'.
|
|
|
|
const size_t pred_len = ig[w].s.length();
|
|
size_t overlap = maxOverlap(ig[u].s, ig[w].s, 0);
|
|
DEBUG_PRINTF("pred_len=%zu, overlap=%zu\n", pred_len, overlap);
|
|
assert(overlap <= pred_len);
|
|
size_t max_lit_delay = pred_len - min(overlap + 1, pred_len);
|
|
DEBUG_PRINTF("overlap=%zu -> max_lit_delay=%zu\n", overlap,
|
|
max_lit_delay);
|
|
max_delay = min(max_delay, max_lit_delay);
|
|
}
|
|
|
|
DEBUG_PRINTF("max_delay=%zu\n", max_delay);
|
|
assert(max_delay <= MAX_DELAY);
|
|
return max_delay;
|
|
}
|
|
|
|
static
|
|
bool transformInfixToDelay(const RoseInGraph &ig, const RoseInEdge &e,
|
|
const CompileContext &cc, u32 *delay_out) {
|
|
const u32 max_history =
|
|
cc.streaming ? cc.grey.maxHistoryAvailable : ROSE_BOUND_INF;
|
|
|
|
const RoseInVertex u = source(e, ig), v = target(e, ig);
|
|
const u32 graph_lag = ig[e].graph_lag;
|
|
|
|
// Clone a copy of the graph, as we need to be able to roll back this
|
|
// operation.
|
|
NGHolder h;
|
|
cloneHolder(h, *ig[e].graph);
|
|
|
|
DEBUG_PRINTF("target literal: %s\n", dumpString(ig[v].s).c_str());
|
|
DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n", num_vertices(h),
|
|
graph_lag);
|
|
|
|
assert(graph_lag <= ig[v].s.length());
|
|
if (graph_lag < ig[v].s.length()) {
|
|
size_t len = ig[v].s.length() - graph_lag;
|
|
ue2_literal lit(ig[v].s.substr(0, len));
|
|
DEBUG_PRINTF("lit2=%s\n", dumpString(lit).c_str());
|
|
u32 delay2 = removeTrailingLiteralStates(h, lit, max_history);
|
|
if (delay2 == MO_INVALID_IDX) {
|
|
DEBUG_PRINTF("couldn't remove trailing literal\n");
|
|
return false;
|
|
}
|
|
if (delay2 != len) {
|
|
DEBUG_PRINTF("couldn't remove entire trailing literal\n");
|
|
return false;
|
|
}
|
|
}
|
|
|
|
PureRepeat repeat;
|
|
if (!isPureRepeat(h, repeat)) {
|
|
DEBUG_PRINTF("graph is not repeat\n");
|
|
return false;
|
|
}
|
|
DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
|
|
if (!repeat.bounds.max.is_infinite()) {
|
|
DEBUG_PRINTF("not inf\n");
|
|
return false;
|
|
}
|
|
|
|
if (!repeat.reach.all()) {
|
|
DEBUG_PRINTF("non-dot reach\n");
|
|
return false;
|
|
}
|
|
|
|
u32 delay = ig[v].s.length() + repeat.bounds.min;
|
|
if (delay > MAX_DELAY) {
|
|
DEBUG_PRINTF("delay %u > MAX_DELAY\n", delay);
|
|
return false;
|
|
}
|
|
|
|
if (delay + ig[u].s.length() - 1 > max_history) {
|
|
DEBUG_PRINTF("delay too large for history\n");
|
|
return false;
|
|
}
|
|
|
|
*delay_out = delay;
|
|
return true;
|
|
}
|
|
|
|
static
|
|
void transformLiteralDelay(RoseInGraph &ig, const CompileContext &cc) {
|
|
if (!cc.grey.roseTransformDelay) {
|
|
return;
|
|
}
|
|
|
|
for (auto u : vertices_range(ig)) {
|
|
if (ig[u].type != RIV_LITERAL) {
|
|
continue;
|
|
}
|
|
if (out_degree(u, ig) != 1) {
|
|
continue;
|
|
}
|
|
|
|
RoseInEdge e = *out_edges(u, ig).first;
|
|
RoseInVertex v = target(e, ig);
|
|
if (ig[v].type != RIV_LITERAL) {
|
|
continue;
|
|
}
|
|
if (ig[e].haig) {
|
|
continue;
|
|
}
|
|
if (!ig[e].graph) {
|
|
continue;
|
|
}
|
|
|
|
if (predsAreDelaySensitive(ig, u)) {
|
|
DEBUG_PRINTF("preds are delay sensitive\n");
|
|
continue;
|
|
}
|
|
|
|
u32 max_delay = findMaxSafeDelay(ig, u, v);
|
|
|
|
DEBUG_PRINTF("lit->lit edge with graph: '%s' -> '%s'\n",
|
|
escapeString(ig[u].s).c_str(),
|
|
escapeString(ig[v].s).c_str());
|
|
|
|
u32 delay = 0;
|
|
if (!transformInfixToDelay(ig, e, cc, &delay)) {
|
|
continue;
|
|
}
|
|
|
|
if (delay > max_delay) {
|
|
DEBUG_PRINTF("delay=%u > max_delay=%u\n", delay, max_delay);
|
|
continue;
|
|
}
|
|
|
|
DEBUG_PRINTF("setting lit delay to %u and deleting graph\n", delay);
|
|
ig[u].delay = delay;
|
|
ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay);
|
|
ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay);
|
|
ig[e].graph_lag = 0;
|
|
ig[e].graph.reset();
|
|
ig[e].minBound = 0;
|
|
ig[e].maxBound = ROSE_BOUND_INF;
|
|
}
|
|
}
|
|
|
|
static
|
|
bool transformInfixToAnchBounds(const RoseInGraph &ig, const RoseInEdge &e,
|
|
const CompileContext &cc, DepthMinMax *bounds) {
|
|
const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable
|
|
: ROSE_BOUND_INF;
|
|
|
|
const RoseInVertex v = target(e, ig);
|
|
const u32 graph_lag = ig[e].graph_lag;
|
|
|
|
// Clone a copy of the graph, as we need to be able to roll back this
|
|
// operation.
|
|
NGHolder h;
|
|
cloneHolder(h, *ig[e].graph);
|
|
|
|
DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n", num_vertices(h),
|
|
graph_lag);
|
|
|
|
assert(graph_lag <= ig[v].s.length());
|
|
if (graph_lag < ig[v].s.length()) {
|
|
size_t len = ig[v].s.length() - graph_lag;
|
|
ue2_literal lit(ig[v].s.substr(0, len));
|
|
DEBUG_PRINTF("lit2=%s\n", dumpString(lit).c_str());
|
|
u32 delay2 = removeTrailingLiteralStates(h, lit, max_history);
|
|
if (delay2 == MO_INVALID_IDX) {
|
|
DEBUG_PRINTF("couldn't remove trailing literal\n");
|
|
return false;
|
|
}
|
|
if (delay2 != len) {
|
|
DEBUG_PRINTF("couldn't remove entire trailing literal\n");
|
|
return false;
|
|
}
|
|
}
|
|
|
|
PureRepeat repeat;
|
|
if (!isPureRepeat(h, repeat)) {
|
|
DEBUG_PRINTF("graph is not repeat\n");
|
|
return false;
|
|
}
|
|
DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
|
|
if (!repeat.bounds.max.is_infinite()) {
|
|
DEBUG_PRINTF("not inf\n");
|
|
return false;
|
|
}
|
|
|
|
if (!repeat.reach.all()) {
|
|
DEBUG_PRINTF("non-dot reach\n");
|
|
return false;
|
|
}
|
|
|
|
*bounds = repeat.bounds;
|
|
return true;
|
|
}
|
|
|
|
static
|
|
void transformAnchoredLiteralOverlap(RoseInGraph &ig, RoseBuildData &bd,
|
|
const CompileContext &cc) {
|
|
if (!cc.grey.roseTransformDelay) {
|
|
return;
|
|
}
|
|
|
|
for (const auto &e : edges_range(ig)) {
|
|
const RoseInVertex u = source(e, ig);
|
|
const RoseInVertex v = target(e, ig);
|
|
|
|
if (ig[u].type != RIV_LITERAL || ig[v].type != RIV_LITERAL) {
|
|
continue;
|
|
}
|
|
if (ig[e].haig || !ig[e].graph) {
|
|
continue;
|
|
}
|
|
|
|
if (ig[u].min_offset != ig[u].max_offset) {
|
|
DEBUG_PRINTF("u not fixed depth\n");
|
|
continue;
|
|
}
|
|
|
|
DEBUG_PRINTF("anch_lit->lit edge with graph: '%s' -> '%s'\n",
|
|
escapeString(ig[u].s).c_str(),
|
|
escapeString(ig[v].s).c_str());
|
|
|
|
DepthMinMax bounds;
|
|
if (!transformInfixToAnchBounds(ig, e, cc, &bounds)) {
|
|
continue;
|
|
}
|
|
|
|
DEBUG_PRINTF("setting bounds to %s and deleting graph\n",
|
|
bounds.str().c_str());
|
|
ig[e].graph_lag = 0;
|
|
ig[e].graph.reset();
|
|
ig[e].minBound = bounds.min;
|
|
ig[e].maxBound = bounds.max.is_finite() ? (u32)bounds.max
|
|
: ROSE_BOUND_INF;
|
|
bd.anch_history_edges.insert(e);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* \brief Transform small trailing dot repeat suffixes into delay on the last
|
|
* literal.
|
|
*
|
|
* For example, the case /hatstand.*teakettle./s can just delay 'teakettle' +1
|
|
* rather than having a suffix to handle the dot.
|
|
*
|
|
* This transformation looks for literal->accept edges and transforms them if
|
|
* appropriate. It doesn't handle complex cases where the literal has more than
|
|
* one successor.
|
|
*/
|
|
static
|
|
void transformSuffixDelay(RoseInGraph &ig, const CompileContext &cc) {
|
|
if (!cc.grey.roseTransformDelay) {
|
|
return;
|
|
}
|
|
|
|
const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable
|
|
: ROSE_BOUND_INF;
|
|
|
|
set<RoseInVertex> modified_accepts; // may be dead after transform
|
|
|
|
for (auto u : vertices_range(ig)) {
|
|
if (ig[u].type != RIV_LITERAL) {
|
|
continue;
|
|
}
|
|
if (out_degree(u, ig) != 1) {
|
|
continue;
|
|
}
|
|
|
|
RoseInEdge e = *out_edges(u, ig).first;
|
|
RoseInVertex v = target(e, ig);
|
|
if (ig[v].type != RIV_ACCEPT) {
|
|
continue;
|
|
}
|
|
if (ig[e].haig) {
|
|
continue;
|
|
}
|
|
if (!ig[e].graph) {
|
|
continue;
|
|
}
|
|
|
|
if (predsAreDelaySensitive(ig, u)) {
|
|
DEBUG_PRINTF("preds are delay sensitive\n");
|
|
continue;
|
|
}
|
|
|
|
DEBUG_PRINTF("lit->accept edge with graph: lit='%s'\n",
|
|
escapeString(ig[u].s).c_str());
|
|
|
|
const NGHolder &h = *ig[e].graph;
|
|
const set<ReportID> reports = all_reports(h);
|
|
if (reports.size() != 1) {
|
|
DEBUG_PRINTF("too many reports\n");
|
|
continue;
|
|
}
|
|
|
|
PureRepeat repeat;
|
|
if (!isPureRepeat(h, repeat)) {
|
|
DEBUG_PRINTF("suffix graph is not repeat\n");
|
|
continue;
|
|
}
|
|
DEBUG_PRINTF("suffix graph is %s repeat\n",
|
|
repeat.bounds.str().c_str());
|
|
|
|
if (!repeat.reach.all()) {
|
|
DEBUG_PRINTF("non-dot reach\n");
|
|
continue;
|
|
}
|
|
|
|
if (repeat.bounds.min != repeat.bounds.max ||
|
|
repeat.bounds.min > depth(MAX_DELAY)) {
|
|
DEBUG_PRINTF("repeat is variable or too large\n");
|
|
continue;
|
|
}
|
|
|
|
u32 max_delay = findMaxSafeDelay(ig, u, v);
|
|
|
|
u32 delay = repeat.bounds.min;
|
|
if (delay > max_delay) {
|
|
DEBUG_PRINTF("delay=%u > max_delay=%u\n", delay, max_delay);
|
|
continue;
|
|
}
|
|
|
|
if (delay + ig[u].s.length() - 1 > max_history) {
|
|
DEBUG_PRINTF("delay too large for history\n");
|
|
continue;
|
|
}
|
|
|
|
DEBUG_PRINTF("setting lit delay to %u and removing suffix\n", delay);
|
|
ig[u].delay = delay;
|
|
ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay);
|
|
ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay);
|
|
|
|
// Construct a new accept vertex for this report and remove edge e.
|
|
// (This allows us to cope if v has more than one in-edge).
|
|
RoseInVertex v2 =
|
|
add_vertex(RoseInVertexProps::makeAccept(reports), ig);
|
|
add_edge(u, v2, ig);
|
|
remove_edge(e, ig);
|
|
modified_accepts.insert(v);
|
|
}
|
|
|
|
DEBUG_PRINTF("%zu modified accepts\n", modified_accepts.size());
|
|
|
|
for (auto v : modified_accepts) {
|
|
if (in_degree(v, ig) == 0) {
|
|
DEBUG_PRINTF("removing accept vertex with no preds\n");
|
|
remove_vertex(v, ig);
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifndef NDEBUG
|
|
static
|
|
bool validateKinds(const RoseInGraph &g) {
|
|
for (const auto &e : edges_range(g)) {
|
|
if (g[e].graph && g[e].graph->kind != whatRoseIsThis(g, e)) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter) {
|
|
DEBUG_PRINTF("trying to rose\n");
|
|
assert(validateKinds(ig));
|
|
assert(hasCorrectlyNumberedVertices(ig));
|
|
|
|
if (::ue2::empty(ig)) {
|
|
assert(0);
|
|
return false;
|
|
}
|
|
|
|
const unique_ptr<RoseInGraph> in_ptr = cloneRoseGraph(ig);
|
|
RoseInGraph &in = *in_ptr;
|
|
|
|
RoseBuildData bd(in, false);
|
|
|
|
transformLiteralDelay(in, cc);
|
|
transformAnchoredLiteralOverlap(in, bd, cc);
|
|
transformSuffixDelay(in, cc);
|
|
|
|
renumber_vertices(in);
|
|
assert(validateKinds(in));
|
|
|
|
insertion_ordered_map<NGHolder *, vector<RoseInEdge>> graphs;
|
|
|
|
for (const auto &e : edges_range(in)) {
|
|
if (!in[e].graph) {
|
|
assert(!in[e].dfa);
|
|
assert(!in[e].haig);
|
|
continue; // no graph
|
|
}
|
|
|
|
if (in[e].haig || in[e].dfa) {
|
|
/* Early DFAs/Haigs are always implementable (we've already built
|
|
* the raw DFA). */
|
|
continue;
|
|
}
|
|
|
|
NGHolder *h = in[e].graph.get();
|
|
|
|
assert(isCorrectlyTopped(*h));
|
|
graphs[h].emplace_back(e);
|
|
}
|
|
|
|
vector<RoseInEdge> graph_edges;
|
|
|
|
for (const auto &m : graphs) {
|
|
NGHolder *h = m.first;
|
|
if (!canImplementGraph(*h, prefilter, rm, cc)) {
|
|
return false;
|
|
}
|
|
insert(&graph_edges, graph_edges.end(), m.second);
|
|
}
|
|
|
|
/* we are now past the point of no return. We can start making irreversible
|
|
changes to the rose graph, etc */
|
|
|
|
for (const auto &e : graph_edges) {
|
|
assert(in[e].graph);
|
|
assert(!in[e].haig);
|
|
NGHolder &h = *in[e].graph;
|
|
DEBUG_PRINTF("handling %p\n", &h);
|
|
assert(allMatchStatesHaveReports(h));
|
|
|
|
if (!generates_callbacks(whatRoseIsThis(in, e))
|
|
&& in[target(e, in)].type != RIV_ACCEPT_EOD) {
|
|
set_report(h, getNewNfaReport());
|
|
}
|
|
}
|
|
|
|
populateRoseGraph(this, bd);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool RoseBuildImpl::addSombeRose(const RoseInGraph &ig) {
|
|
DEBUG_PRINTF("rose is trying to consume a sombe\n");
|
|
assert(validateKinds(ig));
|
|
|
|
if (::ue2::empty(ig)) {
|
|
assert(0);
|
|
return false;
|
|
}
|
|
|
|
RoseBuildData bd(ig, true);
|
|
|
|
for (const auto &e : edges_range(ig)) {
|
|
if (!ig[e].graph) {
|
|
continue; // no graph
|
|
}
|
|
DEBUG_PRINTF("handling %p\n", ig[e].graph.get());
|
|
assert(allMatchStatesHaveReports(*ig[e].graph));
|
|
assert(ig[e].haig);
|
|
}
|
|
|
|
populateRoseGraph(this, bd);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool roseCheckRose(const RoseInGraph &ig, bool prefilter,
|
|
const ReportManager &rm, const CompileContext &cc) {
|
|
assert(validateKinds(ig));
|
|
|
|
if (::ue2::empty(ig)) {
|
|
assert(0);
|
|
return false;
|
|
}
|
|
|
|
vector<NGHolder *> graphs;
|
|
|
|
for (const auto &e : edges_range(ig)) {
|
|
if (!ig[e].graph) {
|
|
continue; // no graph
|
|
}
|
|
|
|
if (ig[e].haig) {
|
|
// Haigs are always implementable (we've already built the raw DFA).
|
|
continue;
|
|
}
|
|
|
|
graphs.emplace_back(ig[e].graph.get());
|
|
}
|
|
|
|
// cppcheck-suppress useStlAlgorithm
|
|
for (const auto &g : graphs) {
|
|
if (!canImplementGraph(*g, prefilter, rm, cc)) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void RoseBuildImpl::add(bool anchored, bool eod, const ue2_literal &lit,
|
|
const flat_set<ReportID> &reports) {
|
|
assert(!reports.empty());
|
|
|
|
if (cc.grey.floodAsPuffette && !anchored && !eod && is_flood(lit) &&
|
|
lit.length() > 3) {
|
|
DEBUG_PRINTF("adding as puffette\n");
|
|
const CharReach &cr = *lit.begin();
|
|
for (const auto &report : reports) {
|
|
addOutfix(raw_puff(lit.length(), true, report, cr, true));
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
RoseInGraph ig;
|
|
RoseInVertex start = add_vertex(RoseInVertexProps::makeStart(anchored), ig);
|
|
RoseInVertex accept = add_vertex(
|
|
eod ? RoseInVertexProps::makeAcceptEod(set<ReportID>())
|
|
: RoseInVertexProps::makeAccept(set<ReportID>()), ig);
|
|
RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
|
|
|
|
add_edge(start, v, RoseInEdgeProps(0U, anchored ? 0U : ROSE_BOUND_INF), ig);
|
|
add_edge(v, accept, RoseInEdgeProps(0U, 0U), ig);
|
|
|
|
calcVertexOffsets(ig);
|
|
|
|
ig[accept].reports.insert(reports.begin(), reports.end());
|
|
|
|
addRose(ig, false);
|
|
}
|
|
|
|
static
|
|
u32 findMaxBAWidth(const NGHolder &h) {
|
|
// Must be bi-anchored: no out-edges from startDs (other than its
|
|
// self-loop), no in-edges to accept.
|
|
if (out_degree(h.startDs, h) > 1 || in_degree(h.accept, h)) {
|
|
return ROSE_BOUND_INF;
|
|
}
|
|
depth d = findMaxWidth(h);
|
|
assert(d.is_reachable());
|
|
|
|
if (!d.is_finite()) {
|
|
return ROSE_BOUND_INF;
|
|
}
|
|
return d;
|
|
}
|
|
|
|
static
|
|
void populateOutfixInfo(OutfixInfo &outfix, const NGHolder &h,
|
|
const RoseBuildImpl &tbi) {
|
|
outfix.maxBAWidth = findMaxBAWidth(h);
|
|
outfix.minWidth = findMinWidth(h);
|
|
outfix.maxWidth = findMaxWidth(h);
|
|
outfix.maxOffset = findMaxOffset(h, tbi.rm);
|
|
populateReverseAccelerationInfo(outfix.rev_info, h);
|
|
}
|
|
|
|
static
|
|
bool addEodOutfix(RoseBuildImpl &build, const NGHolder &h) {
|
|
map<flat_set<ReportID>, ReportID> report_remap;
|
|
shared_ptr<NGHolder> eod_leftfix
|
|
= makeRoseEodPrefix(h, build, report_remap);
|
|
|
|
bool nfa_ok = isImplementableNFA(h, &build.rm, build.cc);
|
|
|
|
/* TODO: check if early dfa is possible */
|
|
|
|
if (!nfa_ok) {
|
|
DEBUG_PRINTF("could not build as NFA\n");
|
|
return false;
|
|
}
|
|
|
|
u32 eod_event = getEodEventID(build);
|
|
|
|
auto &g = build.g;
|
|
for (const auto &report_mapping : report_remap) {
|
|
RoseVertex v = add_vertex(g);
|
|
g[v].literals.insert(eod_event);
|
|
build.literal_info[eod_event].vertices.insert(v);
|
|
|
|
g[v].left.graph = eod_leftfix;
|
|
g[v].left.leftfix_report = report_mapping.second;
|
|
g[v].left.lag = 0;
|
|
RoseEdge e1 = add_edge(build.anchored_root, v, g);
|
|
g[e1].minBound = 0;
|
|
g[e1].maxBound = ROSE_BOUND_INF;
|
|
g[v].min_offset = findMinWidth(*eod_leftfix);
|
|
g[v].max_offset = ROSE_BOUND_INF;
|
|
|
|
depth max_width = findMaxWidth(*g[v].left.graph);
|
|
if (max_width.is_finite() && isPureAnchored(*eod_leftfix)) {
|
|
g[e1].maxBound = max_width;
|
|
g[v].max_offset = max_width;
|
|
}
|
|
|
|
g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix
|
|
RoseVertex w = add_vertex(g);
|
|
g[w].eod_accept = true;
|
|
g[w].reports = report_mapping.first;
|
|
g[w].min_offset = g[v].min_offset;
|
|
g[w].max_offset = g[v].max_offset;
|
|
RoseEdge e = add_edge(v, w, g);
|
|
g[e].minBound = 0;
|
|
g[e].maxBound = 0;
|
|
g[e].history = ROSE_ROLE_HISTORY_NONE;
|
|
DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool RoseBuildImpl::addOutfix(const NGHolder &h) {
|
|
DEBUG_PRINTF("%zu vertices, %zu edges\n", num_vertices(h), num_edges(h));
|
|
|
|
/* TODO: handle more than one report */
|
|
if (!in_degree(h.accept, h)
|
|
&& all_reports(h).size() == 1
|
|
&& addEodOutfix(*this, h)) {
|
|
return true;
|
|
}
|
|
|
|
const u32 nfa_states = isImplementableNFA(h, &rm, cc);
|
|
if (nfa_states) {
|
|
DEBUG_PRINTF("implementable as an NFA in %u states\n", nfa_states);
|
|
} else {
|
|
DEBUG_PRINTF("not implementable as an NFA\n");
|
|
}
|
|
|
|
bool dfa_cand = !nfa_states || nfa_states > 128 /* slow model */
|
|
|| can_exhaust(h, rm); /* can be pruned */
|
|
|
|
unique_ptr<raw_dfa> rdfa;
|
|
|
|
if (!nfa_states || cc.grey.roseMcClellanOutfix == 2 ||
|
|
(cc.grey.roseMcClellanOutfix == 1 && dfa_cand)) {
|
|
rdfa = buildMcClellan(h, &rm, cc.grey);
|
|
}
|
|
|
|
if (!nfa_states && !rdfa) {
|
|
DEBUG_PRINTF("could not build as either an NFA or a DFA\n");
|
|
return false;
|
|
}
|
|
|
|
if (rdfa) {
|
|
outfixes.emplace_back(OutfixInfo(std::move(rdfa)));
|
|
} else {
|
|
outfixes.emplace_back(OutfixInfo(cloneHolder(h)));
|
|
}
|
|
|
|
populateOutfixInfo(outfixes.back(), h, *this);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool RoseBuildImpl::addOutfix(const NGHolder &h, const raw_som_dfa &haig) {
|
|
DEBUG_PRINTF("haig with %zu states\n", haig.states.size());
|
|
|
|
outfixes.emplace_back(OutfixInfo(std::make_unique<raw_som_dfa>(haig)));
|
|
populateOutfixInfo(outfixes.back(), h, *this);
|
|
|
|
return true; /* failure is not yet an option */
|
|
}
|
|
|
|
bool RoseBuildImpl::addOutfix(const raw_puff &rp) {
|
|
if (!mpv_outfix) {
|
|
mpv_outfix = std::make_unique<OutfixInfo>(MpvProto());
|
|
}
|
|
|
|
auto *mpv = mpv_outfix->mpv();
|
|
assert(mpv);
|
|
mpv->puffettes.emplace_back(rp);
|
|
|
|
mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */
|
|
mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats));
|
|
mpv_outfix->maxWidth = rp.unbounded
|
|
? depth::infinity()
|
|
: max(mpv_outfix->maxWidth, depth(rp.repeats));
|
|
|
|
if (mpv_outfix->maxOffset == ROSE_BOUND_INF || rp.unbounded) {
|
|
mpv_outfix->maxOffset = ROSE_BOUND_INF;
|
|
} else {
|
|
mpv_outfix->maxOffset = MAX(mpv_outfix->maxOffset, rp.repeats);
|
|
}
|
|
|
|
return true; /* failure is not yet an option */
|
|
}
|
|
|
|
bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out,
|
|
u32 *event_out) {
|
|
if (!mpv_outfix) {
|
|
mpv_outfix = std::make_unique<OutfixInfo>(MpvProto());
|
|
}
|
|
|
|
auto *mpv = mpv_outfix->mpv();
|
|
assert(mpv);
|
|
mpv->triggered_puffettes.emplace_back(rp);
|
|
|
|
mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */
|
|
mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats));
|
|
mpv_outfix->maxWidth = rp.unbounded
|
|
? depth::infinity()
|
|
: max(mpv_outfix->maxWidth, depth(rp.repeats));
|
|
|
|
mpv_outfix->maxOffset = ROSE_BOUND_INF; /* TODO: we could get information from
|
|
* the caller */
|
|
|
|
*queue_out = mpv_outfix->get_queue(qif);
|
|
*event_out = MQE_TOP_FIRST + mpv->triggered_puffettes.size() - 1;
|
|
|
|
return true; /* failure is not yet an option */
|
|
}
|
|
|
|
static
|
|
bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w,
|
|
NFAVertex u,
|
|
const vector<DepthMinMax> &vertexDepths,
|
|
map<u32, DepthMinMax> &depthMap,
|
|
map<NFAVertex, set<u32>> &reportMap,
|
|
map<ReportID, u32> &allocated_reports,
|
|
flat_set<u32> &added_lit_ids) {
|
|
const depth max_anchored_depth(tbi.cc.grey.maxAnchoredRegion);
|
|
const size_t index = w[u].index;
|
|
assert(index < vertexDepths.size());
|
|
const DepthMinMax &d = vertexDepths.at(index);
|
|
|
|
for (const auto &int_report : w[u].reports) {
|
|
assert(int_report != MO_INVALID_IDX);
|
|
|
|
u32 lit_id;
|
|
if (!contains(allocated_reports, int_report)) {
|
|
lit_id = tbi.getNewLiteralId();
|
|
added_lit_ids.insert(lit_id);
|
|
allocated_reports[int_report] = lit_id;
|
|
} else {
|
|
lit_id = allocated_reports[int_report];
|
|
}
|
|
|
|
reportMap[u].insert(lit_id);
|
|
|
|
if (!contains(depthMap, lit_id)) {
|
|
depthMap[lit_id] = d;
|
|
} else {
|
|
depthMap[lit_id] = unionDepthMinMax(depthMap[lit_id], d);
|
|
}
|
|
|
|
if (depthMap[lit_id].max > max_anchored_depth) {
|
|
DEBUG_PRINTF("depth=%s exceeds maxAnchoredRegion=%u\n",
|
|
depthMap[lit_id].max.str().c_str(),
|
|
tbi.cc.grey.maxAnchoredRegion);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Failure path for addAnchoredAcyclic: removes the literal IDs that have been
|
|
// added to support anchored NFAs. Assumes that they are a contiguous range at
|
|
// the end of the RoseBuildImpl::literal_info vector.
|
|
static
|
|
void removeAddedLiterals(RoseBuildImpl &tbi, const flat_set<u32> &lit_ids) {
|
|
if (lit_ids.empty()) {
|
|
return;
|
|
}
|
|
|
|
DEBUG_PRINTF("remove last %zu literals\n", lit_ids.size());
|
|
|
|
// lit_ids should be a contiguous range.
|
|
assert(lit_ids.size() == *lit_ids.rbegin() - *lit_ids.begin() + 1);
|
|
assert(*lit_ids.rbegin() == tbi.literals.size() - 1);
|
|
|
|
assert(all_of_in(lit_ids, [&](u32 lit_id) {
|
|
return lit_id < tbi.literal_info.size() &&
|
|
tbi.literals.at(lit_id).table == ROSE_ANCHORED &&
|
|
tbi.literal_info[lit_id].vertices.empty();
|
|
}));
|
|
|
|
tbi.literals.erase_back(lit_ids.size());
|
|
assert(tbi.literals.size() == *lit_ids.begin());
|
|
|
|
// lit_ids should be at the end of tbi.literal_info.
|
|
assert(tbi.literal_info.size() == *lit_ids.rbegin() + 1);
|
|
tbi.literal_info.resize(*lit_ids.begin()); // remove all ids in lit_ids
|
|
}
|
|
|
|
bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) {
|
|
auto vertexDepths = calcDepthsFrom(h, h.start);
|
|
|
|
map<NFAVertex, set<u32> > reportMap; /* NFAVertex -> literal ids */
|
|
map<u32, DepthMinMax> depthMap; /* literal id -> min/max depth */
|
|
map<ReportID, u32> allocated_reports; /* report -> literal id */
|
|
flat_set<u32> added_lit_ids; /* literal ids added for this NFA */
|
|
|
|
for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
|
|
// cppcheck-suppress useStlAlgorithm
|
|
if (!prepAcceptForAddAnchoredNFA(*this, h, v, vertexDepths, depthMap,
|
|
reportMap, allocated_reports,
|
|
added_lit_ids)) {
|
|
removeAddedLiterals(*this, added_lit_ids);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
map<ReportID, u32> allocated_reports_eod; /* report -> literal id */
|
|
|
|
for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
|
|
if (v == h.accept) {
|
|
continue;
|
|
}
|
|
if (!prepAcceptForAddAnchoredNFA(*this, h, v, vertexDepths, depthMap,
|
|
reportMap, allocated_reports_eod,
|
|
added_lit_ids)) {
|
|
removeAddedLiterals(*this, added_lit_ids);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
assert(!reportMap.empty());
|
|
|
|
int rv = addAnchoredNFA(*this, h, reportMap);
|
|
if (rv != ANCHORED_FAIL) {
|
|
assert(rv != ANCHORED_REMAP);
|
|
DEBUG_PRINTF("added anchored nfa\n");
|
|
/* add edges to the rose graph to bubble the match up */
|
|
for (const auto &m : allocated_reports) {
|
|
const ReportID &report = m.first;
|
|
const u32 &lit_id = m.second;
|
|
assert(depthMap[lit_id].max.is_finite());
|
|
u32 minBound = depthMap[lit_id].min;
|
|
u32 maxBound = depthMap[lit_id].max;
|
|
RoseVertex v
|
|
= createAnchoredVertex(this, lit_id, minBound, maxBound);
|
|
g[v].reports.insert(report);
|
|
}
|
|
|
|
for (const auto &m : allocated_reports_eod) {
|
|
const ReportID &report = m.first;
|
|
const u32 &lit_id = m.second;
|
|
assert(depthMap[lit_id].max.is_finite());
|
|
u32 minBound = depthMap[lit_id].min;
|
|
u32 maxBound = depthMap[lit_id].max;
|
|
RoseVertex v
|
|
= createAnchoredVertex(this, lit_id, minBound, maxBound);
|
|
RoseVertex eod = add_vertex(g);
|
|
g[eod].eod_accept = true;
|
|
g[eod].reports.insert(report);
|
|
g[eod].min_offset = g[v].min_offset;
|
|
g[eod].max_offset = g[v].max_offset;
|
|
add_edge(v, eod, g);
|
|
}
|
|
|
|
return true;
|
|
} else {
|
|
DEBUG_PRINTF("failed to add anchored nfa\n");
|
|
removeAddedLiterals(*this, added_lit_ids);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
} // namespace ue2
|