mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
3156 lines
102 KiB
C++
3156 lines
102 KiB
C++
/*
|
|
* Copyright (c) 2015-2017, Intel Corporation
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
/**
|
|
* \file
|
|
* \brief SOM ("Start of Match") analysis.
|
|
*/
|
|
|
|
#include "ng_som.h"
|
|
|
|
#include "ng.h"
|
|
#include "ng_dump.h"
|
|
#include "ng_equivalence.h"
|
|
#include "ng_execute.h"
|
|
#include "ng_haig.h"
|
|
#include "ng_limex.h"
|
|
#include "ng_literal_analysis.h"
|
|
#include "ng_prune.h"
|
|
#include "ng_redundancy.h"
|
|
#include "ng_region.h"
|
|
#include "ng_reports.h"
|
|
#include "ng_som_add_redundancy.h"
|
|
#include "ng_som_util.h"
|
|
#include "ng_split.h"
|
|
#include "ng_util.h"
|
|
#include "ng_violet.h"
|
|
#include "ng_width.h"
|
|
#include "grey.h"
|
|
#include "ue2common.h"
|
|
#include "compiler/compiler.h"
|
|
#include "nfa/goughcompile.h"
|
|
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
|
|
#include "parser/position.h"
|
|
#include "som/som.h"
|
|
#include "rose/rose_build.h"
|
|
#include "rose/rose_in_util.h"
|
|
#include "util/alloc.h"
|
|
#include "util/compare.h"
|
|
#include "util/compile_error.h"
|
|
#include "util/container.h"
|
|
#include "util/dump_charclass.h"
|
|
#include "util/graph_range.h"
|
|
|
|
#include <algorithm>
|
|
#include <map>
|
|
#include <unordered_map>
|
|
#include <unordered_set>
|
|
#include <vector>
|
|
|
|
using namespace std;
|
|
|
|
namespace ue2 {
|
|
|
|
static const size_t MAX_SOM_PLANS = 10;
|
|
static const size_t MAX_SOMBE_CHAIN_VERTICES = 4000;
|
|
|
|
#define MAX_REV_NFA_PREFIX 80
|
|
|
|
namespace {
/** \brief A single step in a chained SOM-tracking plan: a prefix graph that
 * writes/copies a SOM value, plus the escape set that invalidates it. */
struct som_plan {
    som_plan(const shared_ptr<NGHolder> &p, const CharReach &e, bool i,
             u32 parent_in) : prefix(p), escapes(e), is_reset(i),
        no_implement(false), parent(parent_in) { }
    shared_ptr<NGHolder> prefix; //!< graph implementing this plan's trigger
    CharReach escapes; //!< characters that invalidate the stored SOM value
    bool is_reset; //!< true if the prefix match resets all later state
    bool no_implement; //!< set when the prefix does not need to be built
    u32 parent; // index of parent plan in the vector.

    // Reporters: a list of vertices in the graph that must have their
    // reports updated at implementation time to report this plan's
    // som_loc_out.
    vector<NFAVertex> reporters;

    // Similar, but these report the som_loc_in.
    vector<NFAVertex> reporters_in;
};
}
|
|
|
|
/** \brief True if every exit of \p region lies at a fixed (min == max) depth
 * from the start of match, so matching into this region pins down the SOM.
 * Regions containing accept/acceptEod are rejected. */
static
bool regionCanEstablishSom(const NGHolder &g,
                           const unordered_map<NFAVertex, u32> &regions,
                           const u32 region, const vector<NFAVertex> &r_exits,
                           const vector<DepthMinMax> &depths) {
    if (region == regions.at(g.accept) ||
        region == regions.at(g.acceptEod)) {
        DEBUG_PRINTF("accept in region\n");
        return false;
    }

    DEBUG_PRINTF("region %u\n", region);
    for (UNUSED auto v : r_exits) {
        DEBUG_PRINTF("  exit %zu\n", g[v].index);
    }

    /* simple if each region exit is at fixed distance from SOM. Note SOM does
       not include virtual starts */
    for (auto v : r_exits) {
        assert(regions.at(v) == region);
        const DepthMinMax &d = depths.at(g[v].index);
        if (d.min != d.max) {
            // variable depth exit -> SOM cannot be derived from this region
            DEBUG_PRINTF("failing %zu as %s != %s\n", g[v].index,
                         d.min.str().c_str(), d.max.str().c_str());
            return false;
        }
    }
    DEBUG_PRINTF("region %u/%zu is good\n", regions.at(r_exits[0]),
                 g[r_exits[0]].index);

    return true;
}
|
|
|
|
namespace {

/** \brief Summary of one region of the graph: its entry/exit/member vertices
 * and whether it may be skipped or contains cycles. */
struct region_info {
    region_info() : optional(false), dag(false) {}
    vector<NFAVertex> enters; //!< vertices with in-edges from earlier regions
    vector<NFAVertex> exits;  //!< vertices with out-edges to later regions
    vector<NFAVertex> full;   //!< all vertices in the region
    bool optional; /* skip edges around region */
    bool dag; /* completely acyclic */
};

}
|
|
|
|
/** \brief Populate \p info with a region_info entry per region of \p g:
 * enters/exits/full membership, the optional flag, and the dag flag (cleared
 * for regions that contain a back edge). Region 0 and start vertices are
 * skipped unless \p include_region_0 is set. */
static
void buildRegionMapping(const NGHolder &g,
                        const unordered_map<NFAVertex, u32> &regions,
                        map<u32, region_info> &info,
                        bool include_region_0 = false) {
    for (auto v : vertices_range(g)) {
        u32 region = regions.at(v);
        if (!include_region_0 && (is_any_start(v, g) || region == 0)) {
            continue;
        }
        assert(!region || !is_any_start(v, g));

        if (is_any_accept(v, g)) {
            continue;
        }

        if (isRegionEntry(g, v, regions)) {
            info[region].enters.emplace_back(v);
        }
        if (isRegionExit(g, v, regions)) {
            info[region].exits.emplace_back(v);
        }
        info[region].full.emplace_back(v);
    }

    for (auto &m : info) {
        if (!m.second.enters.empty()
            && isOptionalRegion(g, m.second.enters.front(), regions)) {
            m.second.optional = true;
        }
        m.second.dag = true; /* will be cleared for cyclic regions later */
    }

    // Find back edges; any back edge within a region means it is cyclic.
    set<NFAEdge> be;
    BackEdges<set<NFAEdge> > backEdgeVisitor(be);
    boost::depth_first_search(g, visitor(backEdgeVisitor).root_vertex(g.start));

    for (const auto &e : be) {
        NFAVertex u = source(e, g);
        NFAVertex v = target(e, g);
        if (is_special(u, g) || is_special(v, g)) {
            assert(is_special(u, g) && is_special(v, g));
            continue;
        }
        u32 r = regions.at(v);
        assert(regions.at(u) == r);
        info[r].dag = false;
    }

    if (include_region_0) {
        // region 0 contains startDs and its self-loop; treat as cyclic
        info[0].dag = false;
    }

#ifdef DEBUG
    for (const auto &m : info) {
        u32 r = m.first;
        const region_info &r_i = m.second;
        DEBUG_PRINTF("region %u:%s%s\n", r,
                     r_i.dag ? " (dag)" : "",
                     r_i.optional ? " (optional)" : "");
        DEBUG_PRINTF("  enters:");
        for (u32 i = 0; i < r_i.enters.size(); i++) {
            printf(" %zu", g[r_i.enters[i]].index);
        }
        printf("\n");
        DEBUG_PRINTF("  exits:");
        for (u32 i = 0; i < r_i.exits.size(); i++) {
            printf(" %zu", g[r_i.exits[i]].index);
        }
        printf("\n");
        DEBUG_PRINTF("  all:");
        for (u32 i = 0; i < r_i.full.size(); i++) {
            printf(" %zu", g[r_i.full[i]].index);
        }
        printf("\n");
    }
#endif
}
|
|
|
|
/** \brief Validate a simple "XSL" escape set: no vertex in any region after
 * \p region may be reachable on an escape character. On failure, writes the
 * first (lowest) offending region to \p bad_region and returns false. */
static
bool validateXSL(const NGHolder &g,
                 const unordered_map<NFAVertex, u32> &regions,
                 const u32 region, const CharReach &escapes, u32 *bad_region) {
    /* need to check that the escapes escape all of the graph past region */
    u32 first_bad_region = ~0U;
    for (auto v : vertices_range(g)) {
        u32 v_region = regions.at(v);
        if (!is_special(v, g) && v_region > region &&
            (escapes & g[v].char_reach).any()) {
            DEBUG_PRINTF("problem with escapes for %zu\n", g[v].index);
            first_bad_region = MIN(first_bad_region, v_region);
        }
    }

    if (first_bad_region != ~0U) {
        *bad_region = first_bad_region;
        return false;
    }

    return true;
}
|
|
|
|
/** \brief Validate an "extended XSL" escape set by simulation: all tail
 * states must go dead after seeing /[e][^e]*{subsequent prefix match}/.
 * On failure, writes the first surviving region to \p bad_region. */
static
bool validateEXSL(const NGHolder &g,
                  const unordered_map<NFAVertex, u32> &regions,
                  const u32 region, const CharReach &escapes,
                  const NGHolder &prefix, u32 *bad_region) {
    /* EXSL: To be a valid EXSL with escapes e, we require that all states
     * go dead after /[e][^e]*{subsequent prefix match}/.
     */

    /* TODO: this is overly conservative as it allows partial matches from the
     * prefix to be considered even when the tail has processed some [^e] */

    u32 first_bad_region = ~0U;
    const vector<CharReach> escapes_vec(1, escapes);
    const vector<CharReach> notescapes_vec(1, ~escapes);

    flat_set<NFAVertex> states;
    /* turn on all states past the prefix */
    DEBUG_PRINTF("region %u is cutover\n", region);
    for (auto v : vertices_range(g)) {
        if (!is_special(v, g) && regions.at(v) > region) {
            states.insert(v);
        }
    }

    /* process the escapes */
    states = execute_graph(g, escapes_vec, states);

    /* flood with any number of not escapes: iterate to a fixpoint since
     * [^e]* can be matched any number of times */
    flat_set<NFAVertex> prev_states;
    while (prev_states != states) {
        prev_states = states;
        states = execute_graph(g, notescapes_vec, states);
        insert(&states, prev_states);
    }

    /* find input starts to use for when we are running the prefix through as
     * when the escape character arrives we may be in matching the prefix
     * already */
    flat_set<NFAVertex> prefix_start_states;
    for (auto v : vertices_range(prefix)) {
        if (v != prefix.accept && v != prefix.acceptEod
            /* and as we have already made it past the prefix once */
            && v != prefix.start) {
            prefix_start_states.insert(v);
        }
    }

    prefix_start_states =
        execute_graph(prefix, escapes_vec, prefix_start_states);

    assert(contains(prefix_start_states, prefix.startDs));
    /* see what happens after we feed it the prefix */
    states = execute_graph(g, prefix, prefix_start_states, states);

    // any state still alive after the full sequence invalidates the escapes
    for (auto v : states) {
        assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be
                                                    * on */
        DEBUG_PRINTF("state still active\n");
        first_bad_region = MIN(first_bad_region, regions.at(v));
    }

    if (first_bad_region != ~0U) {
        *bad_region = first_bad_region;
        return false;
    }

    return true;
}
|
|
|
|
/** \brief Check whether the region after \p region is a single cyclic vertex
 * (a "lock"); if so, the complement of its reachability is the natural escape
 * set and is written to \p escapes_out. */
static
bool isPossibleLock(const NGHolder &g,
                    map<u32, region_info>::const_iterator region,
                    const map<u32, region_info> &info,
                    CharReach *escapes_out) {
    /* TODO: we could also check for self-loops on curr region */

    /* TODO: some straw-walking logic. lowish priority as we know there can
     * only be optional regions between us and the cyclic */

    assert(region != info.end());
    map<u32, region_info>::const_iterator next_region = region;
    ++next_region;
    if (next_region == info.end()) {
        assert(0); /* odd */
        return false;
    }

    const region_info &next_info = next_region->second;
    if (next_info.enters.empty()) {
        assert(0); /* odd */
        return false;
    }

    // single-vertex, cyclic region: escapes are everything it cannot eat
    if (next_info.full.size() == 1 && !next_info.dag) {
        *escapes_out = ~g[next_info.full.front()].char_reach;
        return true;
    }

    return false;
}
|
|
|
|
/** \brief Build a standalone prefix graph: a copy of \p g truncated so that
 * the exits of region \p curr accept where the enters of region \p next
 * begin. \p renumber may be disabled to keep vertex indices (and therefore
 * any index-keyed depth map) valid. */
static
unique_ptr<NGHolder>
makePrefix(const NGHolder &g, const unordered_map<NFAVertex, u32> &regions,
           const region_info &curr, const region_info &next,
           bool renumber = true) {
    const vector<NFAVertex> &curr_exits = curr.exits;
    const vector<NFAVertex> &next_enters = next.enters;

    assert(!next_enters.empty());
    assert(!curr_exits.empty());

    unique_ptr<NGHolder> prefix_ptr = std::make_unique<NGHolder>();
    NGHolder &prefix = *prefix_ptr;

    deque<NFAVertex> lhs_verts;
    insert(&lhs_verts, lhs_verts.end(), vertices(g));

    unordered_map<NFAVertex, NFAVertex> lhs_map; // g -> prefix
    fillHolder(&prefix, g, lhs_verts, &lhs_map);
    prefix.kind = NFA_OUTFIX;

    // We need a reverse mapping to track regions.
    unordered_map<NFAVertex, NFAVertex> rev_map; // prefix -> g
    for (const auto &e : lhs_map) {
        rev_map.emplace(e.second, e.first);
    }

    // Detach the accepts; new accept edges are wired up below.
    clear_in_edges(prefix.accept, prefix);
    clear_in_edges(prefix.acceptEod, prefix);
    add_edge(prefix.accept, prefix.acceptEod, prefix);

    assert(!next_enters.empty());
    assert(next_enters.front() != NGHolder::null_vertex());
    u32 dead_region = regions.at(next_enters.front());
    DEBUG_PRINTF("curr_region %u, dead_region %u\n",
                 regions.at(curr_exits.front()), dead_region);
    // Predecessors of the next region's entry (in earlier regions) become
    // the accept states of the prefix.
    for (auto v : inv_adjacent_vertices_range(next_enters.front(), g)) {
        if (regions.at(v) >= dead_region) {
            continue;
        }
        /* add edge to new accepts */
        NFAVertex p_v = lhs_map[v];
        add_edge(p_v, prefix.accept, prefix);
    }

    assert(in_degree(prefix.accept, prefix) != 0);

    /* prune everything past the picked region */
    vector<NFAVertex> to_clear;
    assert(contains(lhs_map, curr_exits.front()));
    NFAVertex p_u = lhs_map[curr_exits.front()];
    DEBUG_PRINTF("p_u: %zu\n", prefix[p_u].index);
    for (auto p_v : adjacent_vertices_range(p_u, prefix)) {
        auto v = rev_map.at(p_v);
        if (p_v == prefix.accept || regions.at(v) < dead_region) {
            continue;
        }
        to_clear.emplace_back(p_v);
    }

    for (auto v : to_clear) {
        DEBUG_PRINTF("clearing in_edges on %zu\n", prefix[v].index);
        clear_in_edges(v, prefix);
    }

    pruneUseless(prefix, renumber /* sometimes we want no renumber to keep
                                     depth map valid */);

    assert(num_vertices(prefix) > N_SPECIALS);
    return prefix_ptr;
}
|
|
|
|
/** \brief Rewrite any report on an accept-predecessor that targets the
 * placeholder slot (UINT32_MAX) so that it targets \p real_slot instead. */
static
void replaceTempSomSlot(ReportManager &rm, NGHolder &g, u32 real_slot) {
    const u32 temp_slot = UINT32_MAX;
    /* update the som slot on the prefix report */
    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        auto &reports = g[v].reports;
        assert(reports.size() == 1);
        Report ir = rm.getReport(*reports.begin());
        if (ir.onmatch != temp_slot) {
            continue;
        }
        ir.onmatch = real_slot;
        // Reports are interned: modifying requires obtaining a new id.
        ReportID rep = rm.getInternalId(ir);

        assert(reports.size() == 1);
        reports.clear();
        reports.insert(rep);
    }
}
|
|
|
|
/** \brief Replace the reports on all accept-predecessors of \p g with a new
 * SOM-writing report of type \p ir_type targeting slot \p som_loc. If the
 * prefix is implemented by a reverse NFA, the SOM distance is left invalid
 * and filled in later; otherwise it is the (fixed) depth of the vertex. */
static
void setPrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type,
                      u32 som_loc, const vector<DepthMinMax> &depths,
                      bool prefix_by_rev) {
    Report ir = makeCallback(0U, 0);
    ir.type = ir_type;
    ir.onmatch = som_loc;

    /* add report for storing in som location on new accepts */
    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        if (prefix_by_rev) {
            ir.somDistance = MO_INVALID_IDX; /* will be populated properly
                                              * later */
        } else {
            const DepthMinMax &d = depths.at(g[v].index);
            assert(d.min == d.max); // prefix accepts must be fixed depth
            ir.somDistance = d.max;
        }
        ReportID rep = rm.getInternalId(ir);

        auto &reports = g[v].reports;
        reports.clear();
        reports.insert(rep);
    }
}
|
|
|
|
/** \brief Change the report type on every accept-predecessor's (single)
 * report to \p ir_type, re-interning the modified report. */
static
void updatePrefixReports(ReportManager &rm, NGHolder &g, ReportType ir_type) {
    /* update the som action on the prefix report */
    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        auto &reports = g[v].reports;
        assert(reports.size() == 1);
        Report ir = rm.getReport(*reports.begin());
        ir.type = ir_type;
        ReportID rep = rm.getInternalId(ir);

        assert(reports.size() == 1);
        reports.clear();
        reports.insert(rep);
    }
}
|
|
|
|
/** \brief Convert the SOM-set reports on accept-predecessors to their
 * reverse-NFA equivalents, attaching the reverse NFA id \p rev_comp_id. */
static
void updatePrefixReportsRevNFA(ReportManager &rm, NGHolder &g,
                               u32 rev_comp_id) {
    /* update the action on the prefix report, to refer to a reverse nfa,
     * report type is also adjusted. */
    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        auto &reports = g[v].reports;
        assert(reports.size() == 1);
        Report ir = rm.getReport(*reports.begin());
        switch (ir.type) {
        case INTERNAL_SOM_LOC_SET:
            ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA;
            break;
        case INTERNAL_SOM_LOC_SET_IF_UNSET:
            ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET;
            break;
        case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
            ir.type = INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE;
            break;
        default:
            assert(0); // only SOM_LOC_SET variants are expected here
            break;
        }

        ir.revNfaIndex = rev_comp_id;
        ReportID rep = rm.getInternalId(ir);

        assert(reports.size() == 1);
        reports.clear();
        reports.insert(rep);
    }
}
|
|
|
|
/** \brief Set the reports on a midfix's accept-predecessors to copy the SOM
 * value from slot \p som_slot_in into \p som_slot_out; an unconditional copy
 * if the plan resets state, otherwise copy-if-writable. */
static
void setMidfixReports(ReportManager &rm, const som_plan &item,
                      const u32 som_slot_in, const u32 som_slot_out) {
    assert(item.prefix);
    NGHolder &g = *item.prefix;

    Report ir = makeCallback(0U, 0);
    ir.type = item.is_reset ? INTERNAL_SOM_LOC_COPY
                            : INTERNAL_SOM_LOC_COPY_IF_WRITABLE;
    ir.onmatch = som_slot_out;
    ir.somDistance = som_slot_in; // source slot, not a distance, for COPY
    ReportID rep = rm.getInternalId(ir);

    /* add report for storing in som location on new accepts */
    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        auto &reports = g[v].reports;
        reports.clear();
        reports.insert(rep);
    }
}
|
|
|
|
static
|
|
bool finalRegion(const NGHolder &g,
|
|
const unordered_map<NFAVertex, u32> ®ions,
|
|
NFAVertex v) {
|
|
u32 region = regions.at(v);
|
|
for (auto w : adjacent_vertices_range(v, g)) {
|
|
if (w != g.accept && w != g.acceptEod && regions.at(w) != region) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/** \brief On vertex \p v, replace every plain EXTERNAL_CALLBACK report with a
 * SOM-flavoured report of type \p ir_type carrying \p param as somDistance.
 * Reports that have already been converted are left untouched. */
static
void replaceExternalReportsWithSomRep(ReportManager &rm, NGHolder &g,
                                      NFAVertex v, ReportType ir_type,
                                      u64a param) {
    assert(!g[v].reports.empty());

    flat_set<ReportID> r_new;

    for (const ReportID &report_id : g[v].reports) {
        Report ir = rm.getReport(report_id);

        if (ir.type != EXTERNAL_CALLBACK) {
            /* we must have already done whatever magic we needed to do to this
             * report */
            r_new.insert(report_id);
            continue;
        }

        ir.type = ir_type;
        ir.somDistance = param;
        ReportID rep = rm.getInternalId(ir);

        DEBUG_PRINTF("vertex %zu, replacing report %u with %u (type %u)\n",
                     g[v].index, report_id, rep, ir_type);
        r_new.insert(rep);
    }
    g[v].reports = r_new;
}
|
|
|
|
/* updates the reports on all vertices leading to the sink */
|
|
static
|
|
void makeSomRelReports(ReportManager &rm, NGHolder &g, NFAVertex sink,
|
|
const vector<DepthMinMax> &depths) {
|
|
for (auto v : inv_adjacent_vertices_range(sink, g)) {
|
|
if (v == g.accept) {
|
|
continue;
|
|
}
|
|
|
|
const DepthMinMax &d = depths.at(g[v].index);
|
|
assert(d.min == d.max);
|
|
replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL,
|
|
d.min);
|
|
}
|
|
}
|
|
|
|
/* updates the reports on all the provided vertices */
|
|
static
|
|
void makeSomRelReports(ReportManager &rm, NGHolder &g,
|
|
const vector<NFAVertex> &to_update,
|
|
const vector<DepthMinMax> &depths) {
|
|
for (auto v : to_update) {
|
|
const DepthMinMax &d = depths.at(g[v].index);
|
|
assert(d.min == d.max);
|
|
replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_REL,
|
|
d.min);
|
|
}
|
|
}
|
|
|
|
/** \brief Give every predecessor of \p sink an absolute-SOM report (offset
 * zero): matches reported here always start at the beginning. */
static
void makeSomAbsReports(ReportManager &rm, NGHolder &g, NFAVertex sink) {
    for (auto v : inv_adjacent_vertices_range(sink, g)) {
        if (v == g.accept) {
            // skip the accept->acceptEod edge when sink is acceptEod
            continue;
        }
        replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_ABS,
                                         0);
    }
}
|
|
|
|
/** \brief Switch the reports on all accept and acceptEod predecessors to
 * read the SOM value previously stored in slot \p som_loc. */
static
void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g, u32 som_loc) {
    for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
        replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
                                         som_loc);
    }
    for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
        if (v == g.accept) {
            // already handled via the accept loop above
            continue;
        }
        replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
                                         som_loc);
    }
}
|
|
|
|
/** \brief As above, but only for the explicitly listed vertices. */
static
void updateReportToUseRecordedSom(ReportManager &rm, NGHolder &g,
                                  const vector<NFAVertex> &to_update,
                                  u32 som_loc) {
    for (auto v : to_update) {
        replaceExternalReportsWithSomRep(rm, g, v, EXTERNAL_CALLBACK_SOM_STORED,
                                         som_loc);
    }
}
|
|
|
|
/** \brief Build and register an "escaper" graph /prefix[^escapes]*[escapes]/
 * whose match makes SOM slot \p som_loc writable again. Returns false if the
 * holder could not be added. */
static
bool createEscaper(NG &ng, const NGHolder &prefix, const CharReach &escapes,
                   u32 som_loc) {
    ReportManager &rm = ng.rm;

    /* escaper = /prefix[^escapes]*[escapes]/ */
    DEBUG_PRINTF("creating escaper for %u\n", som_loc);
    NGHolder h;
    cloneHolder(h, prefix);
    assert(h.kind == NFA_OUTFIX);

    // u: self-looping [^escapes] state; v: final [escapes] state
    NFAVertex u = add_vertex(h);
    h[u].char_reach = ~escapes;

    NFAVertex v = add_vertex(h);
    h[v].char_reach = escapes;

    // Redirect the prefix's accepts into the new tail and strip their reports.
    for (auto w : inv_adjacent_vertices_range(h.accept, h)) {
        add_edge(w, u, h);
        add_edge(w, v, h);
        h[w].reports.clear();
    }

    clear_in_edges(h.accept, h);

    add_edge(u, v, h);
    add_edge(u, u, h);
    add_edge(v, h.accept, h);

    Report ir = makeCallback(0U, 0);
    ir.type = INTERNAL_SOM_LOC_MAKE_WRITABLE;
    ir.onmatch = som_loc;
    h[v].reports.insert(rm.getInternalId(ir));
    return ng.addHolder(h);
}
|
|
|
|
/** \brief Populate \p out with a copy of \p g covering all regions up to and
 * including the lock region (the one after \p picked), with startDs wired to
 * the enters of each leading optional region. Used by firstMatchIsFirst-style
 * lock checks. */
static
void fillHolderForLockCheck(NGHolder *out, const NGHolder &g,
                            const map<u32, region_info> &info,
                            map<u32, region_info>::const_iterator picked) {
    /* NOTE: This is appropriate for firstMatchIsFirst */
    DEBUG_PRINTF("prepping for lock check\n");

    NGHolder &midfix = *out;

    map<NFAVertex, NFAVertex> v_map;
    v_map[g.start] = midfix.start;
    v_map[g.startDs] = midfix.startDs;

    /* include the lock region */
    assert(picked != info.end());
    auto graph_last = next(picked);

    assert(!graph_last->second.dag); // lock region must be cyclic
    assert(graph_last->second.full.size() == 1);

    // Walk regions backwards from the lock region down to the first region.
    for (auto jt = graph_last; ; --jt) {
        DEBUG_PRINTF("adding r %u to midfix\n", jt->first);

        /* add all vertices in region, create mapping */
        for (auto v : jt->second.full) {
            DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index);
            if (contains(v_map, v)) {
                continue;
            }

            /* treat all virtual starts as happening anywhere, so that the
             * virtual start is not counted as part of the SoM */
            if (is_virtual_start(v, g)) {
                v_map[v] = midfix.startDs;
                continue;
            }

            NFAVertex vnew = add_vertex(g[v], midfix);
            v_map[v] = vnew;
        }

        /* add edges leaving region verts based on mapping */
        for (auto v : jt->second.full) {
            NFAVertex u = v_map[v];
            for (auto w : adjacent_vertices_range(v, g)) {
                if (w == g.accept || w == g.acceptEod) {
                    add_edge_if_not_present(u, midfix.accept, midfix);
                    continue;
                }
                // successors beyond the copied regions also become accepts
                if (!contains(v_map, w)) {
                    add_edge_if_not_present(u, midfix.accept, midfix);
                } else {
                    add_edge_if_not_present(u, v_map[w], midfix);
                }
            }
        }

        if (jt == info.begin()) {
            break;
        }
    }

    /* add edges from startds to the enters of all the initial optional
     * regions and the first mandatory region. */
    for (auto jt = info.begin(); ; ++jt) {
        for (auto enter : jt->second.enters) {
            assert(contains(v_map, enter));
            NFAVertex v = v_map[enter];
            add_edge_if_not_present(midfix.startDs, v, midfix);
        }

        if (!jt->second.optional) {
            break;
        }

        if (jt == graph_last) {
            /* all regions are optional - add a direct edge to accept */
            add_edge_if_not_present(midfix.startDs, midfix.accept, midfix);
            break;
        }
    }

    assert(in_degree(midfix.accept, midfix));
    renumber_vertices(midfix);
}
|
|
|
|
/** \brief Populate \p out with a conservative acyclic approximation of the
 * graph up to region \p picked, suitable for use as a fake prefix by
 * sentClearsTail. Walks backwards through dag regions, then includes the
 * exits of the cyclic regions feeding in. */
static
void fillRoughMidfix(NGHolder *out, const NGHolder &g,
                     const unordered_map<NFAVertex, u32> &regions,
                     const map<u32, region_info> &info,
                     map<u32, region_info>::const_iterator picked) {
    /* as we are not the first prefix, we are probably not acyclic. We need to
     * generate an acyclic holder to act as a fake prefix to sentClearsTail.
     * This will result in a more conservative estimate. */
    /* NOTE: This is not appropriate for firstMatchIsFirst */
    NGHolder &midfix = *out;
    add_edge(midfix.startDs, midfix.accept, midfix);

    map<NFAVertex, NFAVertex> v_map;

    // Phase 1: walk backwards while regions are acyclic, copying them in.
    map<u32, region_info>::const_iterator jt = picked;
    for (; jt->second.dag; --jt) {
        DEBUG_PRINTF("adding r %u to midfix\n", jt->first);
        if (!jt->second.optional) {
            // a mandatory region invalidates any earlier shortcut to accept
            clear_out_edges(midfix.startDs, midfix);
            add_edge(midfix.startDs, midfix.startDs, midfix);
        }

        /* add all vertices in region, create mapping */
        for (auto v : jt->second.full) {
            DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index);
            NFAVertex vnew = add_vertex(g[v], midfix);
            v_map[v] = vnew;
        }

        /* add edges leaving region verts based on mapping */
        for (auto v : jt->second.full) {
            NFAVertex u = v_map[v];
            for (auto w : adjacent_vertices_range(v, g)) {
                if (w == g.accept || w == g.acceptEod) {
                    continue;
                }
                if (!contains(v_map, w)) {
                    add_edge_if_not_present(u, midfix.accept, midfix);
                } else {
                    add_edge_if_not_present(u, v_map[w], midfix);
                }
            }
        }

        /* add edges from startds to enters */
        for (auto enter : jt->second.enters) {
            assert(contains(v_map, enter));
            NFAVertex v = v_map[enter];
            add_edge(midfix.startDs, v, midfix);
        }

        if (jt == info.begin()) {
            break;
        }
    }

    /* we can include the exits of the regions leading in */
    // Phase 2: jt now rests on a cyclic region; pull in its exit vertices
    // (and those of any preceding optional cyclic regions).
    if (!jt->second.dag) {
        u32 first_early_region = jt->first;
        clear_out_edges(midfix.startDs, midfix);
        add_edge(midfix.startDs, midfix.startDs, midfix);

        do {
            for (auto v : jt->second.exits) {
                DEBUG_PRINTF("adding v %zu to midfix\n", g[v].index);
                NFAVertex vnew = add_vertex(g[v], midfix);
                v_map[v] = vnew;

                /* add edges from startds to new vertices */
                add_edge(midfix.startDs, vnew, midfix);
            }

            /* add edges leaving region verts based on mapping */
            for (auto v : jt->second.exits) {
                NFAVertex u = v_map[v];
                for (auto w : adjacent_vertices_range(v, g)) {
                    if (w == g.accept || w == g.acceptEod
                        || regions.at(w) <= first_early_region) {
                        continue;
                    }
                    if (!contains(v_map, w)) {
                        add_edge_if_not_present(u, midfix.accept, midfix);
                    } else {
                        add_edge_if_not_present(u, v_map[w], midfix);
                    }
                }
            }
            // NB: jt-- happens inside the condition; loop stops at begin()
        } while (jt->second.optional && jt != info.begin() && (jt--)->first);

        if (jt->second.optional) {
            assert(!jt->second.exits.empty());
            NFAVertex v = v_map[jt->second.exits.front()];
            for (auto w : adjacent_vertices_range(v, midfix)) {
                add_edge(midfix.startDs, w, midfix);
            }
        }
    }
}
|
|
|
|
/** \brief True if the pattern effectively begins with /.*<rest>/: startDs has
 * a non-virtual-start 'dot' successor whose own successor set equals the
 * startDs successor set. */
static
bool beginsWithDotStar(const NGHolder &g) {
    bool hasDot = false;

    // We can ignore the successors of start, as matches that begin there will
    // necessarily have a SOM of 0.

    set<NFAVertex> succ;
    insert(&succ, adjacent_vertices(g.startDs, g));
    succ.erase(g.startDs);

    for (auto v : succ) {
        // We want 'dot' states that aren't virtual starts.
        if (g[v].char_reach.all() &&
            !g[v].assert_flags) {
            hasDot = true;
            set<NFAVertex> dotsucc;
            insert(&dotsucc, adjacent_vertices(v, g));
            // the dot must loop back to exactly the same frontier
            if (dotsucc != succ) {
                DEBUG_PRINTF("failed dot-star succ check\n");
                return false;
            }
        }
    }

    if (hasDot) {
        DEBUG_PRINTF("begins with dot-star\n");
    }
    return hasDot;
}
|
|
|
|
/** \brief Implement a som_plan as a midfix: set up its escaper (if any),
 * point its reports at a copy from \p som_slot_in to \p som_slot_out, and add
 * the prefix graph to the build. Returns false on failure. */
static
bool buildMidfix(NG &ng, const som_plan &item, const u32 som_slot_in,
                 const u32 som_slot_out) {
    assert(item.prefix);
    assert(hasCorrectlyNumberedVertices(*item.prefix));

    /* setup escaper for second som_location if required */
    if (item.escapes.any()) {
        if (!createEscaper(ng, *item.prefix, item.escapes, som_slot_out)) {
            return false;
        }
    }

    /* ensure we copy som from prev loc */
    setMidfixReports(ng.rm, item, som_slot_in, som_slot_out);

    /* add second prefix/1st midfix */
    if (!ng.addHolder(*item.prefix)) {
        DEBUG_PRINTF("---addHolder failed---\n");
        return false;
    }

    return true;
}
|
|
|
|
static
|
|
bool isMandRegionBetween(map<u32, region_info>::const_iterator a,
|
|
map<u32, region_info>::const_iterator b) {
|
|
while (b != a) {
|
|
if (!b->second.optional) {
|
|
return true;
|
|
}
|
|
--b;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
// Attempts to advance the current plan. Returns true if we advance to the end
|
|
// (woot!); updates picked, plan and bad_region.
|
|
// Attempts to advance the current plan. Returns true if we advance to the end
// (woot!); updates picked, plan and bad_region.
static
bool advancePlan(const NGHolder &g,
                 const unordered_map<NFAVertex, u32> &regions,
                 const NGHolder &prefix, bool stuck,
                 map<u32, region_info>::const_iterator &picked,
                 const map<u32, region_info>::const_iterator furthest,
                 const map<u32, region_info>::const_iterator furthest_lock,
                 const CharReach &next_escapes, som_plan &plan,
                 u32 *bad_region) {
    u32 bad_region_r = 0;
    u32 bad_region_x = 0;
    u32 bad_region_e = 0;
    DEBUG_PRINTF("curr %u\n", picked->first);

    // Best case: the prefix match clears everything past `furthest`, so the
    // plan is a full reset.
    if (sentClearsTail(g, regions, prefix, furthest->first, &bad_region_r)) {
        plan.is_reset = true;
        picked = furthest;
        DEBUG_PRINTF("Prefix clears tail, woot!\n");
        return true;
    } else {
        DEBUG_PRINTF("Reset failed, first bad region %u\n", bad_region_r);
    }

    // Otherwise, if we're stuck, try the escape-based schemes (XSL, then the
    // more expensive EXSL) up to the lock region.
    if (stuck) {
        u32 to_region = furthest_lock->first;
        if (validateXSL(g, regions, to_region, next_escapes, &bad_region_x)) {
            DEBUG_PRINTF("XSL\n");
            picked = furthest_lock;
            plan.escapes = next_escapes;
            return true;
        } else {
            DEBUG_PRINTF("XSL failed, first bad region %u\n", bad_region_x);
        }

        if (validateEXSL(g, regions, to_region, next_escapes, prefix,
                         &bad_region_e)) {
            DEBUG_PRINTF("EXSL\n");
            picked = furthest_lock;
            plan.escapes = next_escapes;
            return true;
        } else {
            DEBUG_PRINTF("EXSL failed, first bad region %u\n", bad_region_e);
        }
    } else {
        DEBUG_PRINTF("!stuck, skipped XSL and EXSL\n");
    }

    assert(!plan.is_reset);

    // No scheme validated: record which approach got furthest (largest first
    // bad region) so the caller can continue from there.
    *bad_region = max(bad_region_x, bad_region_e);
    if (bad_region_r >= *bad_region) {
        *bad_region = bad_region_r;
        plan.is_reset = true;
        plan.escapes.clear();
        picked = furthest;
    } else {
        picked = furthest_lock;
        plan.escapes = next_escapes;
    }

    DEBUG_PRINTF("first bad region now %u\n", *bad_region);
    return false;
}
|
|
|
|
/** \brief Append an empty som_plan with the given parent index; fails if the
 * plan count would exceed MAX_SOM_PLANS. */
static
bool addPlan(vector<som_plan> &plan, u32 parent) {
    DEBUG_PRINTF("adding plan %zu with parent %u\n", plan.size(),
                 parent);

    if (plan.size() >= MAX_SOM_PLANS) {
        DEBUG_PRINTF("too many plans!\n");
        return false;
    }

    plan.emplace_back(nullptr, CharReach(), false, parent);
    return true;
}
|
|
|
|
// Fetches all preds of {accept, acceptEod} for this graph.
|
|
static
|
|
void addReporterVertices(const NGHolder &g, vector<NFAVertex> &reporters) {
|
|
set<NFAVertex> tmp;
|
|
insert(&tmp, inv_adjacent_vertices(g.accept, g));
|
|
insert(&tmp, inv_adjacent_vertices(g.acceptEod, g));
|
|
tmp.erase(g.accept);
|
|
|
|
#ifdef DEBUG
|
|
DEBUG_PRINTF("add reporters:");
|
|
for (UNUSED auto v : tmp) {
|
|
printf(" %zu", g[v].index);
|
|
}
|
|
printf("\n");
|
|
#endif
|
|
|
|
reporters.insert(reporters.end(), tmp.begin(), tmp.end());
|
|
}
|
|
|
|
// Fetches all preds of {accept, acceptEod} in this region.
|
|
static
|
|
void addReporterVertices(const region_info &r, const NGHolder &g,
|
|
vector<NFAVertex> &reporters) {
|
|
for (auto v : r.exits) {
|
|
if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
|
|
DEBUG_PRINTF("add reporter %zu\n", g[v].index);
|
|
reporters.emplace_back(v);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Fetches the mappings of all preds of {accept, acceptEod} in this region.
|
|
static
|
|
void addMappedReporterVertices(const region_info &r, const NGHolder &g,
|
|
const unordered_map<NFAVertex, NFAVertex> &mapping,
|
|
vector<NFAVertex> &reporters) {
|
|
for (auto v : r.exits) {
|
|
if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
|
|
DEBUG_PRINTF("adding v=%zu\n", g[v].index);
|
|
auto it = mapping.find(v);
|
|
assert(it != mapping.end());
|
|
reporters.emplace_back(it->second);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Clone a version of the graph, but only including the in-edges of `enter'
// from earlier regions.
static
void cloneGraphWithOneEntry(NGHolder &out, const NGHolder &g,
                            const unordered_map<NFAVertex, u32> &regions,
                            NFAVertex entry, const vector<NFAVertex> &enters,
                            unordered_map<NFAVertex, NFAVertex> &orig_to_copy) {
    // Full clone first; orig_to_copy maps g's vertices to out's.
    orig_to_copy.clear();
    cloneHolder(out, g, &orig_to_copy);

    assert(contains(orig_to_copy, entry));
    const u32 region = regions.at(entry);

    // For every OTHER entry vertex of this region, sever its in-edges that
    // come from earlier regions, leaving `entry' as the only way into the
    // region from the front of the graph.
    for (auto v : enters) {
        if (v == entry) {
            continue;
        }
        assert(contains(orig_to_copy, v));

        for (auto u : inv_adjacent_vertices_range(v, g)) {
            if (regions.at(u) < region) {
                assert(edge(orig_to_copy[u], orig_to_copy[v], out).second);
                remove_edge(orig_to_copy[u], orig_to_copy[v], out);
            }
        }
    }

    // Edge removal may have left unreachable vertices; drop them.
    pruneUseless(out);
}
|
|
|
|
// Duplicate the "tail" of the graph (the region containing `enters' and all
// later regions) once per entry vertex, so that each entry ends up with its
// own private copy of the tail reachable only through it. On return, `enters'
// holds the copied entry vertices and `regions' has been recomputed for the
// mutated graph.
static
void expandGraph(NGHolder &g, unordered_map<NFAVertex, u32> &regions,
                 vector<NFAVertex> &enters) {
    assert(!enters.empty());
    const u32 split_region = regions.at(enters.front());

    vector<NFAVertex> new_enters;

    // Gather the list of vertices in the split region and subsequent regions.
    vector<NFAVertex> tail_vertices;
    for (auto v : vertices_range(g)) {
        if (is_special(v, g) || regions.at(v) < split_region) {
            continue;
        }
        tail_vertices.emplace_back(v);
    }

    for (auto enter : enters) {
        DEBUG_PRINTF("processing enter %zu\n", g[enter].index);
        map<NFAVertex, NFAVertex> orig_to_copy;

        // Make a copy of all of the tail vertices, storing region info along
        // the way.
        for (auto v : tail_vertices) {
            auto v2 = clone_vertex(g, v);
            orig_to_copy[v] = v2;
            regions[v2] = regions.at(v);
        }

        // Wire up the edges: edges from previous regions come from the
        // original vertices, while edges internal to and beyond the split
        // region go to the copies.

        for (const auto &m : orig_to_copy) {
            NFAVertex v = m.first, v2 = m.second;

            for (const auto &e : out_edges_range(v, g)) {
                NFAVertex t = target(e, g);
                u32 t_region = regions.at(t);
                if (t_region >= split_region && !is_special(t, g)) {
                    assert(contains(orig_to_copy, t));
                    t = orig_to_copy[t];
                }
                add_edge_if_not_present(v2, t, g[e], g);
            }

            for (const auto &e : in_edges_range(v, g)) {
                NFAVertex u = source(e, g);
                if (regions.at(u) >= split_region && !is_special(u, g)) {
                    assert(contains(orig_to_copy, u));
                    u = orig_to_copy[u];
                }
                add_edge_if_not_present(u, v2, g[e], g);
            }

        }

        // Clear the in-edges from earlier regions of the OTHER enters for this
        // copy of the split region.
        for (auto v : enters) {
            if (v == enter) {
                continue;
            }

            remove_in_edge_if(orig_to_copy[v],
                              [&](const NFAEdge &e) {
                                  NFAVertex u = source(e, g);
                                  return regions.at(u) < split_region;
                              }, g);
        }

        new_enters.emplace_back(orig_to_copy[enter]);
    }

    // Remove the original set of tail vertices.
    remove_vertices(tail_vertices, g);
    pruneUseless(g);
    // Region numbering is stale after this much surgery; rebuild it.
    regions = assignRegions(g);

    enters.swap(new_enters);
}
|
|
|
|
// Core of tree planning: advance SOM tracking along one single-entry path
// graph (g), appending a chain of som_plans rooted at `parent_plan'. Walks
// forward region by region from `picked' until the whole path is covered
// (returns true) or no further progress can be made (returns false).
// `copy_to_orig' maps path-graph vertices back to the original graph so that
// reporters are recorded against the caller's vertices.
static
bool doTreePlanningIntl(const NGHolder &g,
                        const unordered_map<NFAVertex, u32> &regions,
                        const map<u32, region_info> &info,
                        map<u32, region_info>::const_iterator picked, u32 bad_region,
                        u32 parent_plan,
                        const unordered_map<NFAVertex, NFAVertex> &copy_to_orig,
                        vector<som_plan> &plan, const Grey &grey) {
    assert(picked != info.end());

    DEBUG_PRINTF("picked=%u\n", picked->first);
    DEBUG_PRINTF("parent is %u\n", parent_plan);

    map<u32, region_info>::const_iterator furthest;

    bool to_end = false;
    while (!to_end) {
        DEBUG_PRINTF("picked is %u\n", picked->first);
        DEBUG_PRINTF("first bad region now %u\n", bad_region);

        furthest = info.find(bad_region); /* first bad */
        if (furthest == info.end()) {
            DEBUG_PRINTF("no partition\n");
            return false;
        }
        --furthest; /* last region we can establish som for */

        if (furthest->first <= picked->first) {
            DEBUG_PRINTF("failed to make any progress\n");
            return false;
        }

        // Search backwards from `furthest' for the latest region that can act
        // as a SOM "lock" (a point where the SOM value becomes pinned).
        map<u32, region_info>::const_iterator furthest_lock = furthest;
        CharReach next_escapes;
        bool lock_found;
        /* The last possible lock in the range that we examine should be the
         * best. If the previous plan is a lock, this follow as any early lock
         * must have a reach that is a subset of the last plan's lock. If the
         * last plan is a resetting plan ..., ?is this true? */
        do {
            lock_found = isPossibleLock(g, furthest_lock, info,
                                        &next_escapes);
        } while (!lock_found && (--furthest_lock)->first > picked->first);
        DEBUG_PRINTF("lock possible? %d\n", (int)lock_found);

        // A lock is only usable if a mandatory region separates it from the
        // previously established position.
        if (lock_found && !isMandRegionBetween(picked, furthest_lock)) {
            lock_found = false;
        }

        if (!isMandRegionBetween(picked, furthest)) {
            return false;
        }

        /* There is no certainty that the som at a reset location will always
         * go forward */
        if (plan[parent_plan].is_reset && lock_found) {
            NGHolder midfix;
            DEBUG_PRINTF("checking if midfix is suitable for lock\n");
            fillHolderForLockCheck(&midfix, g, info, furthest_lock);

            if (!firstMatchIsFirst(midfix)) {
                DEBUG_PRINTF("not stuck\n");
                lock_found = false;
            }
        }

        if (!addPlan(plan, parent_plan)) {
            return false;
        }

        to_end = false;

        // A lock with no escapes carries SOM all the way to the end.
        if (lock_found && next_escapes.none()) {
            picked = furthest_lock;
            to_end = true;
        }

        if (!to_end) {
            NGHolder conservative_midfix; /* for use in reset, exsl analysis */
            fillRoughMidfix(&conservative_midfix, g, regions, info, furthest);
            dumpHolder(conservative_midfix, 15, "som_pathmidfix", grey);

            u32 old_bad_region = bad_region;
            to_end = advancePlan(g, regions, conservative_midfix, lock_found,
                                 picked, furthest, furthest_lock, next_escapes,
                                 plan.back(), &bad_region);
            if (!to_end
                && bad_region <= old_bad_region) { /* we failed to progress */
                DEBUG_PRINTF("failed to make any progress\n");
                return false;
            }
        }

        /* handle direct edge to accepts from region */
        if (edge(furthest->second.exits.front(), g.accept, g).second
            || edge(furthest->second.exits.front(), g.acceptEod, g).second) {
            map<u32, region_info>::const_iterator it = furthest;
            do {
                addMappedReporterVertices(it->second, g, copy_to_orig,
                                          plan.back().reporters_in);
            } while (it != info.begin() && it->second.optional && (it--)->first);
        }

        /* create second prefix */
        plan.back().prefix = makePrefix(g, regions, furthest->second,
                                        next(furthest)->second);
        parent_plan = plan.size() - 1;
    }

    // The last region contributes reporters. If it's optional, the regions
    // before it do as well.
    map<u32, region_info>::const_reverse_iterator it = info.rbegin();
    do {
        DEBUG_PRINTF("add mapped reporters for region %u\n", it->first);
        addMappedReporterVertices(it->second, g, copy_to_orig,
                                  plan.back().reporters);
    } while (it != info.rend() && it->second.optional &&
             (++it)->first > furthest->first);

    return true;
}
|
|
|
|
// Tree planning: when linear SOM planning stalls at a region with multiple
// entry vertices, duplicate the tail of the graph per entry (expandGraph) and
// plan each single-entry path independently via doTreePlanningIntl. Succeeds
// only if every path can be planned to the end. NOTE: mutates `g'.
static
bool doTreePlanning(NGHolder &g,
                    map<u32, region_info>::const_iterator presplit,
                    map<u32, region_info>::const_iterator picked,
                    vector<som_plan> &plan, const Grey &grey) {
    DEBUG_PRINTF("picked is %u\n", picked->first);
    DEBUG_PRINTF("presplit is %u\n", presplit->first);

    // The problem region is the one immediately after `presplit'.
    map<u32, region_info>::const_iterator splitter = next(presplit);
    vector<NFAVertex> enters = splitter->second.enters; // mutable copy
    DEBUG_PRINTF("problem region has %zu entry vertices\n", enters.size());

    if (enters.size() <= 1) {
        // TODO: Splitting a region with one entry won't get us anywhere, but
        // it shouldn't create buggy analyses either. See UE-1892.
        DEBUG_PRINTF("nothing to split\n");
        return false;
    }

    if (plan.size() + enters.size() > MAX_SOM_PLANS) {
        DEBUG_PRINTF("splitting this tree would hit the plan limit.\n");
        return false;
    }

    assert(!plan.empty());
    const u32 parent_plan = plan.size() - 1;

    // Make a copy of the graph, with the subgraph under each enter vertex
    // duplicated without the edges into the other enter vertices.
    // NOTE WELL: this will invalidate 'info' from the split point, but it's
    // OK... we don't use it after this.
    auto g_regions = assignRegions(g);
    expandGraph(g, g_regions, enters);
    dumpHolder(g, g_regions, 14, "som_expandedtree", grey);

    for (auto v : enters) {
        DEBUG_PRINTF("enter %zu\n", g[v].index);

        // For this entry vertex, construct a version of the graph without the
        // other entries in this region (g_path), and calculate its depths and
        // regions.

        NGHolder g_path;
        unordered_map<NFAVertex, NFAVertex> orig_to_copy;
        cloneGraphWithOneEntry(g_path, g, g_regions, v, enters, orig_to_copy);
        auto regions = assignRegions(g_path);
        dumpHolder(g_path, regions, 14, "som_treepath", grey);

        map<u32, region_info> path_info;
        buildRegionMapping(g_path, regions, path_info);

        // Translate 'picked' to the corresponding region iterator over the
        // g_path graph. we can't trust the numbering, so we use a vertex
        // instead.
        NFAVertex picked_v = picked->second.enters.front();
        assert(contains(orig_to_copy, picked_v));
        u32 picked_region = regions.at(orig_to_copy[picked_v]);
        map<u32, region_info>::const_iterator path_pick =
            path_info.find(picked_region);
        if (path_pick == path_info.end()) {
            assert(0); // odd
            return false;
        }

        // Similarly, find our bad_region.
        assert(contains(orig_to_copy, v));
        u32 bad_region = regions.at(orig_to_copy[v]);

        // It's possible that the region may have grown to include its
        // successors, in which case we (currently) run screaming. Just
        // checking the size should be sufficient here.
        if (picked->second.full.size() != path_pick->second.full.size()) {
            DEBUG_PRINTF("picked region has grown, bailing\n");
            return false;
        }

        // Construct reverse mapping from vertices in g_path to g.
        unordered_map<NFAVertex, NFAVertex> copy_to_orig;
        for (const auto &m : orig_to_copy) {
            copy_to_orig.insert(make_pair(m.second, m.first));
        }

        bool to_end = doTreePlanningIntl(g_path, regions, path_info, path_pick,
                                         bad_region, parent_plan,
                                         copy_to_orig, plan, grey);
        if (!to_end) {
            return false;
        }
    }

    return true;
}
|
|
|
|
/** Controls whether doSomPlanning may fall back to tree planning, which
 * mutates the holder. */
enum dsp_behaviour {
    ALLOW_MODIFY_HOLDER,
    DISALLOW_MODIFY_HOLDER /* say no to tree planning */
};
|
|
|
|
// Main linear SOM planning loop: starting from the region `picked' (for which
// plan.back() already establishes SOM), repeatedly advance the point up to
// which SOM can be tracked, appending a new som_plan per step. If progress
// stalls, optionally falls back to tree planning (which mutates `g') unless
// behaviour == DISALLOW_MODIFY_HOLDER. Returns true if SOM can be carried to
// the accepts.
static
bool doSomPlanning(NGHolder &g, bool stuck_in,
                   const unordered_map<NFAVertex, u32> &regions,
                   const map<u32, region_info> &info,
                   map<u32, region_info>::const_iterator picked,
                   vector<som_plan> &plan,
                   const Grey &grey,
                   dsp_behaviour behaviour = ALLOW_MODIFY_HOLDER) {
    DEBUG_PRINTF("in picked is %u\n", picked->first);

    /* Need to verify how far the lock covers */
    u32 bad_region;
    NGHolder *ap_pref = plan.back().prefix.get();
    NGHolder ap_temp;
    // Big cycles make the prefix too expensive to execute directly; use a
    // rough midfix approximation instead.
    if (hasBigCycles(*ap_pref)) {
        fillRoughMidfix(&ap_temp, g, regions, info, picked);
        ap_pref = &ap_temp;
    }

    bool to_end = advancePlan(g, regions, *ap_pref, stuck_in, picked,
                              picked, picked, plan.back().escapes,
                              plan.back(), &bad_region);

    if (to_end) {
        DEBUG_PRINTF("advanced through the whole graph in one go!\n");
        addReporterVertices(g, plan.back().reporters);
        return true;
    }

    map<u32, region_info>::const_iterator prev_furthest = picked;
    map<u32, region_info>::const_iterator furthest;

    furthest = info.find(bad_region); /* first bad */
    if (furthest == info.begin() || furthest == info.end()) {
        DEBUG_PRINTF("no partition\n");
        return false;
    }
    --furthest; /* last region we can establish som for */

    if (furthest->first <= picked->first) {
    do_tree:
        /* unable to establish SoM past the last picked region */
        if (behaviour == DISALLOW_MODIFY_HOLDER) {
            /* tree planning mutates the graph */
            return false;
        }

        DEBUG_PRINTF("failed to make any progress\n");
        assert(!plan.empty());
        if (plan.size() == 1) {
            DEBUG_PRINTF("not handling initial alternations yet\n");
            return false;
        }
        // Drop the stalled plan; tree planning re-plans from its parent.
        plan.pop_back();
        return doTreePlanning(g, furthest, prev_furthest, plan, grey);
    }

    furthest = picked;
    while (!to_end) {
        prev_furthest = furthest;

        DEBUG_PRINTF("prev further is %u\n", prev_furthest->first);
        DEBUG_PRINTF("first bad region now %u\n", bad_region);

        furthest = info.find(bad_region); /* first bad */
        if (furthest == info.begin() || furthest == info.end()) {
            DEBUG_PRINTF("no partition\n");
            return false;
        }
        --furthest; /* last region we can establish som for */

        // Scan backwards for the latest usable SOM lock in range.
        map<u32, region_info>::const_iterator furthest_lock = furthest;
        CharReach next_escapes;
        bool stuck;
        do {
            stuck = isPossibleLock(g, furthest_lock, info, &next_escapes);
        } while (!stuck && (--furthest_lock)->first > prev_furthest->first);
        DEBUG_PRINTF("lock possible? %d\n", (int)stuck);
        DEBUG_PRINTF("furthest_lock=%u\n", furthest_lock->first);

        if (stuck && !isMandRegionBetween(prev_furthest, furthest_lock)) {
            stuck = false;
        }

        if (!isMandRegionBetween(prev_furthest, furthest)) {
            DEBUG_PRINTF("no mand region between %u and %u\n",
                         prev_furthest->first, furthest->first);
            return false;
        }

        /* There is no certainty that the som at a reset location will always
         * go forward */
        if (plan.back().is_reset && stuck) {
            NGHolder midfix;
            fillHolderForLockCheck(&midfix, g, info, furthest_lock);

            DEBUG_PRINTF("checking if midfix is suitable for lock\n");
            if (!firstMatchIsFirst(midfix)) {
                DEBUG_PRINTF("not stuck\n");
                stuck = false;
            }
        }

        assert(!plan.empty());
        if (!addPlan(plan, plan.size() - 1)) {
            return false;
        }

        to_end = false;

        // Lock with no escapes: SOM is pinned from here to the end.
        if (stuck && next_escapes.none()) {
            picked = furthest_lock;
            to_end = true;
        }

        if (!to_end) {
            NGHolder conservative_midfix; /* for use in reset, exsl analysis */
            fillRoughMidfix(&conservative_midfix, g, regions, info, furthest);

            u32 old_bad_region = bad_region;
            to_end = advancePlan(g, regions, conservative_midfix, stuck, picked,
                                 furthest, furthest_lock, next_escapes,
                                 plan.back(), &bad_region);

            if (!to_end
                && bad_region <= old_bad_region) { /* we failed to progress */
                goto do_tree;
            }
        }

        /* handle direct edge to accepts from region */
        if (edge(furthest->second.exits.front(), g.accept, g).second
            || edge(furthest->second.exits.front(), g.acceptEod, g).second) {
            map<u32, region_info>::const_iterator it = furthest;
            do {
                DEBUG_PRINTF("direct edge to accept from region %u\n",
                             it->first);
                addReporterVertices(it->second, g, plan.back().reporters_in);
            } while (it != info.begin() && it->second.optional
                     && (it--)->first);
        }

        /* create second prefix */
        plan.back().prefix = makePrefix(g, regions, furthest->second,
                                        next(furthest)->second);
    }
    DEBUG_PRINTF("(final) picked is %u\n", picked->first);

    // The last region contributes reporters. If it's optional, the regions
    // before it do as well.
    map<u32, region_info>::const_reverse_iterator it = info.rbegin();
    do {
        DEBUG_PRINTF("region %u contributes reporters to last plan\n",
                     it->first);
        addReporterVertices(it->second, g, plan.back().reporters);
    } while (it != info.rend() && it->second.optional &&
             (++it)->first > furthest->first);

    DEBUG_PRINTF("done!\n");
    return true;
}
|
|
|
|
// Debug-only dump of a single som_plan; compiled to a no-op unless DEBUG or
// DUMP_PLANS is defined.
static
void dumpSomPlan(UNUSED const NGHolder &g, UNUSED const som_plan &p,
                 UNUSED size_t num) {
#if defined(DEBUG) || defined(DUMP_PLANS)
    DEBUG_PRINTF("plan %zu: prefix=%p, escapes=%s, is_reset=%d, "
                 "parent=%u\n",
                 num, p.prefix.get(),
                 describeClass(p.escapes, 20, CC_OUT_TEXT).c_str(),
                 p.is_reset, p.parent);
    printf(" reporters:");
    for (auto v : p.reporters) {
        printf(" %zu", g[v].index);
    }
    printf("\n");
    printf(" reporters_in:");
    for (auto v : p.reporters_in) {
        printf(" %zu", g[v].index);
    }
    printf("\n");
#endif
}
|
|
|
|
/**
 * Note: if we fail to build a midfix/ng.addHolder, we throw a pattern too
 * large exception as (1) if previous ng modification have been applied (other
 * midfixes have been applied), ng will be an undefined state on return and (2)
 * if the head of a pattern cannot be implemented we are generally unable to
 * implement the full pattern.
 */
static
void implementSomPlan(NG &ng, const ExpressionInfo &expr, u32 comp_id,
                      NGHolder &g, vector<som_plan> &plan,
                      const u32 first_som_slot) {
    ReportManager &rm = ng.rm;
    SomSlotManager &ssm = ng.ssm;

    DEBUG_PRINTF("%zu plans\n", plan.size());
    assert(plan.size() <= MAX_SOM_PLANS);
    assert(!plan.empty());

    // One SOM slot per plan; child plans read their parent's slot.
    vector<u32> som_slots(plan.size());
    som_slots[0] = first_som_slot;

    // Root plan, which already has a SOM slot assigned (first_som_slot).
    dumpSomPlan(g, plan.front(), 0);
    dumpSomSubComponent(*plan.front().prefix, "04_som", expr.index, comp_id, 0,
                        ng.cc.grey);
    assert(plan.front().prefix);
    if (plan.front().escapes.any() && !plan.front().is_reset) {
        /* setup escaper for first som location */
        if (!createEscaper(ng, *plan.front().prefix, plan.front().escapes,
                           first_som_slot)) {
            throw CompileError(expr.index, "Pattern is too large.");
        }
    }

    assert(plan.front().reporters_in.empty());
    updateReportToUseRecordedSom(rm, g, plan.front().reporters, first_som_slot);

    // Tree of plans, encoded in a vector.
    vector<som_plan>::const_iterator it = plan.begin();
    for (++it; it != plan.end(); ++it) {
        const u32 plan_num = it - plan.begin();
        dumpSomPlan(g, *it, plan_num);
        dumpSomSubComponent(*it->prefix, "04_som", expr.index, comp_id,
                            plan_num, ng.cc.grey);

        assert(it->parent < plan_num); // parents precede children
        u32 som_slot_in = som_slots[it->parent];
        u32 som_slot_out = ssm.getSomSlot(*it->prefix, it->escapes,
                                          it->is_reset, som_slot_in);
        som_slots[plan_num] = som_slot_out;

        assert(!it->no_implement);
        if (!buildMidfix(ng, *it, som_slot_in, som_slot_out)) {
            throw CompileError(expr.index, "Pattern is too large.");
        }
        updateReportToUseRecordedSom(rm, g, it->reporters_in, som_slot_in);
        updateReportToUseRecordedSom(rm, g, it->reporters, som_slot_out);
    }

    /* create prefix to set the som_loc */
    if (!plan.front().no_implement) {
        renumber_vertices(*plan.front().prefix);
        assert(plan.front().prefix->kind == NFA_OUTFIX);
        if (!ng.addHolder(*plan.front().prefix)) {
            throw CompileError(expr.index, "Pattern is too large.");
        }
    }
}
|
|
|
|
static
|
|
void anchorStarts(NGHolder &g) {
|
|
vector<NFAEdge> dead;
|
|
for (const auto &e : out_edges_range(g.startDs, g)) {
|
|
NFAVertex v = target(e, g);
|
|
if (v == g.startDs) {
|
|
continue;
|
|
}
|
|
add_edge_if_not_present(g.start, v, g[e], g);
|
|
dead.emplace_back(e);
|
|
}
|
|
remove_edges(dead, g);
|
|
}
|
|
|
|
static
|
|
void setZeroReports(NGHolder &g) {
|
|
set<NFAVertex> acceptors;
|
|
insert(&acceptors, inv_adjacent_vertices(g.accept, g));
|
|
insert(&acceptors, inv_adjacent_vertices(g.acceptEod, g));
|
|
acceptors.erase(g.accept);
|
|
|
|
for (auto v : vertices_range(g)) {
|
|
auto &reports = g[v].reports;
|
|
reports.clear();
|
|
|
|
if (!contains(acceptors, v)) {
|
|
continue;
|
|
}
|
|
|
|
// We use the report ID to store the offset adjustment used for virtual
|
|
// starts.
|
|
|
|
if (g[v].assert_flags & POS_FLAG_VIRTUAL_START) {
|
|
reports.insert(1);
|
|
} else {
|
|
reports.insert(0);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* updates the reports on all vertices leading to the sink */
|
|
static
|
|
void makeSomRevNfaReports(ReportManager &rm, NGHolder &g, NFAVertex sink,
|
|
const ReportID report, const u32 comp_id) {
|
|
// Construct replacement report.
|
|
Report ir = rm.getReport(report);
|
|
ir.type = EXTERNAL_CALLBACK_SOM_REV_NFA;
|
|
ir.revNfaIndex = comp_id;
|
|
ReportID new_report = rm.getInternalId(ir);
|
|
|
|
for (auto v : inv_adjacent_vertices_range(sink, g)) {
|
|
if (v == g.accept) {
|
|
continue;
|
|
}
|
|
|
|
auto &r = g[v].reports;
|
|
if (contains(r, report)) {
|
|
r.erase(report);
|
|
r.insert(new_report);
|
|
}
|
|
}
|
|
}
|
|
|
|
static
|
|
void clearProperInEdges(NGHolder &g, const NFAVertex sink) {
|
|
vector<NFAEdge> dead;
|
|
for (const auto &e : in_edges_range(sink, g)) {
|
|
if (source(e, g) == g.accept) {
|
|
continue;
|
|
}
|
|
dead.emplace_back(e);
|
|
}
|
|
|
|
if (dead.empty()) {
|
|
return;
|
|
}
|
|
|
|
remove_edges(dead, g);
|
|
pruneUseless(g, false);
|
|
}
|
|
|
|
namespace {
/** Associates a built reverse NFA with the sink and report it was built for,
 * so the caller can later rewrite that report on the original graph. */
struct SomRevNfa {
    SomRevNfa(NFAVertex s, ReportID r, bytecode_ptr<NFA> n)
        : sink(s), report(r), nfa(std::move(n)) {}
    NFAVertex sink;        // g.accept or g.acceptEod
    ReportID report;       // report to replace with a SOM_REV_NFA report
    bytecode_ptr<NFA> nfa; // owned reverse NFA
};
}
|
|
|
|
// Build a reversed, anchored NFA for `g' that fires report 0 (or 1 for
// virtual starts) on accept; used at runtime to scan backwards for the start
// of match. Returns nullptr on construction failure.
static
bytecode_ptr<NFA> makeBareSomRevNfa(const NGHolder &g,
                                    const CompileContext &cc) {
    // Create a reversed anchored version of this NFA which fires a zero report
    // ID on accept.
    NGHolder g_rev;
    reverseHolder(g, g_rev);
    anchorStarts(g_rev);
    setZeroReports(g_rev);

    // Prep for actual construction.
    renumber_vertices(g_rev);
    g_rev.kind = NFA_REV_PREFIX;
    reduceGraphEquivalences(g_rev, cc);
    removeRedundancy(g_rev, SOM_NONE);

    DEBUG_PRINTF("building a rev NFA with %zu vertices\n", num_vertices(g_rev));

    auto nfa = constructReversedNFA(g_rev, cc);
    if (!nfa) {
        return nfa;
    }

    // Set some useful properties.
    // Note: widths are taken from the FORWARD graph g, not g_rev.
    depth maxWidth = findMaxWidth(g);
    if (maxWidth.is_finite()) {
        nfa->maxWidth = (u32)maxWidth;
    } else {
        nfa->maxWidth = 0; // 0 encodes "unbounded"
    }
    depth minWidth = findMinWidth(g);
    nfa->minWidth = (u32)minWidth;

    return nfa;
}
|
|
|
|
// Build a reverse NFA for the portion of `g' that reports `report' on the
// given sink (accept or acceptEod) and append it to som_nfas. Returns true on
// success or if there is no work to do for this (report, sink) pair; false
// only on construction failure.
static
bool makeSomRevNfa(vector<SomRevNfa> &som_nfas, const NGHolder &g,
                   const ReportID report, const NFAVertex sink,
                   const CompileContext &cc) {
    // Clone the graph with ONLY the given report vertices on the given sink.
    NGHolder g2;
    cloneHolder(g2, g);
    // Clear the in-edges of the OTHER sink so only `sink' accepts survive.
    clearProperInEdges(g2, sink == g.accept ? g2.acceptEod : g2.accept);
    pruneAllOtherReports(g2, report);

    // Empty after pruning (only the accept->acceptEod edge remains): nothing
    // reports via this (report, sink) pair.
    if (in_degree(g2.accept, g2) == 0 && in_degree(g2.acceptEod, g2) == 1) {
        DEBUG_PRINTF("no work to do for this sink\n");
        return true;
    }

    renumber_vertices(g2); // for findMinWidth, findMaxWidth.

    auto nfa = makeBareSomRevNfa(g2, cc);
    if (!nfa) {
        DEBUG_PRINTF("couldn't build rev nfa\n");
        return false;
    }

    som_nfas.emplace_back(sink, report, std::move(nfa));
    return true;
}
|
|
|
|
// Attempt to handle SOM for the whole graph with reverse NFAs: one per
// (report, sink) pair. On success, each affected report on `g' is rewritten
// to a SOM_REV_NFA report and ownership of the NFAs passes to the SOM slot
// manager. Returns false (leaving g's reports untouched) if the graph is too
// wide or any reverse NFA cannot be built.
static
bool doSomRevNfa(NG &ng, NGHolder &g, const CompileContext &cc) {
    ReportManager &rm = ng.rm;

    // FIXME might want to work on a graph without extra redundancy?
    depth maxWidth = findMaxWidth(g);
    DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str());

    // The reverse scan can only look back as far as the available history.
    if (maxWidth > depth(ng.maxSomRevHistoryAvailable)) {
        DEBUG_PRINTF("too wide\n");
        return false;
    }

    set<ReportID> reports = all_reports(g);
    DEBUG_PRINTF("%zu reports\n", reports.size());

    // We distinguish between reports and accept/acceptEod sinks in order to
    // correctly handle cases which do different things on eod/normal accepts.
    // Later, it might be more elegant to do this with a single NFA and
    // multi-tops.

    vector<SomRevNfa> som_nfas;

    for (auto report : reports) {
        if (!makeSomRevNfa(som_nfas, g, report, g.accept, cc)) {
            return false;
        }
        if (!makeSomRevNfa(som_nfas, g, report, g.acceptEod, cc)) {
            return false;
        }
    }

    for (auto &som_nfa : som_nfas) {
        assert(som_nfa.nfa);

        // Transfer ownership of the NFA to the SOM slot manager.
        u32 comp_id = ng.ssm.addRevNfa(std::move(som_nfa.nfa), maxWidth);

        // Replace this report on 'g' with a SOM_REV_NFA report pointing at our
        // new component.
        makeSomRevNfaReports(rm, g, som_nfa.sink, som_nfa.report, comp_id);
    }

    if (ng.cc.streaming) {
        assert(ng.ssm.somHistoryRequired() <=
               max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable));
    }

    return true;
}
|
|
|
|
// Build a reverse NFA for a prefix graph (which must have exactly one report
// and fit within the available SOM history) and register it with the SOM slot
// manager. Returns the new component's index; throws if construction fails.
static
u32 doSomRevNfaPrefix(NG &ng, const ExpressionInfo &expr, const NGHolder &g,
                      const CompileContext &cc) {
    depth maxWidth = findMaxWidth(g);

    // Caller guarantees these preconditions.
    assert(maxWidth <= depth(ng.maxSomRevHistoryAvailable));
    assert(all_reports(g).size() == 1);

    auto nfa = makeBareSomRevNfa(g, cc);
    if (!nfa) {
        throw CompileError(expr.index, "Pattern is too large.");
    }

    if (ng.cc.streaming) {
        assert(ng.ssm.somHistoryRequired() <=
               max(cc.grey.maxHistoryAvailable, ng.maxSomRevHistoryAvailable));
    }

    return ng.ssm.addRevNfa(std::move(nfa), maxWidth);
}
|
|
|
|
static
|
|
bool is_literable(const NGHolder &g, NFAVertex v) {
|
|
const CharReach &cr = g[v].char_reach;
|
|
return cr.count() == 1 || cr.isCaselessChar();
|
|
}
|
|
|
|
static
|
|
void append(ue2_literal &s, const CharReach &cr) {
|
|
assert(cr.count() == 1 || cr.isCaselessChar());
|
|
s.push_back(cr.find_first(), cr.isCaselessChar());
|
|
}
|
|
|
|
// Scan the region map for a run of single-character, mandatory, dag regions
// forming a literal of at least MIN_LITERAL_LENGTH that ends at or after
// `lower_bound'. On success, returns an iterator to the literal's last region
// and writes the literal (truncated to grey.maxHistoryAvailable characters)
// to s_out; returns info.end() if no such literal exists.
static
map<u32, region_info>::const_iterator findLaterLiteral(const NGHolder &g,
                           const map<u32, region_info> &info,
                           map<u32, region_info>::const_iterator lower_bound,
                           ue2_literal &s_out, const Grey &grey) {
#define MIN_LITERAL_LENGTH 3
    s_out.clear();
    bool past_lower = false;
    ue2_literal s;
    map<u32, region_info>::const_iterator it;
    for (it = info.begin(); it != info.end(); ++it) {
        if (it == lower_bound) {
            past_lower = true;
        }
        // Region extends the current literal if it is mandatory, acyclic,
        // single-vertex, and that vertex matches one (caseless) character.
        if (!it->second.optional && it->second.dag
            && it->second.full.size() == 1
            && is_literable(g, it->second.full.front())) {
            append(s, g[it->second.full.front()].char_reach);

            // Literal is already as long as the history window allows.
            if (s.length() >= grey.maxHistoryAvailable && past_lower) {
                goto exit;
            }
        } else {
            // Run broken: accept the literal if it is long enough and ends
            // past the lower bound, else restart.
            if (past_lower && it != lower_bound
                && s.length() >= MIN_LITERAL_LENGTH) {
                --it;
                goto exit;
            }
            s.clear();
        }
    }

    // Handle a literal that runs to the end of the region map.
    if (past_lower && it != lower_bound && s.length() >= MIN_LITERAL_LENGTH) {
        --it;
        s_out = s;
        return it;
    }
exit:
    // Keep only the trailing maxHistoryAvailable characters.
    if (s.length() > grey.maxHistoryAvailable) {
        ue2_literal::const_iterator jt = s.end() - grey.maxHistoryAvailable;
        for (; jt != s.end(); ++jt) {
            s_out.push_back(*jt);
        }
    } else {
        s_out = s;
    }
    return it;
}
|
|
|
|
// Try to continue SOM tracking past `picked' with an ordinary plan chain
// (rooted at a no_implement plan, since the sombe machinery establishes SOM
// up to this point). On failure, any SOM slots allocated during planning are
// rolled back. Tree planning is disallowed so `g' is not mutated.
static
bool attemptToBuildChainAfterSombe(SomSlotManager &ssm, NGHolder &g,
                                   const unordered_map<NFAVertex, u32> &regions,
                                   const map<u32, region_info> &info,
                                   map<u32, region_info>::const_iterator picked,
                                   const Grey &grey,
                                   vector<som_plan> *plan) {
    DEBUG_PRINTF("trying to chain from %u\n", picked->first);
    const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */

    shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second,
                                             next(picked)->second);

    // Quick check to stop us from trying this on huge graphs, which causes us
    // to spend forever in ng_execute looking at cases that will most like
    // fail. See UE-2078.
    size_t prefix_size = num_vertices(*prefix);
    size_t total_size = num_vertices(g);
    assert(total_size >= prefix_size);
    if (total_size - prefix_size > MAX_SOMBE_CHAIN_VERTICES) {
        DEBUG_PRINTF("suffix has %zu vertices, fail\n",
                     total_size - prefix_size);
        return false;
    }

    // Prefix fires a single zero report on accept.
    clearReports(*prefix);
    for (auto u : inv_adjacent_vertices_range(prefix->accept, *prefix)) {
        (*prefix)[u].reports.insert(0);
    }

    dumpHolder(*prefix, 0, "full_haiglit_prefix", grey);

    // Is SOM pinned ("stuck") at the picked region?
    CharReach escapes;
    bool stuck = isPossibleLock(g, picked, info, &escapes);
    if (stuck) {
        NGHolder gg;
        fillHolderForLockCheck(&gg, g, info, picked);

        stuck = firstMatchIsFirst(gg);
    }

    DEBUG_PRINTF("stuck = %d\n", (int)stuck);

    // Note: no-one should ever pay attention to the root plan's som_loc_in.
    plan->emplace_back(prefix, escapes, false, 0);
    plan->back().no_implement = true;

    dumpHolder(*plan->back().prefix, 22, "som_prefix", grey);

    /* don't allow tree planning to mutate the graph */
    if (!doSomPlanning(g, stuck, regions, info, picked, *plan, grey,
                       DISALLOW_MODIFY_HOLDER)) {
        // Rollback SOM locations.
        ssm.rollbackSomTo(numSomLocsBefore);

        DEBUG_PRINTF("fail to chain\n");
        return false;
    }

    return true;
}
|
|
|
|
static
|
|
void setReportOnHaigPrefix(RoseBuild &rose, NGHolder &h) {
|
|
ReportID haig_report_id = rose.getNewNfaReport();
|
|
DEBUG_PRINTF("setting report id of %u\n", haig_report_id);
|
|
|
|
clearReports(h);
|
|
for (auto u : inv_adjacent_vertices_range(h.accept, h)) {
|
|
h[u].reports.clear();
|
|
h[u].reports.insert(haig_report_id);
|
|
}
|
|
}
|
|
|
|
// Attempt to build a haig (SOM-tracking DFA) from the prefix of `g' ending at
// `picked'. On success, writes the DFA and its source prefix graph to *haig
// and *haig_prefix and returns true.
static
bool tryHaig(RoseBuild &rose, const NGHolder &g,
             const unordered_map<NFAVertex, u32> &regions,
             som_type som, u32 somPrecision,
             map<u32, region_info>::const_iterator picked,
             shared_ptr<raw_som_dfa> *haig, shared_ptr<NGHolder> *haig_prefix,
             const Grey &grey) {
    DEBUG_PRINTF("trying to build a haig\n");
    shared_ptr<NGHolder> prefix = makePrefix(g, regions, picked->second,
                                             next(picked)->second);
    prefix->kind = NFA_PREFIX;
    setReportOnHaigPrefix(rose, *prefix);
    dumpHolder(*prefix, 0, "haig_prefix", grey);
    vector<vector<CharReach> > triggers; /* empty for prefix */
    *haig = attemptToBuildHaig(*prefix, som, somPrecision, triggers, grey);
    if (!*haig) {
        DEBUG_PRINTF("failed to haig\n");
        return false;
    }
    *haig_prefix = prefix;
    return true;
}
|
|
|
|
// Add a sombe Rose: start --(haig prefix)--> literal --> accept(reports).
// The haig DFA attached to the first edge computes SOM for the literal.
static
void roseAddHaigLiteral(RoseBuild &tb, const shared_ptr<NGHolder> &prefix,
                        const shared_ptr<raw_som_dfa> &haig,
                        const ue2_literal &lit, const set<ReportID> &reports) {
    assert(prefix && haig);

    DEBUG_PRINTF("trying to build a sombe from %s\n", dumpString(lit).c_str());

    RoseInGraph ig;
    RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
    RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);

    add_edge(s, v, RoseInEdgeProps(prefix, haig, lit.length()), ig);

    assert(!reports.empty());
    RoseInVertex a = add_vertex(RoseInVertexProps::makeAccept(reports), ig);
    add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);

    calcVertexOffsets(ig);

    UNUSED bool rv = tb.addSombeRose(ig);
    assert(rv); // TODO: recover from addRose failure
}
|
|
|
|
static
/**
 * Attempt to handle SOM for the outfix graph \a g by finding a literal inside
 * it and building a Haig (SOM DFA) prefix ending at that literal, chaining the
 * remainder of the graph with a SOM plan. Iterates over candidate literals
 * starting from \a lower_bound until one works or none remain.
 *
 * Returns SOMBE_HANDLED_INTERNAL (graph still needs outfix handling for the
 * remainder), SOMBE_HANDLED_ALL (fully handled), or SOMBE_FAIL. On failure the
 * SOM slot allocation is rolled back; the caller is responsible for restoring
 * the (possibly mutated) graph.
 */
sombe_rv doHaigLitSom(NG &ng, NGHolder &g, const ExpressionInfo &expr,
                      u32 comp_id, som_type som,
                      const unordered_map<NFAVertex, u32> &regions,
                      const map<u32, region_info> &info,
                      map<u32, region_info>::const_iterator lower_bound) {
    DEBUG_PRINTF("entry\n");
    assert(g.kind == NFA_OUTFIX);
    const CompileContext &cc = ng.cc;
    ReportManager &rm = ng.rm;
    SomSlotManager &ssm = ng.ssm;

    if (!cc.grey.allowHaigLit) {
        return SOMBE_FAIL;
    }

    const u32 numSomLocsBefore = ssm.numSomSlots(); /* for rollback */
    u32 som_loc = ssm.getPrivateSomSlot();

    if (!checkViolet(rm, g, false, cc) && !isImplementableNFA(g, &rm, cc)) {
        // This is an optimisation: if we can't build a Haig from a portion of
        // the graph, then we won't be able to manage it as an outfix either
        // when we fall back.
        throw CompileError(expr.index, "Pattern is too large.");
    }

    while (1) {
        DEBUG_PRINTF("lower bound is %u\n", lower_bound->first);
        ue2_literal s;
        map<u32, region_info>::const_iterator lit
            = findLaterLiteral(g, info, lower_bound, s, cc.grey);
        if (lit == info.end()) {
            DEBUG_PRINTF("failed to find literal\n");
            // No candidates left: release any SOM slots taken above.
            ssm.rollbackSomTo(numSomLocsBefore);
            return SOMBE_FAIL;
        }
        DEBUG_PRINTF("test literal: %s [r=%u]\n", dumpString(s).c_str(),
                     lit->first);

        if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) {
            DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n");
            lower_bound = lit;
            ++lower_bound;
            continue;
        }

        shared_ptr<raw_som_dfa> haig;
        shared_ptr<NGHolder> haig_prefix;
        // haig_reg will be rewound to the region where the literal begins.
        map<u32, region_info>::const_iterator haig_reg = lit;

        if (edge(lit->second.exits.front(), g.acceptEod, g).second) {
            /* TODO: handle */
            ssm.rollbackSomTo(numSomLocsBefore);
            return SOMBE_FAIL;
        }

        // Step back one region per literal character to find the region
        // preceding the literal.
        advance(haig_reg, -(s32)s.length());

        if (!haig_reg->first && haig_reg->second.full.size() == 2) {
            /* just starts */

            /* TODO: make below assertion true, reset checks could be stronger
             * (12356)
             */
            /* assert(!attemptToBuildChainAfterSombe(ng, g, info, lit, cc.grey,
               &plan)); */

            lower_bound = lit;
            ++lower_bound;
            continue; /* somebody else should have been able to chain */
        }

        bool ok = true;
        set<ReportID> rep;
        if (next(lit) != info.end()) {
            /* non terminal literal */

            /* TODO: handle edges to accept ? */
            vector<som_plan> plan;
            if (edge(lit->second.exits.front(), g.accept, g).second) {
                insert(&rep, g[lit->second.exits.front()].reports);
                remove_edge(lit->second.exits.front(), g.accept, g);
                g[lit->second.exits.front()].reports.clear();

                /* Note: we can mess with the graph as this is the last literal
                 * we will find and on failure the graph will be thrown away */
            }

            // Build the SOM plan for the graph after the literal, then the
            // Haig prefix up to the literal; both must succeed.
            ok = attemptToBuildChainAfterSombe(ssm, g, regions, info, lit,
                                               cc.grey, &plan);
            ok = ok && tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(),
                               haig_reg, &haig, &haig_prefix, cc.grey);

            if (!ok) {
                DEBUG_PRINTF(":( going to next attempt\n");
                goto next_try;
            }

            implementSomPlan(ng, expr, comp_id, g, plan, som_loc);

            // Synthesize an internal report that records the SOM into
            // som_loc when the literal matches.
            Report ir = makeCallback(0U, 0);
            assert(!plan.empty());
            if (plan.front().is_reset) {
                ir.type = INTERNAL_SOM_LOC_SET_FROM;
            } else {
                ir.type = INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE;
            }
            ir.onmatch = som_loc;
            rep.insert(rm.getInternalId(ir));
        } else {
            /* terminal literal */
            ok = tryHaig(*ng.rose, g, regions, som, ssm.somPrecision(), haig_reg,
                         &haig, &haig_prefix, cc.grey);

            /* find report */
            insert(&rep, g[lit->second.exits.front()].reports);

            /* TODO: som_loc is unused */
        }

        if (ok) {
            roseAddHaigLiteral(*ng.rose, haig_prefix, haig, s, rep);
            if (next(lit) != info.end()) {
                // The rest of the graph is handled by the SOM plan above.
                return SOMBE_HANDLED_INTERNAL;
            } else {
                // Terminal literal: the private slot was never needed.
                ssm.rollbackSomTo(numSomLocsBefore);
                return SOMBE_HANDLED_ALL;
            }
        }
    next_try:
        lower_bound = lit;
        ++lower_bound;
    }
    assert(0);
    return SOMBE_FAIL;
}
|
|
|
|
static
/**
 * Enumerate the set of literals that lead from the (floating) starts of \a g
 * to a common frontier of vertices, expanding breadth-first one character at
 * a time. On success, \a lits receives the literals and \a terminals the
 * frontier vertices at which expansion stopped.
 *
 * Expansion at a vertex stops when it would revisit a vertex (loop), reach an
 * accept/match vertex, or cross a vertex with a wide char reach; expansion as
 * a whole stops when the literal count budget (MAX_LEADING_LITERALS) is
 * exhausted or a literal becomes long and mixed-sensitivity.
 *
 * Returns false if the graph is not floating or no literals were found.
 */
bool leadingLiterals(const NGHolder &g, set<ue2_literal> *lits,
                     set<NFAVertex> *terminals) {
    /* TODO: smarter (topo) */
#define MAX_LEADING_LITERALS 20
    set<NFAVertex> s_succ;
    insert(&s_succ, adjacent_vertices(g.start, g));

    set<NFAVertex> sds_succ;
    insert(&sds_succ, adjacent_vertices(g.startDs, g));

    // Anchored-only successors would mean the pattern is not purely floating.
    if (!is_subset_of(s_succ, sds_succ)) {
        DEBUG_PRINTF("not floating\n");
        return false;
    }

    sds_succ.erase(g.startDs);

    // curr maps each frontier vertex to the literals that reach it; the
    // expansion begins with the empty literal at startDs.
    map<NFAVertex, vector<ue2_literal> > curr;
    curr[g.startDs].emplace_back(ue2_literal());

    // seen[v]: vertices already on some path to v (used for loop detection).
    map<NFAVertex, set<NFAVertex> > seen;
    map<NFAVertex, vector<ue2_literal> > next;

    bool did_expansion = true;
    while (did_expansion) {
        did_expansion = false;
        u32 count = 0;
        assert(!curr.empty());
        for (const auto &m : curr) {
            const NFAVertex u = m.first;
            const vector<ue2_literal> &base = m.second;
            DEBUG_PRINTF("expanding from %zu\n", g[u].index);
            // First pass: check every successor is safe to expand into;
            // otherwise u becomes a terminal for this round.
            for (auto v : adjacent_vertices_range(u, g)) {
                if (v == g.startDs) {
                    continue;
                }
                if (contains(seen[u], v)) {
                    DEBUG_PRINTF("loop\n");
                    goto skip_to_next_terminal;
                }
                if (is_any_accept(v, g) || is_match_vertex(v, g)) {
                    DEBUG_PRINTF("match\n");
                    goto skip_to_next_terminal;
                }
                if (g[v].char_reach.count() > 2 * MAX_LEADING_LITERALS) {
                    DEBUG_PRINTF("wide\n");
                    goto skip_to_next_terminal;
                }
            }

            // Second pass: extend each base literal by every character that
            // each successor can match.
            for (auto v : adjacent_vertices_range(u, g)) {
                assert(!contains(seen[u], v));
                if (v == g.startDs) {
                    continue;
                }
                insert(&seen[v], seen[u]);
                seen[v].insert(v);
                CharReach cr = g[v].char_reach;
                vector<ue2_literal> &out = next[v];

                DEBUG_PRINTF("expanding to %zu (|| = %zu)\n", g[v].index,
                             cr.count());
                for (size_t c = cr.find_first(); c != CharReach::npos;
                     c = cr.find_next(c)) {
                    // A char is caseless if both case variants are reachable.
                    bool nocase = ourisalpha(c) && cr.test(mytoupper(c))
                        && cr.test(mytolower(c));

                    if (nocase && (char)c == mytolower(c)) {
                        continue; /* uppercase already handled us */
                    }

                    for (const auto &lit : base) {
                        if (count >= MAX_LEADING_LITERALS) {
                            DEBUG_PRINTF("count %u\n", count);
                            goto exit;
                        }
                        did_expansion = true;
                        out.emplace_back(lit);
                        out.back().push_back(c, nocase);
                        count++;
                        // Long mixed-sensitivity literals cannot be used;
                        // stop expanding entirely.
                        if (out.back().length() > MAX_MASK2_WIDTH
                            && mixed_sensitivity(out.back())) {
                            goto exit;
                        }

                    }
                }
            }
            if (0) {
            skip_to_next_terminal:
                // u is terminal this round: carry its literals forward
                // unchanged.
                insert(&next[u], next[u].end(), base);
                count += base.size();
                if (count > MAX_LEADING_LITERALS) {
                    DEBUG_PRINTF("count %u\n", count);
                    goto exit;
                }
            }
        }

        curr.swap(next);
        next.clear();
    };
exit:;
    // Emit results: each remaining frontier vertex becomes a terminal.
    for (const auto &m : curr) {
        NFAVertex t = m.first;
        if (t == g.startDs) {
            // No expansion happened at all.
            assert(curr.size() == 1);
            return false;
        }
        assert(!is_special(t, g));
        terminals->insert(t);
        insert(lits, m.second);
    }
    assert(lits->size() <= MAX_LEADING_LITERALS);
    return !lits->empty();
}
|
|
|
|
static
|
|
bool splitOffLeadingLiterals(const NGHolder &g, set<ue2_literal> *lit_out,
|
|
NGHolder *rhs) {
|
|
DEBUG_PRINTF("looking for a leading literals\n");
|
|
|
|
set<NFAVertex> terms;
|
|
if (!leadingLiterals(g, lit_out, &terms)) {
|
|
return false;
|
|
}
|
|
|
|
for (UNUSED const auto &lit : *lit_out) {
|
|
DEBUG_PRINTF("literal is '%s' (len %zu)\n", dumpString(lit).c_str(),
|
|
lit.length());
|
|
}
|
|
|
|
/* need to validate that it is a clean split */
|
|
assert(!terms.empty());
|
|
set<NFAVertex> adj_term1;
|
|
insert(&adj_term1, adjacent_vertices(*terms.begin(), g));
|
|
for (auto v : terms) {
|
|
DEBUG_PRINTF("term %zu\n", g[v].index);
|
|
set<NFAVertex> temp;
|
|
insert(&temp, adjacent_vertices(v, g));
|
|
if (temp != adj_term1) {
|
|
DEBUG_PRINTF("bad split\n");
|
|
return false;
|
|
}
|
|
}
|
|
|
|
unordered_map<NFAVertex, NFAVertex> rhs_map;
|
|
vector<NFAVertex> pivots;
|
|
insert(&pivots, pivots.end(), adj_term1);
|
|
splitRHS(g, pivots, rhs, &rhs_map);
|
|
|
|
assert(is_triggered(*rhs));
|
|
return true;
|
|
}
|
|
|
|
static
|
|
void findBestLiteral(const NGHolder &g,
|
|
const unordered_map<NFAVertex, u32> ®ions,
|
|
ue2_literal *lit_out, NFAVertex *v,
|
|
const CompileContext &cc) {
|
|
map<u32, region_info> info;
|
|
buildRegionMapping(g, regions, info, false);
|
|
|
|
ue2_literal best;
|
|
NFAVertex best_v = NGHolder::null_vertex();
|
|
|
|
map<u32, region_info>::const_iterator lit = info.begin();
|
|
while (1) {
|
|
ue2_literal s;
|
|
lit = findLaterLiteral(g, info, lit, s, cc.grey);
|
|
if (lit == info.end()) {
|
|
break;
|
|
}
|
|
DEBUG_PRINTF("test literal: %s [r=%u]\n", dumpString(s).c_str(),
|
|
lit->first);
|
|
|
|
if (s.length() > MAX_MASK2_WIDTH && mixed_sensitivity(s)) {
|
|
DEBUG_PRINTF("long & mixed-sensitivity, Rose can't handle this\n");
|
|
++lit;
|
|
continue;
|
|
}
|
|
|
|
if (s.length() > best.length()) {
|
|
best = s;
|
|
assert(!lit->second.exits.empty());
|
|
best_v = lit->second.exits[0];
|
|
}
|
|
|
|
++lit;
|
|
}
|
|
|
|
lit_out->swap(best);
|
|
*v = best_v;
|
|
}
|
|
|
|
static
|
|
bool splitOffBestLiteral(const NGHolder &g,
|
|
const unordered_map<NFAVertex, u32> ®ions,
|
|
ue2_literal *lit_out, NGHolder *lhs, NGHolder *rhs,
|
|
const CompileContext &cc) {
|
|
NFAVertex v = NGHolder::null_vertex();
|
|
|
|
findBestLiteral(g, regions, lit_out, &v, cc);
|
|
if (lit_out->empty()) {
|
|
return false;
|
|
}
|
|
|
|
DEBUG_PRINTF("literal is '%s'\n", dumpString(*lit_out).c_str());
|
|
|
|
unordered_map<NFAVertex, NFAVertex> lhs_map;
|
|
unordered_map<NFAVertex, NFAVertex> rhs_map;
|
|
|
|
splitGraph(g, v, lhs, &lhs_map, rhs, &rhs_map);
|
|
|
|
DEBUG_PRINTF("v = %zu\n", g[v].index);
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Replace the given graph's EXTERNAL_CALLBACK reports with
|
|
* EXTERNAL_CALLBACK_SOM_PASS reports.
|
|
*/
|
|
void makeReportsSomPass(ReportManager &rm, NGHolder &g) {
|
|
for (const auto &v : vertices_range(g)) {
|
|
const auto &reports = g[v].reports;
|
|
if (reports.empty()) {
|
|
continue;
|
|
}
|
|
|
|
flat_set<ReportID> new_reports;
|
|
for (const ReportID &id : reports) {
|
|
const Report &report = rm.getReport(id);
|
|
if (report.type != EXTERNAL_CALLBACK) {
|
|
new_reports.insert(id);
|
|
continue;
|
|
}
|
|
Report report2 = report;
|
|
report2.type = EXTERNAL_CALLBACK_SOM_PASS;
|
|
new_reports.insert(rm.getInternalId(report2));
|
|
}
|
|
|
|
g[v].reports = new_reports;
|
|
}
|
|
}
|
|
|
|
static
/**
 * "Literal + Haig" SOM strategy: split a single leading literal off the front
 * of \a g and implement the remainder as a Haig (SOM DFA) suffix triggered by
 * that literal. Returns true if a sombe Rose was successfully added.
 */
bool doLitHaigSom(NG &ng, const NGHolder &g, som_type som) {
    ue2_literal lit;
    shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
    if (!rhs) {
        assert(0);
        throw std::bad_alloc();
    }
    if (!ng.cc.grey.allowLitHaig) {
        return false;
    }

    dumpHolder(g, 90, "lithaig_full", ng.cc.grey);

    if (!splitOffLeadingLiteral(g, &lit, &*rhs)) {
        DEBUG_PRINTF("no literal\n");
        return false;
    }

    if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
        DEBUG_PRINTF("lit too short\n");
        return false;
    }

    assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));

    // Suffix reports must signal SOM pass-through.
    makeReportsSomPass(ng.rm, *rhs);

    dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey);

    // The literal itself is the sole trigger for the suffix Haig.
    vector<vector<CharReach> > triggers;
    triggers.emplace_back(as_cr_seq(lit));

    assert(rhs->kind == NFA_SUFFIX);
    shared_ptr<raw_som_dfa> haig
        = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers,
                             ng.cc.grey, false /* lit implies adv som */);
    if (!haig) {
        DEBUG_PRINTF("failed to haig\n");
        return false;
    }
    DEBUG_PRINTF("haig %p\n", haig.get());

    // Assemble the Rose in-graph: start -> literal -> accept, with the Haig
    // attached on the accept edge.
    RoseInGraph ig;
    RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);
    RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
    add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);

    RoseInVertex a
        = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
    add_edge(v, a, RoseInEdgeProps(haig), ig);

    calcVertexOffsets(ig);

    return ng.rose->addSombeRose(ig);
}
|
|
|
|
static
/**
 * "Haig + literal + Haig" SOM strategy: split \a g around its best internal
 * literal, implement the left half as a Haig prefix (unless it is vacuous)
 * and the right half as a Haig suffix/infix. If the graph also ends with a
 * good trailing literal, the middle becomes an infix between the two
 * literals. Returns true if a sombe Rose was successfully added.
 */
bool doHaigLitHaigSom(NG &ng, NGHolder &g,
                      const unordered_map<NFAVertex, u32> &regions,
                      som_type som) {
    if (!ng.cc.grey.allowLitHaig) {
        return false;
    }

    // In streaming mode, we can only delay up to our max available history.
    const u32 max_delay =
        ng.cc.streaming ? ng.cc.grey.maxHistoryAvailable : MO_INVALID_IDX;

    ue2_literal lit;
    shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
    shared_ptr<NGHolder> lhs = make_shared<NGHolder>();
    if (!rhs || !lhs) {
        assert(0);
        throw std::bad_alloc();
    }

    if (!splitOffBestLiteral(g, regions, &lit, &*lhs, &*rhs, ng.cc)) {
        return false;
    }

    DEBUG_PRINTF("split off best lit '%s' (len=%zu)\n", dumpString(lit).c_str(),
                 lit.length());

    if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
        DEBUG_PRINTF("lit too short\n");
        return false;
    }

    assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));

    if (edge(rhs->start, rhs->acceptEod, *rhs).second) {
        return false; /* TODO: handle */
    }

    // Suffix reports must signal SOM pass-through.
    makeReportsSomPass(ng.rm, *rhs);

    dumpHolder(*lhs, 92, "haiglithaig_lhs", ng.cc.grey);
    dumpHolder(*rhs, 93, "haiglithaig_rhs", ng.cc.grey);

    u32 delay = removeTrailingLiteralStates(*lhs, lit, max_delay);

    RoseInGraph ig;
    RoseInVertex s
        = add_vertex(RoseInVertexProps::makeStart(false), ig);
    RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);

    // Determine whether the lhs is entirely vacuous: all successors of both
    // starts are special vertices.
    bool lhs_all_vac = true;
    NGHolder::adjacency_iterator ai, ae;
    for (tie(ai, ae) = adjacent_vertices(lhs->startDs, *lhs);
         ai != ae && lhs_all_vac; ++ai) {
        if (!is_special(*ai, *lhs)) {
            lhs_all_vac = false;
        }
    }
    for (tie(ai, ae) = adjacent_vertices(lhs->start, *lhs);
         ai != ae && lhs_all_vac; ++ai) {
        if (!is_special(*ai, *lhs)) {
            lhs_all_vac = false;
        }
    }

    if (lhs_all_vac) {
        /* lhs is completely vacuous --> no prefix needed */
        add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
    } else {
        assert(delay == lit.length());
        setReportOnHaigPrefix(*ng.rose, *lhs);
        vector<vector<CharReach> > prefix_triggers; /* empty for prefix */
        assert(lhs->kind == NFA_PREFIX);
        shared_ptr<raw_som_dfa> l_haig
            = attemptToBuildHaig(*lhs, som, ng.ssm.somPrecision(),
                                 prefix_triggers, ng.cc.grey);
        if (!l_haig) {
            DEBUG_PRINTF("failed to haig\n");
            return false;
        }
        DEBUG_PRINTF("lhs haig %p\n", l_haig.get());

        add_edge(s, v, RoseInEdgeProps(lhs, l_haig, delay), ig);
    }

    if (!edge(rhs->start, rhs->accept, *rhs).second) {
        assert(rhs->kind == NFA_SUFFIX);

        // The split literal triggers the right-hand Haig.
        vector<vector<CharReach> > triggers;
        triggers.emplace_back(as_cr_seq(lit));

        ue2_literal lit2;
        if (getTrailingLiteral(g, &lit2)
            && lit2.length() >= ng.cc.grey.minRoseLiteralLength
            && minStringPeriod(lit2) >= 2) {
            // The graph also ends in a usable literal: turn the rhs into an
            // infix between lit and lit2.

            /* TODO: handle delay */
            size_t overlap = maxOverlap(lit, lit2, 0);
            u32 delay2 = min((size_t)max_delay, lit2.length() - overlap);
            delay2 = removeTrailingLiteralStates(*rhs, lit2, delay2);
            rhs->kind = NFA_INFIX;
            assert(delay2 <= lit2.length());
            setReportOnHaigPrefix(*ng.rose, *rhs);

            shared_ptr<raw_som_dfa> m_haig
                = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(),
                                     triggers, ng.cc.grey, true);
            DEBUG_PRINTF("mhs haig %p\n", m_haig.get());
            if (!m_haig) {
                DEBUG_PRINTF("failed to haig\n");
                return false;
            }

            RoseInVertex w
                = add_vertex(RoseInVertexProps::makeLiteral(lit2), ig);
            add_edge(v, w, RoseInEdgeProps(rhs, m_haig, delay2), ig);

            // Lift the reports from the sole reporting vertex of g onto the
            // accept vertex.
            NFAVertex reporter = getSoleSourceVertex(g, g.accept);
            assert(reporter);
            const auto &reports = g[reporter].reports;
            RoseInVertex a =
                add_vertex(RoseInVertexProps::makeAccept(reports), ig);
            add_edge(w, a, RoseInEdgeProps(0U, 0U), ig);
        } else {
            /* TODO: analysis to see if som is in fact always increasing */
            shared_ptr<raw_som_dfa> r_haig
                = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(),
                                     triggers, ng.cc.grey, true);
            DEBUG_PRINTF("rhs haig %p\n", r_haig.get());
            if (!r_haig) {
                DEBUG_PRINTF("failed to haig\n");
                return false;
            }
            RoseInVertex a
                = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()),
                             ig);
            add_edge(v, a, RoseInEdgeProps(r_haig), ig);
        }
    } else {
        // The rhs accepts immediately after the literal; no suffix engine
        // is required.
        DEBUG_PRINTF("has start->accept edge\n");
        if (in_degree(g.acceptEod, g) > 1) {
            DEBUG_PRINTF("also has a path to EOD\n");
            return false;
        }
        NFAVertex reporter = getSoleSourceVertex(g, g.accept);
        if (!reporter) {
            return false; /* TODO: later */
        }
        const auto &reports = g[reporter].reports;
        assert(!reports.empty());
        RoseInVertex a =
            add_vertex(RoseInVertexProps::makeAccept(reports), ig);
        add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
    }

    calcVertexOffsets(ig);

    return ng.rose->addSombeRose(ig);
}
|
|
|
|
static
/**
 * Multi-literal variant of the "literal + Haig" strategy: split a whole set
 * of leading literals off the front of \a g and implement the remainder as a
 * single Haig suffix shared by all of them. Returns true if a sombe Rose was
 * successfully added.
 */
bool doMultiLitHaigSom(NG &ng, const NGHolder &g, som_type som) {
    set<ue2_literal> lits;
    shared_ptr<NGHolder> rhs = make_shared<NGHolder>();
    if (!ng.cc.grey.allowLitHaig) {
        return false;
    }

    dumpHolder(g, 90, "lithaig_full", ng.cc.grey);

    if (!splitOffLeadingLiterals(g, &lits, &*rhs)) {
        DEBUG_PRINTF("no literal\n");
        return false;
    }

    // Suffix reports must signal SOM pass-through.
    makeReportsSomPass(ng.rm, *rhs);

    dumpHolder(*rhs, 91, "lithaig_rhs", ng.cc.grey);

    // Each literal becomes a trigger for the shared suffix Haig; all of them
    // must satisfy the Rose literal constraints.
    vector<vector<CharReach>> triggers;
    for (const auto &lit : lits) {
        if (lit.length() < ng.cc.grey.minRoseLiteralLength) {
            DEBUG_PRINTF("lit too short\n");
            return false;
        }

        assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit));
        triggers.emplace_back(as_cr_seq(lit));
    }

    bool unordered_som_triggers = true; /* TODO: check overlaps to ensure that
                                         * we can promise ordering */

    assert(rhs->kind == NFA_SUFFIX);
    shared_ptr<raw_som_dfa> haig
        = attemptToBuildHaig(*rhs, som, ng.ssm.somPrecision(), triggers,
                             ng.cc.grey, unordered_som_triggers);
    if (!haig) {
        DEBUG_PRINTF("failed to haig\n");
        return false;
    }
    DEBUG_PRINTF("haig %p\n", haig.get());

    // Build a fan: start -> each literal -> shared accept with the Haig on
    // every accept edge.
    RoseInGraph ig;
    RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(false), ig);

    RoseInVertex a
        = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);

    for (const auto &lit : lits) {
        RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
        add_edge(s, v, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
        add_edge(v, a, RoseInEdgeProps(haig), ig);
    }

    calcVertexOffsets(ig);

    return ng.rose->addSombeRose(ig);
}
|
|
|
|
static
|
|
bool trySombe(NG &ng, NGHolder &g, som_type som) {
|
|
if (doLitHaigSom(ng, g, som)) {
|
|
return true;
|
|
}
|
|
|
|
auto regions = assignRegions(g);
|
|
|
|
if (doHaigLitHaigSom(ng, g, regions, som)) {
|
|
return true;
|
|
}
|
|
|
|
if (doMultiLitHaigSom(ng, g, som)) {
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static
|
|
map<u32, region_info>::const_iterator pickInitialSomCut(const NGHolder &g,
|
|
const unordered_map<NFAVertex, u32> ®ions,
|
|
const map<u32, region_info> &info,
|
|
const vector<DepthMinMax> &depths) {
|
|
map<u32, region_info>::const_iterator picked = info.end();
|
|
for (map<u32, region_info>::const_iterator it = info.begin();
|
|
it != info.end(); ++it) {
|
|
if (it->second.exits.empty()) {
|
|
assert(it == info.begin());
|
|
continue;
|
|
}
|
|
|
|
if (!regionCanEstablishSom(g, regions, it->first, it->second.exits,
|
|
depths)) {
|
|
/* last region is as far as we can go */
|
|
DEBUG_PRINTF("region %u is beyond the fixed region\n", it->first);
|
|
break;
|
|
}
|
|
picked = it;
|
|
}
|
|
|
|
return picked;
|
|
}
|
|
|
|
static
/**
 * Look for a SOM cut point later than \a orig whose prefix could be handled
 * by a reverse NFA instead. Candidate regions must have a single,
 * non-optional exit that is far enough from the starts to fit within the
 * reverse NFA limits, must be preceded by good literals, and must produce a
 * reasonably-sized reversed prefix graph.
 *
 * Returns an iterator to the chosen region, or info.end() if none qualifies.
 */
map<u32, region_info>::const_iterator tryForLaterRevNfaCut(const NGHolder &g,
                   const unordered_map<NFAVertex, u32> &regions,
                   const map<u32, region_info> &info,
                   const vector<DepthMinMax> &depths,
                   const map<u32, region_info>::const_iterator &orig,
                   const CompileContext &cc) {
    DEBUG_PRINTF("trying for later rev nfa cut\n");
    assert(orig != info.end());

    vector<map<u32, region_info>::const_iterator> cands;

    // Gather candidate regions strictly after orig.
    map<u32, region_info>::const_iterator it = orig;
    ++it;
    for (; it != info.end(); ++it) {
        /* for simplicity */
        if (it->second.exits.size() != 1 || it->second.optional) {
            continue;
        }
        NFAVertex v = *it->second.exits.begin();

        if (edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second) {
            continue; /* for simplicity would require external som nfa reports
                       * as well. */
        }

        // The cut must be reachable within the rev NFA length budget.
        const depth &max_depth = depths[g[v].index].max;
        if (max_depth >
            depth(cc.grey.somMaxRevNfaLength - 1)) { /* virtual starts */
            continue;
        }

        if (max_depth > depth(MAX_REV_NFA_PREFIX)) {
            /* probably not a good idea, anyway */
            continue;
        }

        cands.emplace_back(it);
    }

    // Evaluate candidates from the latest backwards; first acceptable wins.
    while (!cands.empty()) {
        map<u32, region_info>::const_iterator rv = cands.back();
        cands.pop_back();

        NFAVertex v = *rv->second.exits.begin();

        // Require a non-trivial literal set at the cut vertex.
        set<ue2_literal> lits = getLiteralSet(g, v);
        compressAndScore(lits);
        if (lits.empty()) {
        next_region:
            continue;
        }
        for (const auto &lit : lits) {
            if (lit.length() <= 3 || minStringPeriod(lit) < 2) {
                goto next_region;
            }
        }

        if (rv->second.enters.empty()
            || find(rv->second.full.begin(), rv->second.full.end(), g.startDs)
               != rv->second.full.end()) {
            continue;
        }

        if (!isMandRegionBetween(info.begin(), rv)
            && info.begin()->second.optional) {
            continue;
        }

        /* check to see if it is a reasonable size */
        auto prefix =
            makePrefix(g, regions, rv->second, next(rv)->second, false);

        NGHolder g_rev;
        reverseHolder(*prefix, g_rev);
        anchorStarts(g_rev);

        renumber_vertices(g_rev);
        g_rev.kind = NFA_REV_PREFIX;
        reduceGraphEquivalences(g_rev, cc);
        removeRedundancy(g_rev, SOM_NONE);

        if (num_vertices(g_rev) > 128) { /* too big */
            continue;
        }

        return rv;
    }

    return info.end();
}
|
|
|
|
static
/**
 * Build the prefix graph for a SOM chaining attempt at the \a picked region.
 * The prefix is given reports that write into a temporary SOM slot (replaced
 * with a real slot once planning succeeds, avoiding complicated rollback).
 *
 * Note: \a depths is cleared before returning, as the renumbering performed
 * here invalidates it.
 */
unique_ptr<NGHolder> makePrefixForChain(NGHolder &g,
                        const unordered_map<NFAVertex, u32> &regions,
                        const map<u32, region_info> &info,
                        const map<u32, region_info>::const_iterator &picked,
                        vector<DepthMinMax> *depths, bool prefix_by_rev,
                        ReportManager &rm) {
    DEBUG_PRINTF("making prefix for chain attempt\n");
    auto prefix =
        makePrefix(g, regions, picked->second, next(picked)->second, false);

    /* For the root SOM plan, we use a temporary SOM slot to start with so that
     * we don't have to do any complicated rollback operations if the call to
     * doSomPlanning() below fails. The temporary SOM slot is replaced with a
     * real one afterwards. */
    const u32 temp_som_loc = UINT32_MAX;
    setPrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_WRITABLE,
                     temp_som_loc, *depths, prefix_by_rev);

    /* handle direct edge to accepts from region */
    if (edge(picked->second.exits.front(), g.accept, g).second
        || edge(picked->second.exits.front(), g.acceptEod, g).second) {
        // Walk backwards over contiguous optional regions, adding SOM-relative
        // reports for each region's exits (stops at region 0 or info.begin()).
        map<u32, region_info>::const_iterator it = picked;
        do {
            makeSomRelReports(rm, g, it->second.exits, *depths);
        } while (it != info.begin() && it->second.optional && (it--)->first);
    }

    depths->clear(); /* renumbering invalidates depths */
    renumber_vertices(*prefix);

    DEBUG_PRINTF("done\n");
    return prefix;
}
|
|
|
|
/**
 * \brief Top-level SOM analysis/implementation for \a g.
 *
 * Tries, in order: the trivial absolute-SOM case, a whole-graph reverse NFA,
 * and SOM chaining (possibly with a reverse-NFA prefix, falling back to the
 * original cut via the retry label if the larger rev prefix cannot be
 * planned). On any failing path, \a g is restored from a pristine copy.
 *
 * Fix: the original code dereferenced `picked` (DEBUG_PRINTF of
 * picked->first) before checking `picked == info.end()`; dereferencing a
 * std::map end iterator is undefined behavior. The check is now performed
 * before the debug print.
 */
sombe_rv doSom(NG &ng, NGHolder &g, const ExpressionInfo &expr, u32 comp_id,
               som_type som) {
    assert(som);
    DEBUG_PRINTF("som hello\n");
    ReportManager &rm = ng.rm;
    SomSlotManager &ssm = ng.ssm;
    const CompileContext &cc = ng.cc;

    // Special case: if g is completely anchored or begins with a dot-star, we
    // know that we have an absolute SOM of zero all the time.
    if (!proper_out_degree(g.startDs, g) || beginsWithDotStar(g)) {
        makeSomAbsReports(rm, g, g.accept);
        makeSomAbsReports(rm, g, g.acceptEod);
        return SOMBE_HANDLED_INTERNAL;
    }

    if (!cc.grey.allowSomChain) {
        return SOMBE_FAIL;
    }

    // A pristine copy of the input graph, which must be restored to in paths
    // that return false. Also used as the forward graph for som rev nfa
    // construction.
    NGHolder g_pristine;
    cloneHolder(g_pristine, g);

    vector<DepthMinMax> depths = getDistancesFromSOM(g);

    // try a redundancy pass.
    if (addSomRedundancy(g, depths)) {
        depths = getDistancesFromSOM(g); // recalc
    }

    auto regions = assignRegions(g);

    dumpHolder(g, regions, 11, "som_explode", cc.grey);

    map<u32, region_info> info;
    buildRegionMapping(g, regions, info);

    map<u32, region_info>::const_iterator picked
        = pickInitialSomCut(g, regions, info, depths);
    // Must check for end() before dereferencing picked (see function doc).
    if (picked == info.end() || picked->second.exits.empty()) {
        DEBUG_PRINTF("no regions/no progress possible\n");
        clear_graph(g);
        cloneHolder(g, g_pristine);
        if (doSomRevNfa(ng, g, cc)) {
            return SOMBE_HANDLED_INTERNAL;
        } else {
            return SOMBE_FAIL;
        }
    }
    DEBUG_PRINTF("picked %u\n", picked->first);

    // If the picked region is the last one, relative reports suffice.
    if (finalRegion(g, regions, picked->second.exits[0])) {
        makeSomRelReports(rm, g, g.accept, depths);
        makeSomRelReports(rm, g, g.acceptEod, depths);
        return SOMBE_HANDLED_INTERNAL;
    }

    // Whole-graph reverse NFA beats chaining if it fits.
    if (doSomRevNfa(ng, g_pristine, cc)) {
        clear_graph(g);
        cloneHolder(g, g_pristine);
        return SOMBE_HANDLED_INTERNAL;
    }

    bool prefix_by_rev = false;
    map<u32, region_info>::const_iterator picked_old = picked;
    map<u32, region_info>::const_iterator rev_pick
        = tryForLaterRevNfaCut(g, regions, info, depths, picked, cc);
    if (rev_pick != info.end()) {
        DEBUG_PRINTF("found later rev prefix cut point\n");
        assert(rev_pick != picked);
        picked = rev_pick;
        prefix_by_rev = true;
    } else {
        /* sanity checks for picked region, these checks have already been done
         * if we are using a prefix reverse nfa. */
        if (picked->second.enters.empty()
            || find(picked->second.full.begin(), picked->second.full.end(),
                    g.startDs) != picked->second.full.end()) {
            clear_graph(g);
            cloneHolder(g, g_pristine);
            return SOMBE_FAIL;
        }

        if (!isMandRegionBetween(info.begin(), picked)
            && info.begin()->second.optional) {
            clear_graph(g);
            cloneHolder(g, g_pristine);
            return SOMBE_FAIL;
        }
    }

    DEBUG_PRINTF("region %u is the final\n", picked->first);

    shared_ptr<NGHolder> prefix = makePrefixForChain(
        g, regions, info, picked, &depths, prefix_by_rev, rm);
    /* note depths cleared as we have renumbered */

    // A "lock": once the prefix matches, check whether it stays matched
    // (modulo escape characters) so a simple slot write suffices.
    CharReach escapes;
    bool stuck = isPossibleLock(g, picked, info, &escapes);
    if (stuck) {
        DEBUG_PRINTF("investigating potential lock\n");

        NGHolder gg;
        fillHolderForLockCheck(&gg, g, info, picked);

        stuck = firstMatchIsFirst(gg);
    }

    if (stuck && escapes.none()) {
        /* leads directly to .* --> woot */
        DEBUG_PRINTF("initial slot is full lock\n");
        u32 som_loc = ssm.getSomSlot(*prefix, escapes, false,
                                     SomSlotManager::NO_PARENT);
        replaceTempSomSlot(rm, *prefix, som_loc);

        /* update all reports on g to report the som_loc's som */
        updateReportToUseRecordedSom(rm, g, som_loc);

        /* create prefix to set the som_loc */
        updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET_IF_UNSET);
        if (prefix_by_rev) {
            u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc);
            updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
        }
        renumber_vertices(*prefix);
        if (!ng.addHolder(*prefix)) {
            DEBUG_PRINTF("failed to add holder\n");
            clear_graph(g);
            cloneHolder(g, g_pristine);
            return SOMBE_FAIL;
        }

        DEBUG_PRINTF("ok found initial lock\n");
        return SOMBE_HANDLED_INTERNAL;
    }

    vector<som_plan> plan;
retry:
    // Note: no-one should ever pay attention to the root plan's parent.
    plan.emplace_back(som_plan(prefix, escapes, false, 0));
    dumpHolder(*plan.back().prefix, 12, "som_prefix", cc.grey);
    if (!prefix_by_rev) {
        if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey)) {
            DEBUG_PRINTF("failed\n");
            clear_graph(g);
            cloneHolder(g, g_pristine);
            return SOMBE_FAIL;
        }
    } else {
        DEBUG_PRINTF("trying for som plan\n");
        if (!doSomPlanning(g, stuck, regions, info, picked, plan, cc.grey,
                           DISALLOW_MODIFY_HOLDER)) {
            /* Note: the larger prefixes generated by reverse nfas may not
             * advance as fair as the original prefix - so we should retry
             * with a smaller prefix. */

            prefix_by_rev = false;
            stuck = false; /* if we reached a lock, then prefix_by_rev would not
                            * have advanced. */
            picked = picked_old;
            plan.clear();
            depths = getDistancesFromSOM(g); /* due to renumbering, need to
                                              * regenerate */
            prefix = makePrefixForChain(g, regions, info, picked, &depths,
                                        prefix_by_rev, rm);
            escapes.clear();
            DEBUG_PRINTF("retrying\n");
            goto retry;
        }
    }
    DEBUG_PRINTF("som planning ok\n");

    /* if the initial prefix is weak is if sombe approaches are better */
    if (findMinWidth(*prefix) <= depth(2)) {
        DEBUG_PRINTF("weak prefix... seeing if sombe can help out\n");
        NGHolder g2;
        cloneHolder(g2, g_pristine);
        if (trySombe(ng, g2, som)) {
            return SOMBE_HANDLED_ALL;
        }
    }

    /* From this point we know that we are going to succeed or die horribly with
     * a pattern too large. Anything done past this point can be considered
     * committed to the compile. */

    regions = assignRegions(g); // Update as g may have changed.

    DEBUG_PRINTF("-- get slot for initial plan\n");
    u32 som_loc;
    if (plan[0].is_reset) {
        som_loc = ssm.getInitialResetSomSlot(*prefix, g, regions,
                                             picked->first, &plan[0].no_implement);
    } else {
        som_loc = ssm.getSomSlot(*prefix, escapes, false,
                                 SomSlotManager::NO_PARENT);
    }

    // Swap the temporary slot used during planning for the real one.
    replaceTempSomSlot(rm, *prefix, som_loc);

    if (plan.front().is_reset) {
        updatePrefixReports(rm, *prefix, INTERNAL_SOM_LOC_SET);
    }
    if (prefix_by_rev && !plan.front().no_implement) {
        u32 rev_comp_id = doSomRevNfaPrefix(ng, expr, *prefix, cc);
        updatePrefixReportsRevNFA(rm, *prefix, rev_comp_id);
    }

    implementSomPlan(ng, expr, comp_id, g, plan, som_loc);

    DEBUG_PRINTF("success\n");
    return SOMBE_HANDLED_INTERNAL;
}
|
|
|
|
sombe_rv doSomWithHaig(NG &ng, NGHolder &g, const ExpressionInfo &expr,
                       u32 comp_id, som_type som) {
    /* SOM via Haig DFAs: first try the literal-based sombe strategies, then
     * fall back to the Haig-literal chaining approach over the whole graph.
     * On failure, g is restored from an untouched copy. */
    assert(som);

    DEBUG_PRINTF("som+haig hello\n");

    // A pristine copy of the input graph, which must be restored to in paths
    // that return false. Also used as the forward graph for som rev nfa
    // construction.
    NGHolder pristine;
    cloneHolder(pristine, g);

    if (trySombe(ng, g, som)) {
        return SOMBE_HANDLED_ALL;
    }

    if (!ng.cc.grey.allowHaigLit || !ng.cc.grey.allowSomChain) {
        return SOMBE_FAIL;
    }

    // know that we have an absolute SOM of zero all the time.
    assert(edge(g.startDs, g.startDs, g).second);

    vector<DepthMinMax> depths = getDistancesFromSOM(g);

    // A redundancy pass may alter the graph; the SOM distances must then be
    // recomputed.
    if (addSomRedundancy(g, depths)) {
        depths = getDistancesFromSOM(g);
    }

    auto regions = assignRegions(g);

    dumpHolder(g, regions, 21, "som_explode", ng.cc.grey);

    map<u32, region_info> info;
    buildRegionMapping(g, regions, info, true);

    sombe_rv rv =
        doHaigLitSom(ng, g, expr, comp_id, som, regions, info, info.begin());
    if (rv == SOMBE_FAIL) {
        // Undo any mutations made during the failed attempt.
        clear_graph(g);
        cloneHolder(g, pristine);
    }
    return rv;
}
|
|
|
|
} // namespace ue2
|