From c32d7d51d9586f439efbc7e70b7ee1c8a4c0a87b Mon Sep 17 00:00:00 2001 From: Alex Coyte Date: Thu, 5 Jan 2017 12:35:32 +1100 Subject: [PATCH] remove ng_rose --- CMakeLists.txt | 2 - src/grey.cpp | 7 - src/grey.h | 2 - src/nfagraph/ng.cpp | 24 - src/nfagraph/ng_rose.cpp | 2911 ----------------------------------- src/nfagraph/ng_rose.h | 68 - src/nfagraph/ng_violet.cpp | 1 - src/rose/rose_build_add.cpp | 1 - 8 files changed, 3016 deletions(-) delete mode 100644 src/nfagraph/ng_rose.cpp delete mode 100644 src/nfagraph/ng_rose.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 8fe82a70..f5d29642 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -820,8 +820,6 @@ SET (hs_SRCS src/nfagraph/ng_restructuring.h src/nfagraph/ng_revacc.cpp src/nfagraph/ng_revacc.h - src/nfagraph/ng_rose.cpp - src/nfagraph/ng_rose.h src/nfagraph/ng_sep.cpp src/nfagraph/ng_sep.h src/nfagraph/ng_small_literal_set.cpp diff --git a/src/grey.cpp b/src/grey.cpp index f0374b6d..8881666e 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -54,7 +54,6 @@ Grey::Grey(void) : allowMcSheng(true), allowPuff(true), allowLiteral(true), - allowRose(true), allowViolet(true), allowExtendedNFA(true), /* bounded repeats of course */ allowLimExNFA(true), @@ -114,7 +113,6 @@ Grey::Grey(void) : roseMcClellanSuffix(1), roseMcClellanOutfix(2), roseTransformDelay(true), - roseDesiredSplit(4), earlyMcClellanPrefix(true), earlyMcClellanInfix(true), earlyMcClellanSuffix(true), @@ -219,7 +217,6 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(allowMcSheng); G_UPDATE(allowPuff); G_UPDATE(allowLiteral); - G_UPDATE(allowRose); G_UPDATE(allowViolet); G_UPDATE(allowExtendedNFA); G_UPDATE(allowLimExNFA); @@ -279,7 +276,6 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(roseMcClellanSuffix); G_UPDATE(roseMcClellanOutfix); G_UPDATE(roseTransformDelay); - G_UPDATE(roseDesiredSplit); G_UPDATE(earlyMcClellanPrefix); G_UPDATE(earlyMcClellanInfix); G_UPDATE(earlyMcClellanSuffix); @@ -336,7 +332,6 @@ void applyGreyOverrides(Grey *g, const string &s) { g->allowMcClellan = false; g->allowPuff = false; g->allowLiteral = false; - g->allowRose = false; g->allowViolet = false; g->allowSmallLiteralSet = false; g->roseMasks = false; @@ -354,7 +349,6 @@ void applyGreyOverrides(Grey *g, const string &s) { g->allowMcClellan = true; g->allowPuff = false; g->allowLiteral = false; - g->allowRose = false; g->allowViolet = false; g->allowSmallLiteralSet = false; g->roseMasks = false; @@ -372,7 +366,6 @@ void applyGreyOverrides(Grey *g, const string &s) { g->allowMcClellan = true; g->allowPuff = false; g->allowLiteral = false; - g->allowRose = false; g->allowViolet = false; g->allowSmallLiteralSet = false; g->roseMasks = false; diff --git a/src/grey.h b/src/grey.h index 7a6a168b..17d82527 100644 --- a/src/grey.h +++ b/src/grey.h @@ -54,7 +54,6 @@ struct Grey { bool allowMcSheng; bool allowPuff; bool allowLiteral; - bool allowRose; bool allowViolet; bool allowExtendedNFA; bool allowLimExNFA; @@ -128,7 +127,6 @@ struct Grey { * always */ u32 roseMcClellanOutfix; /* 0 = off, 1 = sometimes, 2 = almost always */ bool roseTransformDelay; - u32 roseDesiredSplit; bool earlyMcClellanPrefix; bool earlyMcClellanInfix; diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index a4f86fee..e1f29318 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -52,7 +52,6 @@ #include "ng_region.h" #include "ng_region_redundancy.h" #include "ng_reports.h" -#include "ng_rose.h" #include "ng_sep.h" #include "ng_small_literal_set.h" #include "ng_som.h" @@ -255,10 +254,6 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, return true; } - if (splitOffRose(*ng.rose, g, w.prefilter, ng.rm, cc)) { - return true; - } - if (splitOffPuffs(*ng.rose, ng.rm, g, w.prefilter, cc)) { return true; } @@ -276,25 +271,6 @@ bool addComponent(NG &ng, NGHolder &g, const NGWrapper &w, const som_type som, return true; } - if (splitOffRose(*ng.rose, g, w.prefilter, ng.rm, cc)) { - return true; - } - - // A final pass at cyclic redundancy and Rose - // TODO: investigate - coverage results suggest that this never succeeds? - if (cc.grey.performGraphSimplification) { - if (removeCyclicPathRedundancy(g) || - removeCyclicDominated(g, som)) { - if (handleFixedWidth(*ng.rose, g, cc.grey)) { - return true; - } - } - } - - if (finalChanceRose(*ng.rose, g, w.prefilter, ng.rm, cc)) { - return true; - } - DEBUG_PRINTF("testing for outfix\n"); assert(allMatchStatesHaveReports(g)); if (ng.rose->addOutfix(g)) { diff --git a/src/nfagraph/ng_rose.cpp b/src/nfagraph/ng_rose.cpp deleted file mode 100644 index d24c3392..00000000 --- a/src/nfagraph/ng_rose.cpp +++ /dev/null @@ -1,2911 +0,0 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Rose construction from NGHolder. - */ - -// #define DEBUG -// #define DEBUG_ROSE -#include "ng_rose.h" - -#include "grey.h" -#include "ng_depth.h" -#include "ng_dominators.h" -#include "ng_equivalence.h" -#include "ng_holder.h" -#include "ng_is_equal.h" -#include "ng_literal_analysis.h" -#include "ng_netflow.h" -#include "ng_prune.h" -#include "ng_redundancy.h" -#include "ng_region.h" -#include "ng_reports.h" -#include "ng_split.h" -#include "ng_util.h" -#include "ng_violet.h" -#include "ng_width.h" -#include "rose/rose_build.h" -#include "rose/rose_build_util.h" -#include "rose/rose_in_dump.h" -#include "rose/rose_in_graph.h" -#include "rose/rose_in_util.h" -#include "util/compare.h" -#include "util/compile_context.h" -#include "util/container.h" -#include "util/graph.h" -#include "util/graph_range.h" -#include "util/make_unique.h" -#include "util/order_check.h" -#include "util/ue2string.h" -#include "util/ue2_containers.h" - -#include -#include -#include -#include - -#define NDEBUG_PRINTF(x, ...) \ - do { if (0) { DEBUG_PRINTF(x, ## __VA_ARGS__); } } while (0) - -using namespace std; - -namespace ue2 { - -/** - * Maps vertices in the original graph to vertices on edge graphs. Each edge - * graph should contain at most one copy of the vertex. Multiple images for a - * vertex arise after we split on multiple literals - in this cases all edges - * should share a common graph. - * - * If, when an edge is split, a vertex ends up in both the LHS and RHS then only - * the LHS is tracked. This is because in general we want to simplify the LHS - * and allow complexity to be pushed further back. - */ -typedef ue2::unordered_map > > - vdest_map_t; - -typedef ue2::unordered_map > vsrc_map_t; - -/** - * \brief Maximum width of the character class usable as an escape class. - */ -static const u32 MAX_ESCAPE_CHARS = 20; - -static -u32 maxDelay(const CompileContext &cc) { - if (!cc.streaming) { - return MO_INVALID_IDX; - } - return cc.grey.maxHistoryAvailable; -} - -static -bool createsAnchoredLHS(const NGHolder &g, const vector &vv, - const vector &depths, - const Grey &grey, depth max_depth = depth::infinity()) { - max_depth = min(max_depth, depth(grey.maxAnchoredRegion)); - - for (auto v : vv) { - /* avoid issues of self loops blowing out depths: - * look at preds, add 1 */ - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == v) { - continue; - } - - u32 idx = g[u].index; - assert(idx < depths.size()); - if (maxDistFromStartOfData(depths.at(idx)) >= max_depth) { - return false; - } - } - } - return true; -} - -static -bool createsTransientLHS(const NGHolder &g, const vector &vv, - const vector &depths, - const Grey &grey) { - const depth max_depth(grey.maxHistoryAvailable); - - for (auto v : vv) { - /* avoid issues of self loops blowing out depths: - * look at preds, add 1 */ - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == v) { - continue; - } - - u32 idx = g[u].index; - assert(idx < depths.size()); - if (maxDistFromInit(depths.at(idx)) >= max_depth) { - return false; - } - } - } - return true; -} - -static -bool isLHSUsablyAnchored(const NGHolder &g, - const vector &depths, - const Grey &grey) { - assert(in_degree(g.acceptEod, g) == 1); - - vector accepts; - insert(&accepts, accepts.end(), inv_adjacent_vertices(g.accept, g)); - - bool rv = createsAnchoredLHS(g, accepts, depths, grey); - DEBUG_PRINTF("lhs is %susably anchored\n", rv ? "" : "not "); - return rv; -} - -static -bool isLHSTransient(const NGHolder &g, - const vector &depths, - const Grey &grey) { - assert(in_degree(g.acceptEod, g) == 1); - - vector accepts; - insert(&accepts, accepts.end(), inv_adjacent_vertices(g.accept, g)); - - bool rv = createsTransientLHS(g, accepts, depths, grey); - DEBUG_PRINTF("lhs is %stransient\n", rv ? "" : "not "); - return rv; -} - -namespace { - -/** - * Information on a cut: vertices and literals. - */ -struct VertLitInfo { - VertLitInfo(NFAVertex v, const set &litlit) - : vv(vector(1, v)), lit(litlit) {} - VertLitInfo(const vector &vvvv, const set &litlit) - : vv(vvvv), lit(litlit) {} - vector vv; - set lit; -}; - -/** - * A factory for candidate simple cuts (literals/vertices). - */ -class LitCollection : boost::noncopyable { - vector> lits; /**< sorted list of potential cuts */ - const NGHolder &g; /**< graph on which cuts are found */ - const vector &depths; /**< depth information for g */ - const ue2::unordered_map ®ion_map; /**< region map for g */ - - /** Set of vertices to avoid selecting as end vertices for cuts as previous - * cuts overlap them. This is solely to prevent us picking literal sets - * which do not add significant value. */ - ue2::unordered_set poisoned; - - /** Back-edges in g. */ - ue2::unordered_map > back_edges; - - const Grey &grey; - bool seeking_transient; - bool seeking_anchored; - - void poisonLHS(const VertLitInfo &picked); - void poisonLitVerts(const VertLitInfo &picked); - void poisonCandidates(const VertLitInfo &picked); - - friend class LitComparator; - -public: - LitCollection(const NGHolder &g_in, const vector &depths_in, - const ue2::unordered_map ®ion_map_in, - const set &ap, const set &ap_raw, - u32 min_len, bool desperation, const CompileContext &cc, - bool override_literal_quality_check = false); - - /**< Returns the next candidate cut. Cut still needs to be inspected for - * complete envelopment. */ - unique_ptr pickNext(void); -}; - -/** - * \brief Comparator class for sorting LitCollection::lits. - * - * This is separated out from LitCollection itself as passing LitCollection to - * std::sort() would incur a (potentially expensive) copy. - */ -class LitComparator { -public: - explicit LitComparator(const LitCollection &lc_in) : lc(lc_in) {} - bool operator()(const unique_ptr &a, - const unique_ptr &b) const { - assert(a && b); - - if (lc.seeking_anchored) { - bool a_anchored = - createsAnchoredLHS(lc.g, a->vv, lc.depths, lc.grey); - bool b_anchored = - createsAnchoredLHS(lc.g, b->vv, lc.depths, lc.grey); - - if (a_anchored != b_anchored) { - return a_anchored < b_anchored; - } - } - - if (lc.seeking_transient) { - bool a_transient = - createsTransientLHS(lc.g, a->vv, lc.depths, lc.grey); - bool b_transient = - createsTransientLHS(lc.g, b->vv, lc.depths, lc.grey); - - if (a_transient != b_transient) { - return a_transient < b_transient; - } - } - - u64a score_a = scoreSet(a->lit); - u64a score_b = scoreSet(b->lit); - - if (score_a != score_b) { - return score_a > score_b; - } - - /* vertices should only be in one candidate cut */ - assert(a->vv == b->vv || a->vv.front() != b->vv.front()); - return lc.g[a->vv.front()].index > - lc.g[b->vv.front()].index; - } - -private: - const LitCollection &lc; -}; - -static -size_t shorter_than(const set &s, size_t limit) { - size_t count = 0; - - for (const auto &lit : s) { - if (lit.length() < limit) { - count++; - } - } - - return count; -} - -static -u32 min_len(const set &s) { - u32 rv = ~0U; - - for (const auto &lit : s) { - rv = min(rv, (u32)lit.length()); - } - - return rv; -} - -static -u32 max_len(const set &s) { - u32 rv = 0; - - for (const auto &lit : s) { - rv = max(rv, (u32)lit.length()); - } - - return rv; -} - -static -u32 min_period(const set &s) { - u32 rv = ~0U; - - for (const auto &lit : s) { - rv = min(rv, (u32)minStringPeriod(lit)); - } - DEBUG_PRINTF("min period %u\n", rv); - return rv; -} - -static -bool validateRoseLiteralSetQuality(const set &s, u64a score, - u32 min_allowed_len, bool desperation, - bool override_literal_quality_check) { - if (!override_literal_quality_check && score >= NO_LITERAL_AT_EDGE_SCORE) { - DEBUG_PRINTF("candidate is too bad %llu/%zu\n", score, s.size()); - return false; - } - - assert(!s.empty()); - if (s.empty()) { - DEBUG_PRINTF("candidate is too bad/something went wrong\n"); - return false; - } - - u32 s_min_len = min_len(s); - u32 s_min_period = min_period(s); - size_t short_count = shorter_than(s, 5); - - DEBUG_PRINTF("cand '%s': score %llu count=%zu min_len=%u min_period=%u" - " short_count=%zu desp=%d\n", - dumpString(*s.begin()).c_str(), score, s.size(), s_min_len, - s_min_period, short_count, (int)desperation); - - bool ok = true; - - if (s.size() > 10 /* magic number is magic */ - || s_min_len < min_allowed_len - || (s_min_period <= 1 && !override_literal_quality_check - && min_allowed_len != 1)) { - ok = false; - } - - if (!ok && desperation - && s.size() <= 20 /* more magic numbers are magical */ - && (s_min_len > 5 || (s_min_len > 2 && short_count <= 10)) - && s_min_period > 1) { - DEBUG_PRINTF("candidate is ok\n"); - ok = true; - } - - if (!ok && desperation - && s.size() <= 50 /* more magic numbers are magical */ - && s_min_len > 10 - && s_min_period > 1) { - DEBUG_PRINTF("candidate is ok\n"); - ok = true; - } - - if (!ok) { - DEBUG_PRINTF("candidate is too bad\n"); - return false; - } - - return true; -} - -static UNUSED -void dumpRoseLiteralSet(const set &s) { - for (UNUSED const auto &lit : s) { - DEBUG_PRINTF(" lit: %s\n", dumpString(lit).c_str()); - } -} - -static -void getSimpleRoseLiterals(const NGHolder &g, const set &a_dom, - vector> *lits, - u32 min_allowed_len, bool desperation, - bool override_literal_quality_check) { - map scores; - map> lit_info; - set s; - - for (auto v : a_dom) { - s = getLiteralSet(g, v, true); /* RHS will take responsibility for any - revisits to the target vertex */ - - if (s.empty()) { - DEBUG_PRINTF("candidate is too bad\n"); - continue; - } - - DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size()); - dumpRoseLiteralSet(s); - u64a score = compressAndScore(s); - - if (!validateRoseLiteralSetQuality(s, score, min_allowed_len, - desperation, - override_literal_quality_check)) { - continue; - } - - DEBUG_PRINTF("candidate is a candidate\n"); - scores[v] = score; - lit_info.insert(make_pair(v, ue2::make_unique(v, s))); - } - - /* try to filter out cases where appending some characters produces worse - * literals. Only bother to look back one byte, TODO make better */ - for (auto u : a_dom) { - if (out_degree(u, g) != 1 || !scores[u]) { - continue; - } - NFAVertex v = *adjacent_vertices(u, g).first; - if (contains(scores, v) && scores[v] >= scores[u]) { - DEBUG_PRINTF("killing off v as score %llu >= %llu\n", - scores[v], scores[u]); - lit_info.erase(v); - } - } - - lits->reserve(lit_info.size()); - for (auto &m : lit_info) { - lits->push_back(move(m.second)); - } - DEBUG_PRINTF("%zu candidate literal sets\n", lits->size()); -} - -static -void getRegionRoseLiterals(const NGHolder &g, - const ue2::unordered_map ®ion_map, - const set &a_dom_raw, - vector> *lits, - u32 min_allowed_len, bool desperation, - bool override_literal_quality_check) { - /* This allows us to get more places to chop the graph as we are not limited - to points where there is a single vertex to split. */ - - /* TODO: operate over 'proto-regions' which ignore back edges */ - - set mand, optional; - map > exits; - - for (auto v : vertices_range(g)) { - assert(contains(region_map, v)); - const u32 region = region_map.at(v); - - if (is_any_start(v, g) || region == 0) { - continue; - } - - if (is_any_accept(v, g)) { - continue; - } - - if (isRegionExit(g, v, region_map)) { - exits[region].push_back(v); - } - - if (isRegionEntry(g, v, region_map)) { - // Determine whether this region is mandatory or optional. We only - // need to do this check for the first entry vertex we encounter - // for this region. - if (!contains(mand, region) && !contains(optional, region)) { - if (isOptionalRegion(g, v, region_map)) { - optional.insert(region); - } else { - mand.insert(region); - } - } - } - } - - for (const auto &m : exits) { - if (0) { - next_cand: - continue; - } - - const u32 region = m.first; - const vector &vv = m.second; - assert(!vv.empty()); - - if (!contains(mand, region)) { - continue; - } - - for (auto v : vv) { - /* if an exit is in a_dom_raw, the region is already handled well - * by getSimpleRoseLiterals */ - if (contains(a_dom_raw, v)) { - goto next_cand; - } - } - - /* the final region may not have a neat exit. validate that all exits - * have an edge to each accept or none do */ - bool edge_to_a = edge(vv[0], g.accept, g).second; - bool edge_to_aeod = edge(vv[0], g.acceptEod, g).second; - const auto &reports = g[vv[0]].reports; - for (auto v : vv) { - if (edge_to_a != edge(v, g.accept, g).second) { - goto next_cand; - } - - if (edge_to_aeod != edge(v, g.acceptEod, g).second) { - goto next_cand; - } - - if (g[v].reports != reports) { - goto next_cand; - } - } - - DEBUG_PRINTF("inspecting region %u\n", region); - set s; - for (auto v : vv) { - DEBUG_PRINTF(" exit vertex: %zu\n", g[v].index); - /* Note: RHS can not be depended on to take all subsequent revisits - * to this vertex */ - set ss = getLiteralSet(g, v, false); - if (ss.empty()) { - DEBUG_PRINTF("candidate is too bad\n"); - goto next_cand; - } - insert(&s, ss); - } - - assert(!s.empty()); - - DEBUG_PRINTF("|candidate raw literal set| = %zu\n", s.size()); - dumpRoseLiteralSet(s); - u64a score = compressAndScore(s); - DEBUG_PRINTF("|candidate literal set| = %zu\n", s.size()); - dumpRoseLiteralSet(s); - - if (!validateRoseLiteralSetQuality(s, score, min_allowed_len, - desperation, - override_literal_quality_check)) { - continue; - } - - DEBUG_PRINTF("candidate is a candidate\n"); - lits->push_back(ue2::make_unique(vv, s)); - } -} - -static -void gatherBackEdges(const NGHolder &g, - ue2::unordered_map> *out) { - set backEdges; - BackEdges> be(backEdges); - depth_first_search(g, visitor(be).root_vertex(g.start)); - - for (const auto &e : backEdges) { - (*out)[source(e, g)].push_back(target(e, g)); - } -} - -LitCollection::LitCollection(const NGHolder &g_in, - const vector &depths_in, - const ue2::unordered_map ®ion_map_in, - const set &a_dom, - const set &a_dom_raw, u32 min_len, - bool desperation, const CompileContext &cc, - bool override_literal_quality_check) - : g(g_in), depths(depths_in), region_map(region_map_in), grey(cc.grey), - seeking_transient(cc.streaming), seeking_anchored(true) { - getSimpleRoseLiterals(g, a_dom, &lits, min_len, desperation, - override_literal_quality_check); - getRegionRoseLiterals(g, region_map, a_dom_raw, &lits, min_len, desperation, - override_literal_quality_check); - DEBUG_PRINTF("lit coll is looking for a%d t%d\n", (int)seeking_anchored, - (int)seeking_transient); - DEBUG_PRINTF("we have %zu candidate literal splits\n", lits.size()); - sort(lits.begin(), lits.end(), LitComparator(*this)); - gatherBackEdges(g, &back_edges); -} - -void LitCollection::poisonLHS(const VertLitInfo &picked) { - DEBUG_PRINTF("found anchored %d transient %d\n", - (int)createsAnchoredLHS(g, picked.vv, depths, grey), - (int)createsTransientLHS(g, picked.vv, depths, grey)); - set curr; - set next; - - insert(&curr, picked.vv); - - while (!curr.empty()) { - insert(&poisoned, curr); - next.clear(); - for (auto v : curr) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (!is_special(u, g) && !contains(poisoned, u)) { - next.insert(u); - } - } - } - - curr.swap(next); - } - - seeking_transient = false; - seeking_anchored = false; - - /* reprioritise cuts now that the LHS is taken care off */ - sort(lits.begin(), lits.end(), LitComparator(*this)); -} - -static -void flood_back(const NGHolder &g, u32 len, const set &initial, - set *visited) { - vector curr; - vector next; - - insert(&curr, curr.end(), initial); - - insert(visited, initial); - - /* bfs: flood back len vertices */ - for (u32 i = 1; i < len; i++) { - next.clear(); - DEBUG_PRINTF("poison %u/%u: curr %zu\n", i, len, curr.size()); - - for (auto v : curr) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (!contains(*visited, u)) { - next.push_back(u); - visited->insert(u); - } - } - } - - next.swap(curr); - } -} - -/** - * Add vertices near a picked literal to the poison set unless it looks - * like they may still add value (ie they are on they other side of cycle). - */ -void LitCollection::poisonLitVerts(const VertLitInfo &picked) { - DEBUG_PRINTF("poisoning vertices associated with picked literals\n"); - - u32 len = max_len(picked.lit); - - /* poison vertices behind */ - - set starters; - insert(&starters, picked.vv); - - set visited; - - flood_back(g, len, starters, &visited); - - DEBUG_PRINTF("flood %zu vertices\n", visited.size()); - - /* inspect any back edges which are in the flooded subgraph; look for any - * destination vertices which are not starters */ - set anti; - for (auto u : visited) { - if (!contains(back_edges, u) || contains(starters, u)) { - continue; - } - - for (auto v : back_edges[u]) { - if (contains(visited, v) && !contains(starters, v)) { - anti.insert(v); - } - } - } - DEBUG_PRINTF("%zu cycle ends\n", visited.size()); - - /* remove any vertices which lie on the other side of a cycle from the - * visited set */ - set anti_pred; - flood_back(g, len - 1, anti, &anti_pred); - - DEBUG_PRINTF("flood visited %zu vertices; anti %zu\n", visited.size(), - anti_pred.size()); - - erase_all(&visited, anti_pred); - - DEBUG_PRINTF("filtered flood visited %zu vertices\n", visited.size()); - - insert(&poisoned, visited); - - insert(&poisoned, starters); /* complicated back loops can result in start - vertices being removed from the visited - set */ - - for (UNUSED auto v : picked.vv) { - assert(contains(poisoned, v)); - } - - /* TODO: poison vertices in front of us? */ -} - -void LitCollection::poisonCandidates(const VertLitInfo &picked) { - assert(!picked.lit.empty()); - if (picked.lit.empty()) { - return; - } - - if ((seeking_anchored && createsAnchoredLHS(g, picked.vv, depths, grey)) - || (seeking_transient && createsTransientLHS(g, picked.vv, depths, grey))) { - /* We don't want to pick anything to the LHS of picked.v any more as we - * have something good. We also don't want to provide any bonus for - * remaining literals based on anchoredness/transientness of the lhs. - */ - poisonLHS(picked); - } else { - poisonLitVerts(picked); - } -} - -unique_ptr LitCollection::pickNext() { - while (!lits.empty()) { - if (0) { - next_lit: - continue; - } - - for (auto v : lits.back()->vv) { - if (contains(poisoned, v)) { - DEBUG_PRINTF("skipping '%s' as overlapped\n", - dumpString(*(lits.back()->lit.begin())).c_str()); - lits.pop_back(); - goto next_lit; - } - } - - unique_ptr rv = move(lits.back()); - lits.pop_back(); - poisonCandidates(*rv); - DEBUG_PRINTF("best is '%s' %zu a%d t%d\n", - dumpString(*(rv->lit.begin())).c_str(), - g[rv->vv.front()].index, - (int)createsAnchoredLHS(g, rv->vv, depths, grey), - (int)createsTransientLHS(g, rv->vv, depths, grey)); - - return rv; - } - - return nullptr; -} - -} - -static -bool can_match(const NGHolder &g, const ue2_literal &lit, bool overhang_ok) { - set curr, next; - curr.insert(g.accept); - - for (auto it = lit.rbegin(); it != lit.rend(); ++it) { - next.clear(); - - for (auto v : curr) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == g.start) { - if (overhang_ok) { - DEBUG_PRINTF("bail\n"); - return true; - } else { - continue; /* it is not possible for a lhs literal to - * overhang the start */ - } - } - - const CharReach &cr = g[u].char_reach; - if (!overlaps(*it, cr)) { - DEBUG_PRINTF("skip\n"); - continue; - } - - next.insert(u); - } - } - - curr.swap(next); - } - - return !curr.empty(); -} - -static -void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 delay, const vector &preds) { - assert(delay <= lit.length()); - assert(isCorrectlyTopped(g)); - DEBUG_PRINTF("adding on '%s' %u\n", dumpString(lit).c_str(), delay); - - NFAVertex prev = g.accept; - auto it = lit.rbegin(); - while (delay--) { - NFAVertex curr = add_vertex(g); - assert(it != lit.rend()); - g[curr].char_reach = *it; - add_edge(curr, prev, g); - ++it; - prev = curr; - } - - for (auto v : preds) { - NFAEdge e = add_edge(v, prev, g); - if (v == g.start && is_triggered(g)) { - g[e].tops.insert(DEFAULT_TOP); - } - } - - // Every predecessor of accept must have a report. - for (auto u : inv_adjacent_vertices_range(g.accept, g)) { - g[u].reports.insert(0); - } - - renumber_vertices(g); - renumber_edges(g); - assert(allMatchStatesHaveReports(g)); - assert(isCorrectlyTopped(g)); -} - -static -void restoreTrailingLiteralStates(NGHolder &g, const ue2_literal &lit, - u32 delay) { - vector preds; - insert(&preds, preds.end(), inv_adjacent_vertices(g.accept, g)); - clear_in_edges(g.accept, g); - - for (auto v : preds) { - g[v].reports.clear(); /* clear report from old accepts */ - } - - restoreTrailingLiteralStates(g, lit, delay, preds); -} - -/* return false if we should get rid of the edge altogether */ -static -bool removeLiteralFromLHS(RoseInGraph &ig, const RoseInEdge &lhs, - const CompileContext &cc) { - unique_ptr h = cloneHolder(*ig[lhs].graph); - NGHolder &g = *h; - assert(ig[target(lhs, ig)].type == RIV_LITERAL); - const ue2_literal &lit = ig[target(lhs, ig)].s; - - /* lhs should be connected to a start */ - assert(ig[source(lhs, ig)].type == RIV_START - || ig[source(lhs, ig)].type == RIV_ANCHORED_START); - - if (in_degree(g.acceptEod, g) != 1 /* edge from accept */) { - assert(0); - return true; - } - if (lit.empty()) { - assert(0); - return true; - } - - const u32 max_delay = maxDelay(cc); - - // In streaming mode, we must limit the depth to the available history - // UNLESS the given literal follows start or startDs and has nothing - // before it that we will need to account for. In that case, we can - // lean on FDR's support for long literals. - if (literalIsWholeGraph(g, lit)) { - assert(!ig[lhs].haig); - assert(ig[lhs].minBound == 0); - assert(ig[lhs].maxBound == ROSE_BOUND_INF); - DEBUG_PRINTF("literal is the whole graph\n"); - - u32 delay = removeTrailingLiteralStates(g, lit, MO_INVALID_IDX, false); - assert(delay == lit.length()); - ig[lhs].graph = move(h); - ig[lhs].graph_lag = delay; - return true; - } - - if (!can_match(g, lit, false)) { - /* This is can happen if the literal arises from a large cyclic - to/beyond the pivot. As the LHS graph only cares about the first - reach of the pivot, this literal is junk */ - DEBUG_PRINTF("bogus edge\n"); - return false; - } - - u32 delay = removeTrailingLiteralStates(g, lit, max_delay, - false /* can't overhang start */); - - if (delay == MO_INVALID_IDX) { - /* This is can happen if the literal arises from a large cyclic - to/beyond the pivot. As the LHS graph only cares about the first - reach of the pivot, this literal is junk */ - DEBUG_PRINTF("bogus edge\n"); - return false; - } - - if (!delay) { - return true; - } - - DEBUG_PRINTF("setting delay %u on lhs %p\n", delay, h.get()); - - ig[lhs].graph = move(h); - ig[lhs].graph_lag = delay; - return true; -} - -static -void handleLhsCliche(RoseInGraph &ig, const RoseInEdge &lhs) { - const NGHolder &h = *ig[lhs].graph; - - size_t s_od = out_degree(h.start, h); - size_t sds_od = out_degree(h.startDs, h); - - assert(in_degree(h.acceptEod, h) == 1 /* edge from accept */); - /* need to check if simple floating start */ - if (edge(h.startDs, h.accept, h).second && sds_od == 2 - && ((s_od == 2 && edge(h.start, h.accept, h).second) || s_od == 1)) { - /* no need for graph */ - ig[lhs].graph.reset(); - ig[lhs].graph_lag = 0; - DEBUG_PRINTF("lhs is floating start\n"); - return; - } - - /* need to check if a simple anchor */ - /* start would have edges to sds and accept in this case */ - if (edge(h.start, h.accept, h).second && s_od == 2 && sds_od == 1) { - if (ig[source(lhs, ig)].type == RIV_ANCHORED_START) { - // assert(ig[lhs].graph_lag == ig[target(lhs, ig)].s.length()); - if (ig[lhs].graph_lag != ig[target(lhs, ig)].s.length()) { - DEBUG_PRINTF("oddness\n"); - return; - } - ig[lhs].graph.reset(); - ig[lhs].graph_lag = 0; - ig[lhs].maxBound = 0; - DEBUG_PRINTF("lhs is anchored start\n"); - } else { - DEBUG_PRINTF("lhs rewiring start\n"); - assert(ig[source(lhs, ig)].type == RIV_START); - RoseInVertex t = target(lhs, ig); - remove_edge(lhs, ig); - RoseInVertex s2 - = add_vertex(RoseInVertexProps::makeStart(true), ig); - add_edge(s2, t, RoseInEdgeProps(0U, 0U), ig); - } - return; - } -} - -static -void filterCandPivots(const NGHolder &g, const set &cand_raw, - set *out) { - for (auto u : cand_raw) { - const CharReach &u_cr = g[u].char_reach; - if (u_cr.count() > 40) { - continue; /* too wide to be plausible */ - } - - if (u_cr.count() > 2) { - /* include u as a candidate as successor may have backed away from - * expanding through it */ - out->insert(u); - continue; - } - - NFAVertex v = getSoleDestVertex(g, u); - if (v && in_degree(v, g) == 1 && out_degree(u, g) == 1) { - const CharReach &v_cr = g[v].char_reach; - if (v_cr.count() == 1 || v_cr.isCaselessChar()) { - continue; /* v will always generate better literals */ - } - } - - out->insert(u); - } -} - -/* cand_raw is the candidate set before filtering points which are clearly - * a bad idea. */ -static -void getCandidatePivots(const NGHolder &g, set *cand, - set *cand_raw) { - ue2::unordered_map dominators = - findDominators(g); - - set accepts; - - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - if (is_special(v, g)) { - continue; - } - accepts.insert(v); - } - for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) { - if (is_special(v, g)) { - continue; - } - accepts.insert(v); - } - - assert(!accepts.empty()); - - vector dom_trace; - auto ait = accepts.begin(); - assert(ait != accepts.end()); - NFAVertex curr = *ait; - while (curr && !is_special(curr, g)) { - dom_trace.push_back(curr); - curr = dominators[curr]; - } - reverse(dom_trace.begin(), dom_trace.end()); - for (++ait; ait != accepts.end(); ++ait) { - curr = *ait; - vector dom_trace2; - while (curr && !is_special(curr, g)) { - dom_trace2.push_back(curr); - curr = dominators[curr]; - } - reverse(dom_trace2.begin(), dom_trace2.end()); - auto dti = dom_trace.begin(), dtie = dom_trace.end(); - auto dtj = dom_trace2.begin(), dtje = dom_trace2.end(); - while (dti != dtie && dtj != dtje && *dti == *dtj) { - ++dti; - ++dtj; - } - dom_trace.erase(dti, dtie); - } - - cand_raw->insert(dom_trace.begin(), dom_trace.end()); - - filterCandPivots(g, *cand_raw, cand); -} - -static -void deanchorIfNeeded(NGHolder &g, bool *orig_anch) { - DEBUG_PRINTF("hi\n"); - if (proper_out_degree(g.startDs, g)) { - return; - } - - /* look for a non-special dot with a loop following start */ - set succ_g; - insert(&succ_g, adjacent_vertices(g.start, g)); - succ_g.erase(g.startDs); - - for (auto v : adjacent_vertices_range(g.start, g)) { - DEBUG_PRINTF("inspecting cand %zu || =%zu\n", g[v].index, - g[v].char_reach.size()); - - if (v == g.startDs || !g[v].char_reach.all()) { - continue; - } - - set succ_v; - insert(&succ_v, adjacent_vertices(v, g)); - - if (succ_v == succ_g) { - DEBUG_PRINTF("found ^.*\n"); - *orig_anch = true; - for (auto succ : succ_g) { - add_edge(g.startDs, succ, g); - } - clear_vertex(v, g); - remove_vertex(v, g); - renumber_vertices(g); - return; - } - - if (succ_g.size() == 1 && hasSelfLoop(v, g)) { - DEBUG_PRINTF("found ^.+\n"); - *orig_anch = true; - add_edge(g.startDs, v, g); - remove_edge(v, v, g); - return; - } - } -} - -static -unique_ptr makeTrivialGraph(const NGHolder &h, - vdest_map_t &v_dest_map, - vsrc_map_t &v_src_map) { - shared_ptr root_g = cloneHolder(h); - bool orig_anch = isAnchored(*root_g); - deanchorIfNeeded(*root_g, &orig_anch); - - DEBUG_PRINTF("orig_anch %d\n", (int)orig_anch); - - unique_ptr igp = ue2::make_unique(); - RoseInVertex start = - add_vertex(RoseInVertexProps::makeStart(orig_anch), *igp); - RoseInVertex accept = - add_vertex(RoseInVertexProps::makeAccept(set()), *igp); - - RoseInEdge e = - add_edge(start, accept, RoseInEdgeProps(root_g, 0), *igp).first; - - for (auto v : vertices_range(*root_g)) { - v_dest_map[v].emplace_back(e, v); - v_src_map[e].push_back(v); - } - - return igp; -} - -static never_inline -void updateVDestMap(const vector > &images, - const ue2::unordered_map &lhs_map, - const vector &l_e, - const ue2::unordered_map &rhs_map, - const vector &r_e, - vdest_map_t &v_dest_map, vsrc_map_t &v_src_map) { - RoseInEdge e = images.front().first; - set edge_set; - for (const auto &image : images) { - edge_set.insert(image.first); - } - const vector &domain = v_src_map[e]; - vector > temp; - - for (auto v : domain) { - vdest_map_t::iterator it = v_dest_map.find(v); - assert(it != v_dest_map.end()); - - temp.clear(); - - for (const auto &dest : it->second) { - const RoseInEdge &old_e = dest.first; - const NFAVertex old_dest = dest.second; - if (old_e != e) { - if (!contains(edge_set, old_e)) { - temp.emplace_back(old_e, old_dest); - } - } else if (contains(lhs_map, old_dest)) { - for (const auto &e2 : l_e) { - temp.emplace_back(e2, lhs_map.at(old_dest)); - } - /* only allow v to be tracked on one side of the split */ - } else if (contains(rhs_map, old_dest)) { - for (const auto &e2 : r_e) { - temp.emplace_back(e2, rhs_map.at(old_dest)); - } - } - } - NDEBUG_PRINTF("%zu images for vertex; prev %zu\n", temp.size(), - it->second.size()); - it->second.swap(temp); - } -} - -/** Returns the collection of vertices from the original graph which end up - * having an image in the [lr]hs side of the graph split. */ -static never_inline -void fillDomain(const vdest_map_t &v_dest_map, const vsrc_map_t &v_src_map, - RoseInEdge e, - const ue2::unordered_map &split_map, - vector *out) { - const vector &presplit_domain = v_src_map.at(e); - for (auto v : presplit_domain) { - /* v is in the original graph, need to find its image on e's graph */ - typedef vector > dests_t; - const dests_t &dests = v_dest_map.at(v); - for (const auto &dest : dests) { - if (dest.first == e) { - NFAVertex vv = dest.second; - /* vv is v image on e's graph */ - if (contains(split_map, vv)) { - out->push_back(v); - } - } - } - } -} - -static -void getSourceVerts(RoseInGraph &ig, - const vector > &images, - vector *out) { - set seen; - for (const auto &image : images) { - RoseInVertex s = source(image.first, ig); - if (contains(seen, s)) { - continue; - } - seen.insert(s); - out->push_back(s); - } -} - -static -void getDestVerts(RoseInGraph &ig, - const vector > &images, - vector *out) { - set seen; - for (const auto &image : images) { - RoseInVertex t = target(image.first, ig); - if (contains(seen, t)) { - continue; - } - seen.insert(t); - out->push_back(t); - } -} - -static -void getSourceVerts(RoseInGraph &ig, const vector &edges, - vector *out) { - set seen; - for (const auto &e : edges) { - RoseInVertex s = source(e, ig); - if (contains(seen, s)) { - continue; - } - seen.insert(s); - out->push_back(s); - } -} - -static -void getDestVerts(RoseInGraph &ig, const vector &edges, - vector *out) { - set seen; - for (const auto &e : edges) { - RoseInVertex t = target(e, ig); - if (contains(seen, t)) { - continue; - } - seen.insert(t); - out->push_back(t); - } -} - -static -bool splitRoseEdge(RoseInGraph &ig, const VertLitInfo &split, - vdest_map_t &v_dest_map, vsrc_map_t &v_src_map) { - const vector &root_splitters = split.vv; /* vertices in the - 'root' graph */ - assert(!root_splitters.empty()); - - /* need copy as split rose edge will update orig map */ - vector > images - = v_dest_map[root_splitters[0]]; - DEBUG_PRINTF("splitting %zu rose edge with %zu literals\n", - images.size(), split.lit.size()); - - /* note: as we haven't removed literals yet the graphs on all edges that we - * are going to split should be identical */ - const auto &base_graph = ig[images.front().first].graph; - - vector splitters; /* vertices in the graph being split */ - for (auto v : root_splitters) { - if (!contains(v_dest_map, v)) { - DEBUG_PRINTF("vertex to split on is no longer in the graph\n"); - return false; - } - - /* sanity check: verify all edges have the same underlying graph */ - for (UNUSED const auto &m : v_dest_map[v]) { - assert(base_graph == ig[m.first].graph); - } - assert(v_dest_map[v].size() == images.size()); - - splitters.push_back(v_dest_map[v].front().second); - } - - /* note: the set of split edges should form a complete bipartite graph */ - vector src_verts; - vector dest_verts; - getSourceVerts(ig, images, &src_verts); - getDestVerts(ig, images, &dest_verts); - assert(images.size() == src_verts.size() * dest_verts.size()); - - shared_ptr lhs = make_shared(); - shared_ptr rhs = make_shared(); - - ue2::unordered_map lhs_map; - ue2::unordered_map rhs_map; - - assert(base_graph); - splitGraph(*base_graph, splitters, lhs.get(), &lhs_map, - rhs.get(), &rhs_map); - - RoseInEdge first_e = images.front().first; - - /* all will be suffix or none */ - bool suffix = ig[target(first_e, ig)].type == RIV_ACCEPT; - - set splitter_reports; - for (auto v : splitters) { - insert(&splitter_reports, (*base_graph)[v].reports); - } - - bool do_accept = false; - bool do_accept_eod = false; - assert(rhs); - if (isVacuous(*rhs) && suffix) { - if (edge(rhs->start, rhs->accept, *rhs).second) { - DEBUG_PRINTF("rhs has a cliche\n"); - do_accept = true; - remove_edge(rhs->start, rhs->accept, *rhs); - } - - if (edge(rhs->start, rhs->acceptEod, *rhs).second) { - DEBUG_PRINTF("rhs has an eod cliche\n"); - do_accept_eod = true; - remove_edge(rhs->start, rhs->acceptEod, *rhs); - } - } - - bool do_norm = out_degree(rhs->start, *rhs) != 1; /* check if we still have - a graph left over */ - vector lhs_domain; - vector rhs_domain; - fillDomain(v_dest_map, v_src_map, first_e, lhs_map, &lhs_domain); - fillDomain(v_dest_map, v_src_map, first_e, rhs_map, &rhs_domain); - - vector l_e; - vector r_e; - for (const auto &lit : split.lit) { - DEBUG_PRINTF("best is '%s'\n", escapeString(lit).c_str()); - RoseInVertex v - = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); - - /* work out delay later */ - if (do_accept) { - DEBUG_PRINTF("rhs has a cliche\n"); - RoseInVertex tt = add_vertex(RoseInVertexProps::makeAccept( - splitter_reports), ig); - add_edge(v, tt, RoseInEdgeProps(0U, 0U), ig); - } - - if (do_accept_eod) { - DEBUG_PRINTF("rhs has an eod cliche\n"); - RoseInVertex tt = add_vertex(RoseInVertexProps::makeAcceptEod( - splitter_reports), ig); - add_edge(v, tt, RoseInEdgeProps(0U, 0U), ig); - } - - for (auto src_v : src_verts) { - l_e.push_back(add_edge(src_v, v, - RoseInEdgeProps(lhs, 0U), ig).first); - v_src_map[l_e.back()] = lhs_domain; - } - - if (do_norm) { - for (auto dst_v : dest_verts) { - /* work out delay later */ - assert(out_degree(rhs->start, *rhs) > 1); - r_e.push_back( - add_edge(v, dst_v, RoseInEdgeProps(rhs, 0U), ig).first); - v_src_map[r_e.back()] = rhs_domain; - } - } - } - - updateVDestMap(images, lhs_map, l_e, rhs_map, r_e, v_dest_map, v_src_map); - - for (const auto &image : images) { - /* remove old edge */ - remove_edge(image.first, ig); - v_src_map.erase(image.first); - } - - return true; -} - -static -bool isStarCliche(const NGHolder &g) { - DEBUG_PRINTF("checking graph with %zu vertices\n", num_vertices(g)); - - bool nonspecials_seen = false; - - for (auto v : vertices_range(g)) { - if (is_special(v, g)) { - continue; - } - - if (nonspecials_seen) { - return false; - } - nonspecials_seen = true; - - if (!g[v].char_reach.all()) { - return false; - } - - if (!hasSelfLoop(v, g)) { - return false; - } - if (!edge(v, g.accept, g).second) { - return false; - } - } - - if (!nonspecials_seen) { - return false; - } - - if (!edge(g.start, g.accept, g).second) { - return false; - } - - return true; -} - -static -void processInfixes(RoseInGraph &ig, const CompileContext &cc) { - /* we want to ensure that every prefix/infix graph is unique at this stage - * as we have not done any analysis to check if they are safe to share */ - - vector dead; - - for (const auto &e : edges_range(ig)) { - if (!ig[e].graph) { - continue; - } - - RoseInVertex u = source(e, ig), v = target(e, ig); - - // Infixes are edges between two literals. - if (ig[u].type != RIV_LITERAL || ig[v].type != RIV_LITERAL) { - continue; - } - - if (ig[e].graph_lag) { - continue; /* already looked at */ - } - - DEBUG_PRINTF("looking at infix %p\n", ig[e].graph.get()); - - const ue2_literal &lit1 = ig[u].s; - const ue2_literal &lit2 = ig[v].s; - size_t overlap = maxOverlap(lit1, lit2, 0); - - const NGHolder &h = *ig[e].graph; - - DEBUG_PRINTF("infix rose between literals '%s' and '%s', overlap %zu," - "size %zu\n", - dumpString(lit1).c_str(), dumpString(lit2).c_str(), - overlap, num_vertices(h)); - - if (!can_match(h, lit2, true)) { - DEBUG_PRINTF("found bogus edge\n"); - dead.push_back(e); - continue; - } - - unique_ptr h_new = cloneHolder(h); - - u32 delay = removeTrailingLiteralStates(*h_new, lit2, MO_INVALID_IDX); - if (delay == MO_INVALID_IDX) { - DEBUG_PRINTF("found bogus edge\n"); - dead.push_back(e); - continue; - } - - // Delay can be set to at most lit2.length() - overlap, but we must - // truncate to history available in streaming mode. - u32 max_allowed_delay = lit2.length() - overlap; - LIMIT_TO_AT_MOST(&max_allowed_delay, delay); - - if (cc.streaming) { - LIMIT_TO_AT_MOST(&max_allowed_delay, cc.grey.maxHistoryAvailable); - } - - if (delay != max_allowed_delay) { - restoreTrailingLiteralStates(*h_new, lit2, delay); - delay = removeTrailingLiteralStates(*h_new, lit2, max_allowed_delay); - } - - if (isStarCliche(*h_new)) { - DEBUG_PRINTF("is a X star!\n"); - ig[e].graph.reset(); - ig[e].graph_lag = 0; - } else { - ig[e].graph = move(h_new); - ig[e].graph_lag = delay; - DEBUG_PRINTF("delay increased to %u\n", delay); - } - } - - for (const auto &e : dead) { - remove_edge(e, ig); - } -} - -static -void poisonNetflowScores(RoseInGraph &ig, RoseInEdge lhs, - vector *scores) { - assert(ig[lhs].graph); - NGHolder &h = *ig[lhs].graph; - - if (ig[target(lhs, ig)].type != RIV_LITERAL) { - /* nothing to poison in outfixes */ - assert(ig[target(lhs, ig)].type == RIV_ACCEPT); - return; - } - - set curr, next; - insert(&curr, inv_adjacent_vertices(h.accept, h)); - set poisoned; - u32 len = ig[target(lhs, ig)].s.length(); - assert(len); - while (len) { - next.clear(); - for (auto v : curr) { - insert(&poisoned, in_edges(v, h)); - insert(&next, inv_adjacent_vertices(v, h)); - } - - curr.swap(next); - len--; - } - - for (const auto &e : poisoned) { - (*scores)[h[e].index] = NO_LITERAL_AT_EDGE_SCORE; - } -} - -#define MAX_NETFLOW_CUT_WIDTH 40 /* magic number is magic */ -#define MAX_LEN_2_LITERALS_PER_CUT 3 - -static -bool checkValidNetflowLits(NGHolder &h, const vector &scores, - const map> &cut_lits, - const Grey &grey) { - DEBUG_PRINTF("cut width %zu\n", cut_lits.size()); - if (cut_lits.size() > MAX_NETFLOW_CUT_WIDTH) { - return false; - } - - u32 len_2_count = 0; - - for (const auto &cut : cut_lits) { - if (scores[h[cut.first].index] >= NO_LITERAL_AT_EDGE_SCORE) { - DEBUG_PRINTF("cut uses a forbidden edge\n"); - return false; - } - - if (min_len(cut.second) < grey.minRoseNetflowLiteralLength) { - DEBUG_PRINTF("cut uses a bad literal\n"); - return false; - } - - for (const auto &lit : cut.second) { - if (lit.length() == 2) { - len_2_count++; - } - } - } - - if (len_2_count > MAX_LEN_2_LITERALS_PER_CUT) { - return false; - } - - return true; -} - -static -void splitEdgesByCut(RoseInGraph &ig, const vector &to_cut, - const vector &cut, - const map > &cut_lits) { - assert(!to_cut.empty()); - assert(ig[to_cut.front()].graph); - NGHolder &h = *ig[to_cut.front()].graph; - - /* note: the set of split edges should form a complete bipartite graph */ - vector src_verts; - vector dest_verts; - getSourceVerts(ig, to_cut, &src_verts); - getDestVerts(ig, to_cut, &dest_verts); - assert(to_cut.size() == src_verts.size() * dest_verts.size()); - - map, shared_ptr > done_rhs; - - /* iterate over cut for determinism */ - for (const auto &e : cut) { - NFAVertex prev_v = source(e, h); - NFAVertex pivot = target(e, h); - - vector adj; - insert(&adj, adj.end(), adjacent_vertices(pivot, h)); - /* we can ignore presence of accept, accepteod in adj as it is best - effort */ - - if (!contains(done_rhs, adj)) { - ue2::unordered_map temp_map; - shared_ptr new_rhs = make_shared(); - splitRHS(h, adj, new_rhs.get(), &temp_map); - remove_edge(new_rhs->start, new_rhs->accept, *new_rhs); - remove_edge(new_rhs->start, new_rhs->acceptEod, *new_rhs); - done_rhs.insert(make_pair(adj, new_rhs)); - /* TODO need to update v_mapping (if we were doing more cuts) */ - } - - DEBUG_PRINTF("splitting on pivot %zu\n", h[pivot].index); - ue2::unordered_map temp_map; - shared_ptr new_lhs = make_shared(); - splitLHS(h, pivot, new_lhs.get(), &temp_map); - - /* want to cut of paths to pivot from things other than the pivot - - * makes a more svelte graphy */ - clear_in_edges(temp_map[pivot], *new_lhs); - add_edge(temp_map[prev_v], temp_map[pivot], *new_lhs); - - pruneUseless(*new_lhs); - - const set &lits = cut_lits.at(e); - for (const auto &lit : lits) { - RoseInVertex v - = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); - - if (edge(pivot, h.accept, h).second) { - /* literal has a direct connection to accept */ - assert(ig[dest_verts.front()].type == RIV_ACCEPT); - const auto &reports = h[pivot].reports; - RoseInVertex tt = - add_vertex(RoseInVertexProps::makeAccept(reports), ig); - add_edge(v, tt, RoseInEdgeProps(0U, 0U), ig); - } - - if (edge(pivot, h.acceptEod, h).second) { - /* literal has a direct connection to accept */ - assert(ig[dest_verts.front()].type == RIV_ACCEPT); - const auto &reports = h[pivot].reports; - RoseInVertex tt = add_vertex( - RoseInVertexProps::makeAcceptEod(reports), ig); - add_edge(v, tt, RoseInEdgeProps(0U, 0U), ig); - } - - assert(done_rhs[adj].get()); - shared_ptr new_rhs = done_rhs[adj]; - if (out_degree(new_rhs->start, *new_rhs) != 1) { - for (auto dst_v : dest_verts) { - add_edge(v, dst_v, RoseInEdgeProps(done_rhs[adj], 0), ig); - } - } - - for (auto src_v : src_verts) { - add_edge(src_v, v, RoseInEdgeProps(new_lhs, 0), ig); - } - } - } - - /* TODO need to update v_mapping (if we were doing more cuts) */ - - for (const auto &e : to_cut) { - assert(ig[e].graph.get() == &h); - remove_edge(e, ig); - } -} - -static -bool doNetflowCut(RoseInGraph &ig, const vector &to_cut, - const Grey &grey) { - DEBUG_PRINTF("doing netflow cut\n"); - /* TODO: we should really get literals/scores from the full graph as this - * allows us to overlap the graph. Doesn't matter at the moment as we - * are working on the LHS. */ - - NGHolder &h = *ig[to_cut.front()].graph; - if (num_edges(h) > grey.maxRoseNetflowEdges) { - /* We have a limit on this because scoring edges and running netflow - * gets very slow for big graphs. */ - DEBUG_PRINTF("too many edges, skipping netflow cut\n"); - return false; - } - - renumber_vertices(h); - renumber_edges(h); - /* Step 1: Get scores for all edges */ - vector scores = scoreEdges(h); /* scores by edge_index */ - /* Step 2: poison scores for edges covered by successor literal */ - for (const auto &e : to_cut) { - assert(&h == ig[e].graph.get()); - poisonNetflowScores(ig, e, &scores); - } - /* Step 3: Find cutset based on scores */ - vector cut = findMinCut(h, scores); - - /* Step 4: Get literals corresponding to cut edges */ - map> cut_lits; - for (const auto &e : cut) { - set lits = getLiteralSet(h, e); - compressAndScore(lits); - cut_lits[e] = lits; - } - - /* if literals are underlength bail or if it involves a forbidden edge*/ - if (!checkValidNetflowLits(h, scores, cut_lits, grey)) { - return false; - } - DEBUG_PRINTF("splitting\n"); - - /* Step 5: Split graph based on cuts */ - splitEdgesByCut(ig, to_cut, cut, cut_lits); - return true; -} - -/** \brief Returns the number of intermediate vertices in the shortest path - * between (from, to). */ -static -u32 min_dist_between(NFAVertex from, NFAVertex to, const NGHolder &g) { - // Check for the trivial case: that way we don't have to set up the - // containers below. - if (edge(from, to, g).second) { - return 0; - } - - ue2::unordered_set visited; - visited.insert(from); - - flat_set curr, next; - curr.insert(from); - - assert(from != to); - - u32 d = 0; - - while (!curr.empty()) { - next.clear(); - for (auto v : curr) { - for (auto w : adjacent_vertices_range(v, g)) { - if (w == to) { - return d; - } - if (visited.insert(w).second) { // first visit to *ai - next.insert(w); - } - } - } - - d++; - curr.swap(next); - } - assert(0); - return ROSE_BOUND_INF; -} - -/** Literals which are completely enveloped by a successor are trouble because - * hamsterwheel acceleration can skip past the start of the literal. */ -static -bool enveloped(const vector &cand_split_v, - const set &cand_lit, const NGHolder &g, - const RoseInVertexProps &succ) { - if (succ.type != RIV_LITERAL) { - return false; - } - - /* TODO: handle multiple v more precisely: not all candidate v can start all - * candidate literals */ - - for (auto v : cand_split_v) { - u32 rhs_min_len = min_dist_between(v, g.accept, g); - if (rhs_min_len + min_len(cand_lit) >= succ.s.length()) { - return false; - } - } - - return true; /* we are in trouble */ -} - -static -bool enveloped(const VertLitInfo &cand_split, const RoseInGraph &ig, - const vdest_map_t &v_dest_map) { - for (auto v : cand_split.vv) { - const auto &images = v_dest_map.at(v); - for (const auto &image : images) { - /* check that we aren't enveloped by the successor */ - if (enveloped(vector(1, image.second), cand_split.lit, - *ig[image.first].graph, - ig[target(image.first, ig)])) { - return true; - } - - const RoseInVertexProps &pred = ig[source(image.first, ig)]; - if (pred.type != RIV_LITERAL) { - continue; - } - - /* check we don't envelop the pred */ - const NGHolder &g = *ig[image.first].graph; - u32 lhs_min_len = min_dist_between(g.start, image.second, g); - if (lhs_min_len + pred.s.length() < max_len(cand_split.lit)) { - return true; - } - } - } - - return false; -} - -static -bool attemptSplit(RoseInGraph &ig, vdest_map_t &v_dest_map, - vsrc_map_t &v_src_map, const vector &v_e, - LitCollection &lits) { - NGHolder &h = *ig[v_e.front()].graph; - unique_ptr split = lits.pickNext(); - - while (split) { - for (const auto &e : v_e) { - RoseInVertex t = target(e, ig); - if (enveloped(split->vv, split->lit, h, ig[t])) { - DEBUG_PRINTF("enveloped\n"); - split = lits.pickNext(); - goto next_split; - } - } - break; - next_split:; - } - - if (!split) { - return false; - } - - for (auto v : split->vv) { - if (edge(v, h.accept, h).second) { - return false; - } - } - - DEBUG_PRINTF("saved by a bad literal\n"); - splitRoseEdge(ig, *split, v_dest_map, v_src_map); - return true; -} - -static -void appendLiteral(const ue2_literal &s, const CharReach &cr, - vector *out) { - for (size_t c = cr.find_first(); c != CharReach::npos; - c = cr.find_next(c)) { - bool nocase = ourisalpha(c) && cr.test(mytoupper(c)) - && cr.test(mytolower(c)); - - if (nocase && (char)c == mytolower(c)) { - continue; /* uppercase already handled us */ - } - - out->push_back(s); - out->back().push_back(c, nocase); - } -} - -static -bool findAnchoredLiterals(const NGHolder &g, vector *out, - vector *pivots_out) { - - DEBUG_PRINTF("trying for anchored\n"); -#define MAX_ANCHORED_LITERALS 30 -#define MAX_ANCHORED_LITERAL_LEN 30 - - /* TODO: this could be beefed up by going region-by-region but currently - * that brings back bad memories of ng_rose. OR any AA region we can build - * a dfa out of */ - assert(!proper_out_degree(g.startDs, g)); - - vector lits; - lits.push_back(ue2_literal()); - - set curr; - insert(&curr, adjacent_vertices(g.start, g)); - curr.erase(g.startDs); - - set old; - - if (contains(curr, g.accept) || curr.empty()) { - DEBUG_PRINTF("surprise accept/voidness\n"); - return false; - } - - while (!curr.empty()) { - set next_verts; - insert(&next_verts, adjacent_vertices(*curr.begin(), g)); - bool can_extend - = !next_verts.empty() && !contains(next_verts, g.accept); - CharReach cr; - - for (auto v : curr) { - assert(!is_special(v, g)); - - if (can_extend) { - /* next verts must agree */ - set next_verts_local; - insert(&next_verts_local, adjacent_vertices(v, g)); - can_extend = next_verts_local == next_verts; - } - - cr |= g[v].char_reach; - } - - if (!can_extend) { - goto bail; - } - - /* extend literals */ - assert(cr.any()); - vector next_lits; - for (const auto &lit : lits) { - appendLiteral(lit, cr, &next_lits); - if (next_lits.size() > MAX_ANCHORED_LITERALS) { - goto bail; - } - } - - assert(!next_lits.empty()); - old.swap(curr); - - if (next_lits[0].length() <= MAX_ANCHORED_LITERAL_LEN) { - curr.swap(next_verts); - } else { - curr.clear(); - } - - lits.swap(next_lits); - } - bail: - assert(!lits.empty()); - for (UNUSED const auto &lit : lits) { - DEBUG_PRINTF("found anchored string: %s\n", dumpString(lit).c_str()); - } - - insert(pivots_out, pivots_out->end(), old); - out->swap(lits); - return !out->empty() && !out->begin()->empty(); -} - -static -bool tryForAnchoredImprovement(RoseInGraph &ig, RoseInEdge e) { - vector lits; - vector pivots; - - if (!findAnchoredLiterals(*ig[e].graph, &lits, &pivots)) { - DEBUG_PRINTF("unable to find literals\n"); - return false; - } - DEBUG_PRINTF("found %zu literals to act as anchors\n", lits.size()); - - RoseInVertex s = source(e, ig); - RoseInVertex t = target(e, ig); - - assert(!ig[e].graph_lag); - - shared_ptr lhs = make_shared(); - shared_ptr rhs = make_shared(); - ue2::unordered_map temp1; - ue2::unordered_map temp2; - - splitGraph(*ig[e].graph, pivots, lhs.get(), &temp1, rhs.get(), &temp2); - - for (const auto &lit : lits) { - RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), - ig); - add_edge(s, v, RoseInEdgeProps(lhs, 0U), ig); - add_edge(v, t, RoseInEdgeProps(rhs, 0U), ig); - } - remove_edge(e, ig); - - return true; -} - -#define MAX_SINGLE_BYTE_ANCHORED_DIST 30 - -/* returns true if we should make another pass */ -static -bool lastChanceImproveLHS(RoseInGraph &ig, RoseInEdge lhs, - const CompileContext &cc) { - DEBUG_PRINTF("argh lhs is nasty\n"); - assert(ig[lhs].graph); - - /* customise the lhs for this literal */ - /* TODO better, don't recalc */ - if (ig[target(lhs, ig)].type == RIV_LITERAL) { - const NGHolder &h = *ig[lhs].graph; - - /* sanitise literal on lhs */ - const ue2_literal &s = ig[target(lhs, ig)].s; - - if (!can_match(h, s, false)) { - DEBUG_PRINTF("found bogus edge\n"); - return false; - } - - /* see if we can build some anchored literals out of this */ - if (isAnchored(h) && tryForAnchoredImprovement(ig, lhs)) { - return true; - } - - unique_ptr cust = cloneHolder(h); - u32 d = removeTrailingLiteralStates(*cust, s, MO_INVALID_IDX); - if (d == MO_INVALID_IDX) { - DEBUG_PRINTF("found bogus edge\n"); - return false; - } - restoreTrailingLiteralStates(*cust, s, d); - ig[lhs].graph = move(cust); - } - - NGHolder &lhs_graph = *ig[lhs].graph; - set cand; - set cand_raw; - getCandidatePivots(lhs_graph, &cand, &cand_raw); - vdest_map_t v_dest_map; - vsrc_map_t v_src_map; - for (auto v : vertices_range(lhs_graph)) { - v_dest_map[v].emplace_back(lhs, v); - v_src_map[lhs].push_back(v); - } - - vector depths; - calcDepths(lhs_graph, depths); - - /* need to ensure regions are valid before we do lit discovery */ - auto region_map = assignRegions(lhs_graph); - - vector to_cut(1, lhs); - DEBUG_PRINTF("see if we can get a better lhs by another cut\n"); - LitCollection lit1(lhs_graph, depths, region_map, cand, cand_raw, - cc.grey.minRoseLiteralLength, true, cc); - if (attemptSplit(ig, v_dest_map, v_src_map, to_cut, lit1)) { - return true; - } - - if (doNetflowCut(ig, to_cut, cc.grey)) { - return true; - } - - DEBUG_PRINTF("eek last chance try len 1 if it creates an anchored lhs\n"); - { - LitCollection lits(lhs_graph, depths, region_map, cand, cand_raw, 1, - true, cc, true); - unique_ptr split = lits.pickNext(); - - /* TODO fix edge to accept check */ - while (split - && (enveloped(split->vv, split->lit, lhs_graph, - ig[target(lhs, ig)]) - || edge(split->vv.front(), lhs_graph.accept, lhs_graph).second - || !createsAnchoredLHS(lhs_graph, split->vv, depths, cc.grey, - MAX_SINGLE_BYTE_ANCHORED_DIST))) { - split = lits.pickNext(); - } - - if (split) { - DEBUG_PRINTF("saved by a really bad literal\n"); - splitRoseEdge(ig, *split, v_dest_map, v_src_map); - return true; - } - } - - return false; -} - -/* returns false if nothing happened */ -static -bool lastChanceImproveLHS(RoseInGraph &ig, const vector &to_cut, - const CompileContext &cc) { - DEBUG_PRINTF("argh lhses are nasty\n"); - - NGHolder &lhs_graph = *ig[to_cut.front()].graph; - set cand; - set cand_raw; - getCandidatePivots(lhs_graph, &cand, &cand_raw); - vdest_map_t v_dest_map; - vsrc_map_t v_src_map; - for (auto v : vertices_range(lhs_graph)) { - for (const auto &e : to_cut) { - v_dest_map[v].emplace_back(e, v); - v_src_map[e].push_back(v); - } - } - - vector depths; - calcDepths(lhs_graph, depths); - - auto region_map = assignRegions(lhs_graph); - - DEBUG_PRINTF("see if we can get a better lhs by allowing another cut\n"); - LitCollection lit1(lhs_graph, depths, region_map, cand, cand_raw, - cc.grey.minRoseLiteralLength, true, cc); - if (attemptSplit(ig, v_dest_map, v_src_map, to_cut, lit1)) { - return true; - } - - return doNetflowCut(ig, to_cut, cc.grey); -} - -static -bool improveLHS(RoseInGraph &ig, const vector &edges, - const CompileContext &cc) { - bool rv = false; - - vector src_verts; - getSourceVerts(ig, edges, &src_verts); - - map> by_src; - for (const auto &e : edges) { - by_src[source(e, ig)].push_back(e); - } - - for (auto v : src_verts) { - const vector &local = by_src[v]; - - vector graphs; - map > by_graph; - for (const auto &e : local) { - NGHolder *gp = ig[e].graph.get(); - if (!contains(by_graph, gp)) { - graphs.push_back(gp); - } - by_graph[gp].push_back(e); - } - - for (auto h : graphs) { - const vector &local2 = by_graph[h]; - if (local2.size() == 1) { - rv |= lastChanceImproveLHS(ig, local2.front(), cc); - continue; - } - - bool lrv = lastChanceImproveLHS(ig, local2, cc); - if (lrv) { - rv = true; - } else { - for (const auto &e2 : local2) { - rv |= lastChanceImproveLHS(ig, e2, cc); - } - } - } - } - - return rv; -} - -static -void processLHS(RoseInGraph &ig, const CompileContext &cc) { - bool redo; - do { - redo = false; - vector to_improve; - for (const auto &lhs : edges_range(ig)) { - if (ig[source(lhs, ig)].type != RIV_START - && ig[source(lhs, ig)].type != RIV_ANCHORED_START) { - continue; - } - - if (ig[target(lhs, ig)].type == RIV_LITERAL) { - DEBUG_PRINTF("checking lhs->'%s'\n", - ig[target(lhs, ig)].s.c_str()); - } else { - DEBUG_PRINTF("checking lhs->?\n"); - } - - - /* if check if lhs is nasty */ - if (ig[target(lhs, ig)].type == RIV_ACCEPT) { - to_improve.push_back(lhs); - continue; - } - - assert(ig[lhs].graph); - const NGHolder *h = ig[lhs].graph.get(); - - vector depths; - calcDepths(*h, depths); - - if (!isLHSTransient(*h, depths, cc.grey) - && !literalIsWholeGraph(*h, ig[target(lhs, ig)].s) - && !isLHSUsablyAnchored(*h, depths, cc.grey)) { - to_improve.push_back(lhs); - } - } - - DEBUG_PRINTF("inspecting %zu lhs\n", to_improve.size()); - if (to_improve.size() > 50) { - DEBUG_PRINTF("too big\n"); - break; - } - - redo = improveLHS(ig, to_improve, cc); - DEBUG_PRINTF("redo = %d\n", (int)redo); - } while (redo); - - vector to_inspect; /* to prevent surprises caused by us - * altering the graph while iterating */ - for (const auto &e : edges_range(ig)) { - if (ig[source(e, ig)].type == RIV_START - || ig[source(e, ig)].type == RIV_ANCHORED_START) { - to_inspect.push_back(e); - } - } - - for (const auto &lhs : to_inspect) { - if (ig[target(lhs, ig)].type == RIV_LITERAL) { - if (removeLiteralFromLHS(ig, lhs, cc)) { - handleLhsCliche(ig, lhs); - } else { - /* telling us to delete the edge */ - remove_edge(lhs, ig); - } - } - } -} - -static -void tryNetflowCutForRHS(RoseInGraph &ig, const Grey &grey) { - vector to_improve; - for (const auto &rhs : edges_range(ig)) { - if (ig[target(rhs, ig)].type != RIV_ACCEPT) { - continue; - } - - if (ig[source(rhs, ig)].type == RIV_LITERAL) { - DEBUG_PRINTF("checking '%s'->rhs\n", ig[source(rhs, ig)].s.c_str()); - } else { - DEBUG_PRINTF("checking ?->rhs\n"); - } - - if (!ig[rhs].graph) { - continue; - } - - DEBUG_PRINTF("%zu vertices\n", num_vertices(*ig[rhs].graph)); - if (num_vertices(*ig[rhs].graph) < 512) { - DEBUG_PRINTF("small\n"); - continue; - } - - /* if check if rhs is nasty */ - to_improve.push_back(rhs); - } - - DEBUG_PRINTF("inspecting %zu lhs\n", to_improve.size()); - if (to_improve.size() > 50) { - DEBUG_PRINTF("too big\n"); - return; - } - - for (const auto &e : to_improve) { - vector to_cut(1, e); - doNetflowCut(ig, to_cut, grey); - } -} - -/* just make the string nocase and get the graph to handle case mask, TODO. - * This could be more nuanced but the effort would probably be better spent - * just making rose less bad. */ -static -void makeNocaseWithPrefixMask(RoseInGraph &g, RoseInVertex v) { - for (const auto &e : in_edges_range(v, g)) { - const RoseInVertex u = source(e, g); - - if (!g[e].graph) { - g[e].graph = make_shared(whatRoseIsThis(g, e)); - g[e].graph_lag = g[v].s.length(); - NGHolder &h = *g[e].graph; - - assert(!g[e].maxBound || g[e].maxBound == ROSE_BOUND_INF); - - if (g[u].type == RIV_START) { - add_edge(h.startDs, h.accept, h); - h[h.startDs].reports.insert(0); - } else if (g[e].maxBound == ROSE_BOUND_INF) { - add_edge(h.start, h.accept, h); - NFAVertex ds = add_vertex(h); - - h[ds].char_reach = CharReach::dot(); - - NFAEdge e_start_to_ds = add_edge(h.start, ds, h); - add_edge(ds, ds, h); - add_edge(ds, h.accept, h); - h[h.start].reports.insert(0); - h[ds].reports.insert(0); - - if (g[u].type == RIV_LITERAL) { - h[e_start_to_ds].tops.insert(DEFAULT_TOP); - } - } else { - assert(g[u].type == RIV_ANCHORED_START); - add_edge(h.start, h.accept, h); - h[h.start].reports.insert(0); - } - } - - if (!g[e].graph_lag) { - continue; - } - unique_ptr newg = cloneHolder(*g[e].graph); - restoreTrailingLiteralStates(*newg, g[v].s, g[e].graph_lag); - g[e].graph_lag = 0; - g[e].graph = move(newg); - } - - make_nocase(&g[v].s); -} - -static -unique_ptr makeGraphCopy(const NGHolder *g) { - if (g) { - return cloneHolder(*g); - } else { - return nullptr; - } -} - -static -void explodeLiteral(RoseInGraph &g, RoseInVertex v, - vector &exploded) { - for (const auto &lit : exploded) { - RoseInVertex v_new = add_vertex(g[v], g); - g[v_new].s = lit; - - for (const auto &e : in_edges_range(v, g)) { - RoseInEdge e2 = add_edge(source(e, g), v_new, g[e], g); - // FIXME: are we safe to share graphs here? For now, make our very - // own copy. - g[e2].graph = makeGraphCopy(g[e].graph.get()); - } - - for (const auto &e : out_edges_range(v, g)) { - RoseInEdge e2 = add_edge(v_new, target(e, g), g[e], g); - // FIXME: are we safe to share graphs here? For now, make our very - // own copy. - g[e2].graph = makeGraphCopy(g[e].graph.get()); - } - } - - clear_vertex(v, g); - remove_vertex(v, g); -} - -/* Sadly rose is hacky in terms of mixed case literals. TODO: remove when rose - * becomes less bad */ -static -void handleLongMixedSensitivityLiterals(RoseInGraph &g) { - const size_t maxExploded = 8; // only case-explode this far - - vector verts; - - for (auto v : vertices_range(g)) { - if (g[v].type != RIV_LITERAL) { - continue; - } - - ue2_literal &s = g[v].s; - - if (!mixed_sensitivity(s)) { - continue; - } - - if (s.length() < MAX_MASK2_WIDTH) { - DEBUG_PRINTF("mixed lit will be handled by benefits mask\n"); - continue; - } - - DEBUG_PRINTF("found mixed lit of len %zu\n", s.length()); - verts.push_back(v); - } - - for (auto v : verts) { - vector exploded; - case_iter cit = caseIterateBegin(g[v].s), cite = caseIterateEnd(); - for (; cit != cite; ++cit) { - exploded.emplace_back(*cit, false); - if (exploded.size() > maxExploded) { - goto dont_explode; - } - } - - DEBUG_PRINTF("exploding literal into %zu pieces\n", exploded.size()); - explodeLiteral(g, v, exploded); - continue; - - dont_explode: - DEBUG_PRINTF("converting to nocase with prefix mask\n"); - makeNocaseWithPrefixMask(g, v); - } - - DEBUG_PRINTF("done!\n"); -} - -static -void dedupe(RoseInGraph &g) { - /* We know that every prefix/infix is unique after the rose construction. - * - * If a vertex has out-going graphs with the same rewind and they are equal - * we can dedupe the graph. - * - * After this, we may share graphs on out-edges of a vertex. */ - map, vector>> buckets; - - for (auto v : vertices_range(g)) { - buckets.clear(); - - for (const auto &e : out_edges_range(v, g)) { - if (!g[e].graph || g[target(e, g)].type != RIV_LITERAL) { - continue; - } - auto k = make_pair(g[e].graph_lag, hash_holder(*g[e].graph)); - auto &bucket = buckets[k]; - for (const auto &h : bucket) { - if (is_equal(*g[e].graph, 0U, *h, 0U)) { - g[e].graph = h; - goto next_edge; - } - } - - bucket.push_back(g[e].graph); - next_edge:; - } - } -} - -static -bool pureReport(NFAVertex v, const NGHolder &g) { - for (auto w : adjacent_vertices_range(v, g)) { - if (w != g.accept && w != g.acceptEod) { - return false; - } - } - return true; -} - -static -bool pureReport(const vector &vv, const NGHolder &g) { - for (auto v : vv) { - if (!pureReport(v, g)) { - return false; - } - } - - return true; -} - -/* ensures that a vertex is followed by a start construct AND the cyclic states - * has a reasonably wide reach */ -static -bool followedByStar(NFAVertex v, const NGHolder &g) { - set succ; - insert(&succ, adjacent_vertices(v, g)); - - set asucc; - - for (auto w : adjacent_vertices_range(v, g)) { - if (g[w].char_reach.count() < N_CHARS - MAX_ESCAPE_CHARS) { - continue; /* state is too narrow to be considered as a sane star - cyclic */ - } - - asucc.clear(); - insert(&asucc, adjacent_vertices(w, g)); - - if (asucc == succ) { - return true; - } - } - return false; -} - -static -bool followedByStar(const vector &vv, const NGHolder &g) { - for (auto v : vv) { - if (!followedByStar(v, g)) { - return false; - } - } - - return true; -} - -static -bool isEodPrefixCandidate(const NGHolder &g) { - if (in_degree(g.accept, g)) { - DEBUG_PRINTF("graph isn't eod anchored\n"); - return false; - } - - // TODO: handle more than one report. - if (all_reports(g).size() != 1) { - return false; - } - - return true; -} - - -static -bool isEodWithPrefix(const RoseInGraph &g) { - if (num_vertices(g) != 2) { - return false; - } - - for (const auto &e : edges_range(g)) { - RoseInVertex u = source(e, g), v = target(e, g); - DEBUG_PRINTF("edge from %d -> %d\n", g[u].type, g[v].type); - - if (g[u].type != RIV_START && g[u].type != RIV_ANCHORED_START) { - DEBUG_PRINTF("source not start, type=%d\n", g[u].type); - return false; - } - - if (g[v].type != RIV_ACCEPT && g[v].type != RIV_ACCEPT_EOD) { - DEBUG_PRINTF("target not accept, type=%d\n", g[v].type); - return false; - } - - // Haigs not handled. - if (g[e].haig) { - DEBUG_PRINTF("edge has haig\n"); - return false; - } - - if (!g[e].graph) { - DEBUG_PRINTF("no graph on edge\n"); - return false; - } - - if (!isEodPrefixCandidate(*g[e].graph)) { - DEBUG_PRINTF("graph is not eod prefix candidate\n"); - return false; - } - } - - return true; -} - -static -void processEodPrefixes(RoseInGraph &g) { - // Find edges to accept with EOD-anchored graphs that we can move over to - // acceptEod. - vector acc_edges; - for (const auto &e : edges_range(g)) { - if (g[target(e, g)].type != RIV_ACCEPT) { - continue; - } - if (g[e].haig || !g[e].graph) { - continue; - } - if (!isEodPrefixCandidate(*g[e].graph)) { - continue; - } - - // TODO: handle cases with multiple out-edges. - if (out_degree(source(e, g), g) > 1) { - continue; - } - - acc_edges.push_back(e); - } - - set accepts; - - for (const RoseInEdge &e : acc_edges) { - RoseInVertex u = source(e, g), v = target(e, g); - assert(g[e].graph); - assert(g[v].type == RIV_ACCEPT); - assert(all_reports(*g[e].graph).size() == 1); - - // Move this edge from accept to acceptEod and give it the right reports - // from the graph on the edge. - const set reports = all_reports(*g[e].graph); - RoseInVertex w = add_vertex( - RoseInVertexProps::makeAcceptEod(reports), g); - add_edge(u, w, g[e], g); - - remove_edge(e, g); - accepts.insert(v); - } - - for (auto v : accepts) { - if (!in_degree(v, g)) { - remove_vertex(v, g); - } - } -} - -/** Run some reduction passes on the graphs on our edges. */ -static -void reduceGraphs(RoseInGraph &g, const CompileContext &cc) { - for (const auto &e : edges_range(g)) { - if (!g[e].graph) { - continue; - } - NGHolder &h = *g[e].graph; - assert(h.kind == whatRoseIsThis(g, e)); - DEBUG_PRINTF("before, graph %p has %zu vertices, %zu edges\n", &h, - num_vertices(h), num_edges(h)); - - pruneUseless(h); - - reduceGraphEquivalences(h, cc); - - removeRedundancy(h, SOM_NONE); /* rose doesn't track som */ - - DEBUG_PRINTF("after, graph %p has %zu vertices, %zu edges\n", &h, - num_vertices(h), num_edges(h)); - - // It's possible that one of our graphs may have reduced to a dot-star - // cliche, i.e. it contains a startDs->accept edge. If so, we can - // remove it from the edge and just use edge bounds to represent it. - if (edge(h.startDs, h.accept, h).second) { - DEBUG_PRINTF("graph reduces to dot-star, deleting\n"); - g[e].graph.reset(); - g[e].graph_lag = 0; - g[e].minBound = 0; - g[e].maxBound = ROSE_BOUND_INF; - } - } -} - -static -unique_ptr buildRose(const NGHolder &h, bool desperation, - const CompileContext &cc) { - /* Need to pick a pivot point which splits the graph in two with starts on - * one side and accepts on the other. Thus the pivot needs to dominate all - * the accept vertices */ - - /* maps a vertex in h to one of its images in the rose graph */ - vdest_map_t v_dest_map; - vsrc_map_t v_src_map; - - /* create trivial rose graph */ - unique_ptr igp = makeTrivialGraph(h, v_dest_map, v_src_map); - RoseInGraph &ig = *igp; - - /* root graph is the graph on the only edge in our new RoseInGraph */ - assert(num_edges(ig) == 1); - shared_ptr root_g = ig[*edges(ig).first].graph; - assert(root_g); - - /* find the literals */ - set cand; - set cand_raw; - getCandidatePivots(*root_g, &cand, &cand_raw); - - DEBUG_PRINTF("|cand| = %zu\n", cand.size()); - - vector depths; - calcDepths(*root_g, depths); - - auto region_map = assignRegions(*root_g); - - LitCollection lits(*root_g, depths, region_map, cand, cand_raw, - cc.grey.minRoseLiteralLength, desperation, cc); - - for (u32 i = 0; i < cc.grey.roseDesiredSplit; ++i) { - DEBUG_PRINTF("attempting split %u (desired %u)\n", i, - cc.grey.roseDesiredSplit); - unique_ptr split = lits.pickNext(); - - /* need to check we aren't creating any enveloping literals */ - while (split && enveloped(*split, ig, v_dest_map)) { - DEBUG_PRINTF("bad cand; getting next split\n"); - split = lits.pickNext(); - } - - if (!split) { - DEBUG_PRINTF("no more lits :(\n"); - break; - } - splitRoseEdge(ig, *split, v_dest_map, v_src_map); - } - - /* try for more split literals if they are followed by .* or accept */ - for (;;) { - DEBUG_PRINTF("attempting bonus split\n"); - unique_ptr split = lits.pickNext(); - - /* need to check we aren't creating any enveloping literals */ - while (split - && (enveloped(*split, ig, v_dest_map) - || (!pureReport(split->vv, *root_g) - && !followedByStar(split->vv, *root_g)))) { - DEBUG_PRINTF("bad cand; getting next split\n"); - split = lits.pickNext(); - } - - if (!split) { - DEBUG_PRINTF("no more lits :(\n"); - break; - } - DEBUG_PRINTF("got bonus split\n"); - splitRoseEdge(ig, *split, v_dest_map, v_src_map); - } - - processLHS(ig, cc); - - if (num_vertices(ig) <= 2) { - // At present, we don't accept all outfixes. - // However, we do handle the specific case of a rose that precedes an - // acceptEod, which we will support as a prefix to a special EOD event - // "literal". - if (!isEodWithPrefix(ig)) { - igp.reset(); - return igp; - } - } - - processEodPrefixes(ig); - - processInfixes(ig, cc); - - handleLongMixedSensitivityLiterals(ig); - - dedupe(ig); - - pruneUseless(ig); - - reduceGraphs(ig, cc); - - dumpPreRoseGraph(ig, cc.grey); - - renumber_vertices(ig); - calcVertexOffsets(ig); - return igp; -} - -static -void desperationImprove(RoseInGraph &ig, const CompileContext &cc) { - DEBUG_PRINTF("rose said no; can we do better?\n"); - - /* infixes are tricky as we have to worry about delays, enveloping - * literals, etc */ - tryNetflowCutForRHS(ig, cc.grey); - processInfixes(ig, cc); - - handleLongMixedSensitivityLiterals(ig); - dedupe(ig); - pruneUseless(ig); - renumber_vertices(ig); - calcVertexOffsets(ig); -} - -static -bool addRose(RoseBuild &rose, RoseInGraph &ig, bool prefilter, - bool final_chance, const ReportManager &rm, - const CompileContext &cc) { - if (!ensureImplementable(rose, ig, false, final_chance, rm, cc) - && !prefilter) { - return false; - } - return rose.addRose(ig, prefilter); -} - -bool splitOffRose(RoseBuild &rose, const NGHolder &h, bool prefilter, - const ReportManager &rm, const CompileContext &cc) { - if (!cc.grey.allowRose) { - return false; - } - - // We should have at least one edge into accept or acceptEod! - assert(in_degree(h.accept, h) || in_degree(h.acceptEod, h) > 1); - - unique_ptr igp = buildRose(h, false, cc); - if (igp && addRose(rose, *igp, prefilter, false, rm, cc)) { - goto ok; - } - - igp = buildRose(h, true, cc); - - if (igp) { - if (addRose(rose, *igp, prefilter, false, rm, cc)) { - goto ok; - } - - desperationImprove(*igp, cc); - - if (addRose(rose, *igp, prefilter, false, rm, cc)) { - goto ok; - } - } - - DEBUG_PRINTF("rose build failed\n"); - return false; - -ok: - DEBUG_PRINTF("rose build ok\n"); - return true; -} - -bool finalChanceRose(RoseBuild &rose, const NGHolder &h, bool prefilter, - const ReportManager &rm, const CompileContext &cc) { - DEBUG_PRINTF("final chance rose\n"); - if (!cc.grey.allowRose) { - return false; - } - assert(h.kind == NFA_OUTFIX); - - ue2_literal lit; - bool anch = false; - shared_ptr rhs = make_shared(); - if (!splitOffLeadingLiteral(h, &lit, &*rhs)) { - DEBUG_PRINTF("no floating literal\n"); - anch = true; - if (!splitOffAnchoredLeadingLiteral(h, &lit, &*rhs)) { - DEBUG_PRINTF("no anchored literal\n"); - return false; - } - } - - if (lit.length() < cc.grey.minRoseLiteralLength - || minStringPeriod(lit) < 2 ) { - DEBUG_PRINTF("lit too weak\n"); - return false; - } - - assert(lit.length() <= MAX_MASK2_WIDTH || !mixed_sensitivity(lit)); - - RoseInGraph ig; - RoseInVertex s - = add_vertex(RoseInVertexProps::makeStart(anch), ig); - RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig); - add_edge(s, v, RoseInEdgeProps(0, anch ? 0 : ROSE_BOUND_INF), ig); - - ue2_literal lit2; - if (getTrailingLiteral(h, &lit2) - && lit2.length() >= cc.grey.minRoseLiteralLength - && minStringPeriod(lit2) >= 2) { - - /* TODO: handle delay */ - size_t overlap = maxOverlap(lit, lit2, 0); - u32 delay2 = lit2.length() - overlap; - delay2 = min(delay2, maxDelay(cc)); - delay2 = removeTrailingLiteralStates(*rhs, lit2, delay2); - rhs->kind = NFA_INFIX; - assert(delay2 <= lit2.length()); - - RoseInVertex w - = add_vertex(RoseInVertexProps::makeLiteral(lit2), ig); - add_edge(v, w, RoseInEdgeProps(rhs, delay2), ig); - - NFAVertex reporter = getSoleSourceVertex(h, h.accept); - assert(reporter); - const auto &reports = h[reporter].reports; - RoseInVertex a = - add_vertex(RoseInVertexProps::makeAccept(reports), ig); - add_edge(w, a, RoseInEdgeProps(0U, 0U), ig); - } else { - RoseInVertex a = - add_vertex(RoseInVertexProps::makeAccept(set()), ig); - add_edge(v, a, RoseInEdgeProps(rhs, 0U), ig); - } - - renumber_vertices(ig); - calcVertexOffsets(ig); - - return addRose(rose, ig, prefilter, true /* final chance */, rm, cc); -} - -bool checkRose(const ReportManager &rm, const NGHolder &h, bool prefilter, - const CompileContext &cc) { - if (!cc.grey.allowRose) { - return false; - } - - // We should have at least one edge into accept or acceptEod! - assert(in_degree(h.accept, h) || in_degree(h.acceptEod, h) > 1); - - unique_ptr igp; - - // First pass. - - igp = buildRose(h, false, cc); - if (igp && roseCheckRose(*igp, prefilter, rm, cc)) { - return true; - } - - // Second ("desperation") pass. - - igp = buildRose(h, true, cc); - if (igp) { - if (roseCheckRose(*igp, prefilter, rm, cc)) { - return true; - } - - desperationImprove(*igp, cc); - - if (roseCheckRose(*igp, prefilter, rm, cc)) { - return true; - } - } - - return false; -} - -} // namespace ue2 diff --git a/src/nfagraph/ng_rose.h b/src/nfagraph/ng_rose.h deleted file mode 100644 index 9f69fe0c..00000000 --- a/src/nfagraph/ng_rose.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Rose construction from NGHolder. - */ - -#ifndef NG_ROSE_H -#define NG_ROSE_H - -#include "ng_holder.h" -#include "ue2common.h" - -#include - -namespace ue2 { - -class NGHolder; -class ReportManager; -class RoseBuild; - -struct CompileContext; -struct ue2_literal; - -/** \brief Attempt to consume the entire pattern in graph \a h with Rose. - * Returns true if successful. */ -bool splitOffRose(RoseBuild &rose, const NGHolder &h, bool prefilter, - const ReportManager &rm, const CompileContext &cc); - -/** \brief Attempt to consume the entire pattern in graph \a h with Rose. - * This is the last attempt to handle a pattern before we resort to an outfix. - * Returns true if successful. */ -bool finalChanceRose(RoseBuild &rose, const NGHolder &h, bool prefilter, - const ReportManager &rm, const CompileContext &cc); - -/** \brief True if the pattern in \a h is consumable by Rose. This function - * may be conservative (return false even if supported) for efficiency. */ -bool checkRose(const ReportManager &rm, const NGHolder &h, bool prefilter, - const CompileContext &cc); - -} // namespace ue2 - -#endif // NG_ROSE_H diff --git a/src/nfagraph/ng_violet.cpp b/src/nfagraph/ng_violet.cpp index 28ad9549..b6618194 100644 --- a/src/nfagraph/ng_violet.cpp +++ b/src/nfagraph/ng_violet.cpp @@ -45,7 +45,6 @@ #include "ng_redundancy.h" #include "ng_region.h" #include "ng_reports.h" -#include "ng_rose.h" #include "ng_split.h" #include "ng_util.h" #include "ng_width.h" diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index 68cc67a1..e6861ea4 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -46,7 +46,6 @@ #include "nfagraph/ng_region.h" #include "nfagraph/ng_repeat.h" #include "nfagraph/ng_reports.h" -#include "nfagraph/ng_rose.h" #include "nfagraph/ng_util.h" #include "nfagraph/ng_width.h" #include "util/charreach.h"