/*
 * Copyright (c) 2015, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Edge redundancy graph reductions.
 */
#include "ng_edge_redundancy.h"

#include "ng_holder.h"
#include "ng_prune.h"
#include "ng_util.h"
#include "ue2common.h"
#include "parser/position.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/graph_range.h"
#include "util/ue2_containers.h"

#include <map>
#include <set>
#include <vector>

using namespace std;

namespace ue2 {

/* reverse edge redundancy removal is possible but is not implemented as it
 * regressed rose pattern support in the regression suite: 19026 - 19027
 * (foo.{1,5}b?ar)
 *
 * If rose becomes smarter we can reimplement.
 */

/** \brief Returns true iff every vertex in \a sad has at least one out-edge
 * to a vertex in \a happy.
 *
 * Vacuously true when \a sad is empty. */
static never_inline
bool checkVerticesFwd(const NGHolder &g, const set<NFAVertex> &sad,
                      const set<NFAVertex> &happy) {
    /* need to check for each vertex in sad that it has an edge to a happy
     * vertex */
    for (auto u : sad) {
        bool ok = false;
        for (auto v : adjacent_vertices_range(u, g)) {
            if (contains(happy, v)) {
                ok = true;
                break;
            }
        }

        if (!ok) {
            return false;
        }
    }

    return true;
}

/** \brief Returns true iff every vertex in \a sad has at least one in-edge
 * from a vertex in \a happy.
 *
 * Vacuously true when \a sad is empty. */
static never_inline
bool checkVerticesRev(const NGHolder &g, const set<NFAVertex> &sad,
                      const set<NFAVertex> &happy) {
    /* need to check for each vertex in sad that it has an edge from a happy
     * vertex */
    for (auto v : sad) {
        bool ok = false;
        for (auto u : inv_adjacent_vertices_range(v, g)) {
            if (contains(happy, u)) {
                ok = true;
                break;
            }
        }

        if (!ok) {
            return false;
        }
    }

    return true;
}

/** \brief Redundant self-loop removal.
 *
 * A self loop on a vertex v can be removed if:
 *
 * For every vertex u in pred(v) either:
 * 1: u has a self loop and cr(v) subset of cr(u)
 * OR
 * 2: u has an edge to vertex satisfying criterion 1
 *
 * Note: we remove all dead loops at the end of the pass and do not check the
 * live status of the loops we are depending on during the analysis.
 *
 * We don't end up in situations where we remove a group of loops which depend
 * on each other as:
 *
 * - there must be at least one vertex not in the group which is a pred of some
 *   member of the group (as we don't remove loops on specials)
 *
 * For each pred vertex of the group:
 * - the vertex must be 'sad' as it is not part of the group
 * - therefore it must have edges to each member of the group (to happy, trans)
 * - therefore the group is enabled simultaneously
 * - due to internal group edges, all members will still be active after the
 *   next character.
 *
 * Actually, the vertex redundancy code will merge the entire group into one
 * cyclic state.
 *
 * NOTE(review): the note above says dead loops are removed at the end of the
 * pass, but this implementation calls remove_edge() as soon as a loop is found
 * to be redundant -- confirm the comment still reflects the intent.
 *
 * \param g graph to modify in place.
 * \param ignore_starts if true, startDs and virtual starts are never treated
 *        as covering ("happy") predecessors.
 * \return true if at least one self loop was removed.
 */
static
bool removeEdgeRedundancyNearCyclesFwd(NGHolder &g, bool ignore_starts) {
    unsigned dead_count = 0;

    /* hoisted out of the loop so the containers are reused per vertex */
    set<NFAVertex> happy;
    set<NFAVertex> sad;

    for (auto v : vertices_range(g)) {
        if (is_special(v, g) || !hasSelfLoop(v, g)) {
            continue;
        }

        const CharReach &cr_v = g[v].char_reach;

        happy.clear();
        sad.clear();

        /* classify each proper predecessor of v */
        for (auto u : inv_adjacent_vertices_range(v, g)) {
            if (u == v) {
                continue;
            }

            if (!hasSelfLoop(u, g)) {
                sad.insert(u);
                continue;
            }

            if (ignore_starts) {
                if (u == g.startDs || is_virtual_start(u, g)) {
                    sad.insert(u);
                    continue;
                }
            }

            const CharReach &cr_u = g[u].char_reach;

            /* u can only stand in for v's loop if it stays on for every
             * character that v's loop would */
            if (!cr_v.isSubsetOf(cr_u)) {
                sad.insert(u);
                continue;
            }

            happy.insert(u);
        }

        if (!happy.empty() && checkVerticesFwd(g, sad, happy)) {
            dead_count++;
            remove_edge(v, v, g);
        }
    }

    DEBUG_PRINTF("found %u removable edges.\n", dead_count);
    return dead_count != 0;
}

/** \brief Redundant self-loop removal (reverse version).
 *
 * A self loop on a vertex v can be removed if:
 *
 * For every vertex u in succ(v) either:
 * 1: u has a self loop and cr(v) is a subset of cr(u).
 * OR
 * 2: u is not an accept and u has an edge from a vertex satisfying
 *    criterion 1.
 * OR
 * 3: u is an accept and u has an edge from a vertex v' satisfying
 *    criterion 1 and report(v) == report(v').
 *
 * NOTE(review): no report comparison is visible in this function or in
 * checkVerticesRev(); confirm whether criterion 3 is enforced elsewhere.
 *
 * \param g graph to modify in place.
 * \return true if at least one self loop was removed.
 */
static
bool removeEdgeRedundancyNearCyclesRev(NGHolder &g) {
    unsigned dead_count = 0;

    /* hoisted out of the loop so the containers are reused per vertex */
    set<NFAVertex> happy;
    set<NFAVertex> sad;

    for (auto v : vertices_range(g)) {
        if (is_special(v, g) || !hasSelfLoop(v, g)) {
            continue;
        }

        const CharReach &cr_v = g[v].char_reach;

        happy.clear();
        sad.clear();

        /* classify each proper successor of v */
        for (auto u : adjacent_vertices_range(v, g)) {
            if (u == v) {
                continue;
            }

            if (!hasSelfLoop(u, g)) {
                sad.insert(u);
                continue;
            }

            assert(!is_special(u, g));

            const CharReach &cr_u = g[u].char_reach;

            if (!cr_v.isSubsetOf(cr_u)) {
                sad.insert(u);
                continue;
            }

            happy.insert(u);
        }

        if (!happy.empty() && checkVerticesRev(g, sad, happy)) {
            dead_count++;
            remove_edge(v, v, g);
        }
    }

    DEBUG_PRINTF("found %u removable edges.\n", dead_count);
    return dead_count != 0;
}

/** \brief Returns true iff every predecessor of \a v is in \a other_parents.
 *
 * A self loop on \a v is also tolerated when \a other is in \a other_parents
 * (i.e. \a other has a self loop of its own, given that \a other_parents is
 * the predecessor set of \a other).
 *
 * \param done memoisation cache keyed by \a v; the caller must clear it
 *        whenever \a other / \a other_parents change.
 */
static
bool parentsSubsetOf(const NGHolder &g, NFAVertex v,
                     const flat_set<NFAVertex> &other_parents, NFAVertex other,
                     map<NFAVertex, bool> &done) {
    auto dit = done.find(v);
    if (dit != done.end()) {
        return dit->second;
    }

    for (auto u : inv_adjacent_vertices_range(v, g)) {
        if (u == v && contains(other_parents, other)) {
            continue;
        }

        if (!contains(other_parents, u)) {
            done[v] = false;
            return false;
        }
    }

    done[v] = true;
    return true;
}

/** \brief Decides whether edge \a candidate (w -> v) is made redundant by an
 * edge from \a fixed_src (u) to the same target v.
 *
 * \param fixed_parents predecessor set of \a fixed_src.
 * \param done memoisation cache passed through to parentsSubsetOf().
 * \return true if \a candidate may be removed.
 */
static
bool checkFwdCandidate(const NGHolder &g, NFAVertex fixed_src,
                       const flat_set<NFAVertex> &fixed_parents,
                       const NFAEdge &candidate, map<NFAVertex, bool> &done) {
    NFAVertex w = source(candidate, g);
    NFAVertex v = target(candidate, g);
    const CharReach &cr_w = g[w].char_reach;
    const CharReach &cr_u = g[fixed_src].char_reach;

    /* There is no reason why self loops cannot be considered by this
     * transformation but the removal is already handled by many other
     * transformations. */
    if (w == v) {
        return false;
    }

    if (is_special(w, g)) {
        return false;
    }

    if (!cr_w.isSubsetOf(cr_u)) {
        return false;
    }

    /* check that each parent of w is also a parent of u */
    if (!parentsSubsetOf(g, w, fixed_parents, fixed_src, done)) {
        return false;
    }

    DEBUG_PRINTF("edge (%u, %u) killed by edge (%u, %u)\n",
                 g[w].index, g[v].index,
                 g[fixed_src].index, g[v].index);
    return true;
}

/** \brief Forward redundancy scan for a vertex \a u with several out-edges.
 *
 * Builds a candidate set of siblings first so that most in-edges of each
 * target can be rejected with a cheap set lookup before calling
 * checkFwdCandidate().
 *
 * \param parents_u predecessor set of \a u.
 * \param possible_w scratch set, cleared and refilled on every call.
 * \param done memoisation cache for parentsSubsetOf().
 * \param dead accumulates edges found to be redundant; never null.
 */
static never_inline
void checkLargeOutU(const NGHolder &g, NFAVertex u,
                    const flat_set<NFAVertex> &parents_u,
                    flat_set<NFAVertex> &possible_w,
                    map<NFAVertex, bool> &done,
                    set<NFAEdge> *dead) {
    /* only vertices with at least one parent in common with u need to be
     * considered, and we also only consider potential siblings with subset
     * reach. */
    possible_w.clear();
    const CharReach &cr_u = g[u].char_reach;
    for (auto p : parents_u) {
        for (auto v : adjacent_vertices_range(p, g)) {
            const CharReach &cr_w = g[v].char_reach;
            if (cr_w.isSubsetOf(cr_u)) {
                possible_w.insert(v);
            }
        }
    }

    /* If there's only one, it's us, and we have no work to do. */
    if (possible_w.size() <= 1) {
        assert(possible_w.empty() || *possible_w.begin() == u);
        return;
    }

    for (const auto &e : out_edges_range(u, g)) {
        const NFAVertex v = target(e, g);

        if (is_special(v, g)) {
            continue;
        }

        if (contains(*dead, e)) {
            continue;
        }

        /* Now need check to find any edges which can be removed due to the
         * existence of edge e */
        for (const auto &e2 : in_edges_range(v, g)) {
            if (e == e2 || contains(*dead, e2)) {
                continue;
            }

            const NFAVertex w = source(e2, g);
            if (!contains(possible_w, w)) {
                continue;
            }

            if (checkFwdCandidate(g, u, parents_u, e2, done)) {
                dead->insert(e2);
            }
        }
    }
}

/** \brief Forward redundancy scan for a vertex \a u with few out-edges.
 *
 * Same search as checkLargeOutU() but without the sibling pre-filter.
 *
 * \param parents_u predecessor set of \a u.
 * \param done memoisation cache for parentsSubsetOf().
 * \param dead accumulates edges found to be redundant; never null.
 */
static never_inline
void checkSmallOutU(const NGHolder &g, NFAVertex u,
                    const flat_set<NFAVertex> &parents_u,
                    map<NFAVertex, bool> &done,
                    set<NFAEdge> *dead) {
    for (const auto &e : out_edges_range(u, g)) {
        const NFAVertex v = target(e, g);

        if (is_special(v, g)) {
            continue;
        }

        if (contains(*dead, e)) {
            continue;
        }

        /* Now need check to find any edges which can be removed due to the
         * existence of edge e */
        for (const auto &e2 : in_edges_range(v, g)) {
            if (e == e2 || contains(*dead, e2)) {
                continue;
            }

            if (checkFwdCandidate(g, u, parents_u, e2, done)) {
                dead->insert(e2);
            }
        }
    }
}

/** \brief Forward edge redundancy pass.
 *
 * An edge e from w to v is redundant if there exists an edge e' such that:
 * e' is from u to v
 * and: reach(w) is a subset of reach(u)
 * and: proper_pred(w) is a subset of pred(u)
 * and: self_loop(w) implies self_loop(u) or edge from (w to u)
 *
 * Note: edges to accepts also require report ID checks.
 * (This pass skips edges whose target is a special vertex.)
 *
 * \param g graph to modify in place.
 * \param ignore_starts if true, startDs and virtual starts are not used as
 *        the covering vertex u.
 * \return true if any edge was removed.
 */
static
bool removeEdgeRedundancyFwd(NGHolder &g, bool ignore_starts) {
    set<NFAEdge> dead;
    map<NFAVertex, bool> done;
    flat_set<NFAVertex> parents_u;
    flat_set<NFAVertex> possible_w;

    for (auto u : vertices_range(g)) {
        if (ignore_starts && (u == g.startDs || is_virtual_start(u, g))) {
            continue;
        }

        parents_u.clear();
        pred(g, u, &parents_u);

        /* the memo is only valid for a single (u, parents_u) pair */
        done.clear();
        if (hasGreaterOutDegree(1, u, g)) {
            checkLargeOutU(g, u, parents_u, possible_w, done, &dead);
        } else {
            checkSmallOutU(g, u, parents_u, done, &dead);
        }
    }

    if (dead.empty()) {
        return false;
    }

    DEBUG_PRINTF("found %zu removable non-selfloops.\n", dead.size());
    remove_edges(dead, g);
    pruneUseless(g);
    return true;
}

/** Entry point: Runs all the edge redundancy passes. If SoM is tracked,
 * don't consider startDs or virtual starts as cyclic vertices.
 *
 * Does nothing and returns false when cc.grey.removeEdgeRedundancy is unset.
 *
 * \return true if the graph was changed by any pass.
 */
bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc) {
    if (!cc.grey.removeEdgeRedundancy) {
        return false;
    }

    bool changed = false;
    changed |= removeEdgeRedundancyNearCyclesFwd(g, som);
    changed |= removeEdgeRedundancyNearCyclesRev(g);
    changed |= removeEdgeRedundancyFwd(g, som);
    return changed;
}

/** \brief Removes optional stuff from the front of floating patterns, since it's
 * redundant with startDs.
 *
 * For each successor of startDs, remove any in-edges that aren't from either
 * start or startDs. This allows us to prune redundant vertices at the start of
 * a pattern:
 *
 *     /(hat)?stand/ --> /stand/
 *
 * \return true if any edge was removed.
 */
bool removeSiblingsOfStartDotStar(NGHolder &g) {
    vector<NFAEdge> dead;

    for (auto v : adjacent_vertices_range(g.startDs, g)) {
        DEBUG_PRINTF("checking %u\n", g[v].index);
        if (is_special(v, g)) {
            continue;
        }

        for (const auto &e : in_edges_range(v, g)) {
            NFAVertex u = source(e, g);
            if (is_special(u, g)) {
                continue;
            }
            DEBUG_PRINTF("removing %u->%u\n", g[u].index, g[v].index);
            dead.push_back(e);
        }
    }

    if (dead.empty()) {
        return false;
    }

    DEBUG_PRINTF("found %zu removable edges.\n", dead.size());
    remove_edges(dead, g);
    pruneUseless(g);
    return true;
}

/** Removes all edges into virtual starts other than those from start/startDs,
 * providing there is an edge from startDs. This operation is an optimisation
 * for SOM mode. (see UE-1544)
 *
 * \return true if any edge was removed.
 */
bool optimiseVirtualStarts(NGHolder &g) {
    vector<NFAEdge> dead;
    for (auto v : adjacent_vertices_range(g.startDs, g)) {
        u32 flags = g[v].assert_flags;
        if (!(flags & POS_FLAG_VIRTUAL_START)) {
            continue;
        }

        for (const auto &e : in_edges_range(v, g)) {
            if (!is_any_start(source(e, g), g)) {
                dead.push_back(e);
            }
        }
    }

    if (dead.empty()) {
        return false;
    }

    DEBUG_PRINTF("removing %zu edges into virtual starts\n", dead.size());
    remove_edges(dead, g);
    pruneUseless(g);
    return true;
}

} // namespace ue2