vectorscan/src/nfagraph/ng_edge_redundancy.cpp
2015-10-20 09:13:35 +11:00

518 lines
15 KiB
C++

/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Edge redundancy graph reductions.
*/
#include "ng_edge_redundancy.h"
#include "ng_holder.h"
#include "ng_prune.h"
#include "ng_util.h"
#include "ue2common.h"
#include "parser/position.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/graph_range.h"
#include "util/ue2_containers.h"
#include <set>
#include <vector>
using namespace std;
namespace ue2 {
/* reverse edge redundancy removal is possible but is not implemented as it
* regressed rose pattern support in the regression suite: 19026 - 19027
* (foo.{1,5}b?ar)
*
* If rose becomes smarter we can reimplement.
*/
/** \brief Checks that every vertex in \p sad has a successor in \p happy.
 *
 * \param g     Graph whose out-edges are inspected.
 * \param sad   Vertices that need to be covered.
 * \param happy Vertices that provide cover.
 * \return true if each vertex in \p sad has at least one out-edge leading to
 *         a vertex in \p happy (trivially true when \p sad is empty).
 */
static never_inline
bool checkVerticesFwd(const NGHolder &g, const set<NFAVertex> &sad,
                      const set<NFAVertex> &happy) {
    /* true if u has an edge to some happy vertex */
    const auto hasHappySucc = [&](NFAVertex u) -> bool {
        for (auto v : adjacent_vertices_range(u, g)) {
            if (contains(happy, v)) {
                return true;
            }
        }
        return false;
    };

    for (auto u : sad) {
        if (!hasHappySucc(u)) {
            return false;
        }
    }

    return true;
}
/** \brief Checks that every vertex in \p sad has a predecessor in \p happy.
 *
 * Mirror image of checkVerticesFwd(): in-edges are inspected instead of
 * out-edges.
 *
 * \param g     Graph whose in-edges are inspected.
 * \param sad   Vertices that need to be covered.
 * \param happy Vertices that provide cover.
 * \return true if each vertex in \p sad has at least one in-edge coming from
 *         a vertex in \p happy (trivially true when \p sad is empty).
 */
static never_inline
bool checkVerticesRev(const NGHolder &g, const set<NFAVertex> &sad,
                      const set<NFAVertex> &happy) {
    /* true if v has an edge from some happy vertex */
    const auto hasHappyPred = [&](NFAVertex v) -> bool {
        for (auto u : inv_adjacent_vertices_range(v, g)) {
            if (contains(happy, u)) {
                return true;
            }
        }
        return false;
    };

    for (auto v : sad) {
        if (!hasHappyPred(v)) {
            return false;
        }
    }

    return true;
}
/** \brief Redundant self-loop removal.
 *
 * A self loop on a (non-special) vertex v can be removed if v has at least
 * one predecessor satisfying criterion 1 and:
 *
 * For every vertex u in pred(v), other than v itself, either:
 *   1: u has a self loop and cr(v) subset of cr(u)
 *      OR
 *   2: u has an edge to vertex satisfying criterion 1
 *
 * When \p ignore_starts is set, startDs and virtual starts are never treated
 * as satisfying criterion 1.
 *
 * NOTE(review): the code below calls remove_edge() as soon as a loop is found
 * to be redundant, rather than collecting the dead loops and removing them at
 * the end of the pass, so hasSelfLoop() queries made for vertices visited
 * later presumably observe the earlier removals. The argument that follows
 * was written on the assumption that the live status of the loops being
 * depended on is not checked during the analysis; confirm that it still
 * describes this code.
 *
 * We don't end up in situations where we remove a group of loops which depend
 * on each other as:
 *
 * - there must be at least one vertex not in the group which is a pred of some
 *   member of the group (as we don't remove loops on specials)
 *
 * For each pred vertex of the group:
 * - the vertex must be 'sad' as it is not part of the group
 * - therefore it must have edges to each member of the group (to happy, trans)
 * - therefore the group is enabled simultaneously
 * - due to internal group edges, all members will still be active after the
 *   next character.
 *
 * Actually, the vertex redundancy code will merge the entire group into one
 * cyclic state.
 *
 * \param g             Graph to modify in place.
 * \param ignore_starts If true, startDs and virtual starts cannot justify the
 *                      removal of a loop.
 * \return true if at least one self loop was removed.
 */
static
bool removeEdgeRedundancyNearCyclesFwd(NGHolder &g, bool ignore_starts) {
    /* Scratch sets, cleared and refilled for each candidate vertex. */
    set<NFAVertex> happy;
    set<NFAVertex> sad;
    unsigned dead_count = 0;

    for (auto v : vertices_range(g)) {
        if (is_special(v, g) || !hasSelfLoop(v, g)) {
            continue;
        }

        const CharReach &cr_v = g[v].char_reach;

        /* Criterion 1: u is cyclic, is not an excluded start and its reach
         * covers the reach of v. */
        const auto coversLoop = [&](NFAVertex u) -> bool {
            if (!hasSelfLoop(u, g)) {
                return false;
            }
            if (ignore_starts && (u == g.startDs || is_virtual_start(u, g))) {
                return false;
            }
            const bool reach_gap = (g[u].char_reach & cr_v) != cr_v;
            return !reach_gap;
        };

        happy.clear();
        sad.clear();

        for (auto u : inv_adjacent_vertices_range(v, g)) {
            if (u != v) {
                (coversLoop(u) ? happy : sad).insert(u);
            }
        }

        /* Need at least one covering pred, and every other pred must lead to
         * a covering pred. */
        if (happy.empty() || !checkVerticesFwd(g, sad, happy)) {
            continue;
        }

        remove_edge(v, v, g);
        dead_count++;
    }

    DEBUG_PRINTF("found %u removable edges.\n", dead_count);
    return dead_count != 0;
}
/** \brief Redundant self-loop removal (reverse version).
 *
 * A self loop on a vertex v can be removed if:
 *
 * For every vertex u in succ(v) either:
 *   1: u has a self loop and cr(v) is a subset of cr(u).
 *      OR
 *   2: u is not an accept and u has an edge from a vertex satisfying
 *      criterion 1.
 *      OR
 *   3: u is in an accept and u has an edge from a vertex v' satisfying
 *      criterion 1 and report(v) == report(v').
 *
 * NOTE(review): no accept test or report comparison is visible in this
 * function. Every successor that fails criterion 1 is treated alike: it must
 * have an in-edge from a successor that satisfies criterion 1 (see
 * checkVerticesRev()). Successors with a self loop are asserted to be
 * non-special. Confirm whether criterion 3 needs an explicit check.
 *
 * The loop is only removed if at least one successor satisfies criterion 1.
 * Removal happens immediately, inside the scan over the vertices.
 *
 * \param g Graph to modify in place.
 * \return true if at least one self loop was removed.
 */
static
bool removeEdgeRedundancyNearCyclesRev(NGHolder &g) {
    /* Scratch sets, cleared and refilled for each candidate vertex. */
    set<NFAVertex> happy;
    set<NFAVertex> sad;
    unsigned dead_count = 0;

    for (auto v : vertices_range(g)) {
        if (is_special(v, g) || !hasSelfLoop(v, g)) {
            continue;
        }

        const CharReach &cr_v = g[v].char_reach;

        /* Criterion 1: u is cyclic and its reach covers the reach of v. */
        const auto coversLoop = [&](NFAVertex u) -> bool {
            if (!hasSelfLoop(u, g)) {
                return false;
            }
            assert(!is_special(u, g));
            return cr_v.isSubsetOf(g[u].char_reach);
        };

        happy.clear();
        sad.clear();

        for (auto u : adjacent_vertices_range(v, g)) {
            if (u != v) {
                (coversLoop(u) ? happy : sad).insert(u);
            }
        }

        /* Need at least one covering succ, and every other succ must be
         * reachable from a covering succ. */
        if (happy.empty() || !checkVerticesRev(g, sad, happy)) {
            continue;
        }

        remove_edge(v, v, g);
        dead_count++;
    }

    DEBUG_PRINTF("found %u removable edges.\n", dead_count);
    return dead_count != 0;
}
/** \brief Tests whether every predecessor of \p v is in \p other_parents.
 *
 * A self loop on \p v is tolerated without \p v being in \p other_parents,
 * provided that \p other is itself a member of \p other_parents (in the
 * forward pass \p other_parents is filled by pred() for \p other, so this
 * presumably means that \p other has a self loop of its own).
 *
 * \param g             Graph to inspect.
 * \param v             Vertex whose predecessors are checked.
 * \param other_parents Set that the predecessors of \p v must fall within.
 * \param other         Vertex that \p other_parents was built for.
 * \param done          Memo of earlier answers keyed on \p v. It is consulted
 *                      first and updated before returning, so it is only
 *                      meaningful while \p other_parents and \p other stay
 *                      the same.
 * \return true if the predecessors of \p v are a subset of \p other_parents,
 *         subject to the self loop exception above.
 */
static
bool parentsSubsetOf(const NGHolder &g, NFAVertex v,
                     const flat_set<NFAVertex> &other_parents, NFAVertex other,
                     map<NFAVertex, bool> &done) {
    const auto cached = done.find(v);
    if (cached != done.end()) {
        return cached->second;
    }

    bool is_subset = true;
    for (auto u : inv_adjacent_vertices_range(v, g)) {
        /* v's own loop is fine when other can match it with a loop too */
        const bool excused_loop = (u == v) && contains(other_parents, other);
        if (!excused_loop && !contains(other_parents, u)) {
            is_subset = false;
            break;
        }
    }

    done[v] = is_subset;
    return is_subset;
}
static
bool checkFwdCandidate(const NGHolder &g, NFAVertex fixed_src,
const flat_set<NFAVertex> &fixed_parents,
const NFAEdge &candidate,
map<NFAVertex, bool> &done) {
NFAVertex w = source(candidate, g);
NFAVertex v = target(candidate, g);
const CharReach &cr_w = g[w].char_reach;
const CharReach &cr_u = g[fixed_src].char_reach;
/* There is no reason why self loops cannot be considered by this
* transformation but the removal is already handled by many other
* transformations. */
if (w == v) {
return false;
}
if (is_special(w, g)) {
return false;
}
if (!cr_w.isSubsetOf(cr_u)) {
return false;
}
/* check that each parent of w is also a parent of u */
if (!parentsSubsetOf(g, w, fixed_parents, fixed_src, done)) {
return false;
}
DEBUG_PRINTF("edge (%u, %u) killed by edge (%u, %u)\n",
g[w].index, g[v].index,
g[fixed_src].index, g[v].index);
return true;
}
/** \brief Finds edges made redundant by the out-edges of \p u, pre-filtering
 * the candidate source vertices.
 *
 * Does the same search as checkSmallOutU(), but first builds the set of
 * vertices that share a parent with \p u and have a reach that is a subset of
 * the reach of \p u. Only in-edges coming from that set are handed to
 * checkFwdCandidate().
 *
 * \param g          Graph to inspect.
 * \param u          Vertex whose out-edges may make other edges redundant.
 * \param parents_u  Parent set of \p u, as built by the caller.
 * \param possible_w Caller-owned scratch set; cleared and refilled here.
 * \param done       Memo passed through to checkFwdCandidate().
 * \param dead       In/out: edges already condemned are skipped and newly
 *                   found redundant edges are added.
 */
static never_inline
void checkLargeOutU(const NGHolder &g, NFAVertex u,
                    const flat_set<NFAVertex> &parents_u,
                    flat_set<NFAVertex> &possible_w,
                    map<NFAVertex, bool> &done,
                    set<NFAEdge> *dead) {
    const CharReach &cr_u = g[u].char_reach;

    /* only vertices with at least one parent in common with u need to be
     * considered, and we also only consider potential siblings with subset
     * reach. */
    possible_w.clear();
    for (auto parent : parents_u) {
        for (auto sibling : adjacent_vertices_range(parent, g)) {
            if (g[sibling].char_reach.isSubsetOf(cr_u)) {
                possible_w.insert(sibling);
            }
        }
    }

    // If there's only one, it's us, and we have no work to do.
    if (possible_w.size() <= 1) {
        assert(possible_w.empty() || *possible_w.begin() == u);
        return;
    }

    for (const auto &e : out_edges_range(u, g)) {
        const NFAVertex v = target(e, g);
        if (is_special(v, g) || contains(*dead, e)) {
            continue;
        }

        /* Now need check to find any edges which can be removed due to the
         * existence of edge e */
        for (const auto &e2 : in_edges_range(v, g)) {
            const bool skip = e == e2 || contains(*dead, e2);
            if (!skip && contains(possible_w, source(e2, g))
                && checkFwdCandidate(g, u, parents_u, e2, done)) {
                dead->insert(e2);
            }
        }
    }
}
/** \brief Finds edges made redundant by the out-edges of \p u, with no
 * pre-filtering of candidates.
 *
 * For each live out-edge (u, v) with a non-special target, every other live
 * in-edge of v is handed to checkFwdCandidate().
 *
 * \param g         Graph to inspect.
 * \param u         Vertex whose out-edges may make other edges redundant.
 * \param parents_u Parent set of \p u, as built by the caller.
 * \param done      Memo passed through to checkFwdCandidate().
 * \param dead      In/out: edges already condemned are skipped and newly
 *                  found redundant edges are added.
 */
static never_inline
void checkSmallOutU(const NGHolder &g, NFAVertex u,
                    const flat_set<NFAVertex> &parents_u,
                    map<NFAVertex, bool> &done,
                    set<NFAEdge> *dead) {
    for (const auto &e : out_edges_range(u, g)) {
        const NFAVertex v = target(e, g);
        if (is_special(v, g) || contains(*dead, e)) {
            continue;
        }

        /* Now need check to find any edges which can be removed due to the
         * existence of edge e */
        for (const auto &e2 : in_edges_range(v, g)) {
            const bool skip = e == e2 || contains(*dead, e2);
            if (!skip && checkFwdCandidate(g, u, parents_u, e2, done)) {
                dead->insert(e2);
            }
        }
    }
}
/** \brief Forward edge redundancy pass.
 *
 * An edge e from w to v is redundant if there exists an edge e' such that:
 *  e' is from u to v
 *  and: reach(w) is a subset of reach(u)
 *  and: proper_pred(w) is a subset of pred(u)
 *  and: self_loop(w) implies self_loop(u) or edge from (w to u)
 *
 * Note: edges to accepts also require report ID checks. The helpers used here
 * (checkLargeOutU() / checkSmallOutU()) skip edges whose target is special.
 *
 * Redundant edges are gathered over the whole graph first and removed in one
 * go afterwards, followed by pruneUseless().
 *
 * \param g             Graph to modify in place.
 * \param ignore_starts If true, startDs and virtual starts are not used as
 *                      the vertex u.
 * \return true if any edge was removed.
 */
static
bool removeEdgeRedundancyFwd(NGHolder &g, bool ignore_starts) {
    set<NFAEdge> dead;

    /* Scratch containers, reset for each u. */
    map<NFAVertex, bool> done;
    flat_set<NFAVertex> parents_u;
    flat_set<NFAVertex> possible_w;

    for (auto u : vertices_range(g)) {
        const bool excluded_start =
            ignore_starts && (u == g.startDs || is_virtual_start(u, g));
        if (excluded_start) {
            continue;
        }

        /* The memo caches answers relative to u's parent set, so it cannot
         * be carried over from one u to the next. */
        done.clear();

        parents_u.clear();
        pred(g, u, &parents_u);

        if (!hasGreaterOutDegree(1, u, g)) {
            checkSmallOutU(g, u, parents_u, done, &dead);
        } else {
            checkLargeOutU(g, u, parents_u, possible_w, done, &dead);
        }
    }

    if (!dead.empty()) {
        DEBUG_PRINTF("found %zu removable non-selfloops.\n", dead.size());
        remove_edges(dead, g);
        pruneUseless(g);
        return true;
    }

    return false;
}
/** Entry point: Runs all the edge redundancy passes. If SoM is tracked,
 * don't consider startDs or virtual starts as cyclic vertices.
 *
 * The passes run in a fixed order: forward self-loop removal, reverse
 * self-loop removal, then forward edge removal. All three are always run.
 * Nothing is done if the grey-box option removeEdgeRedundancy is off.
 *
 * \param g   Graph to modify in place.
 * \param som SoM mode; converted to bool and passed to the forward passes as
 *            their ignore_starts argument.
 * \param cc  Compile context supplying the grey-box switch.
 * \return true if any pass changed the graph.
 */
bool removeEdgeRedundancy(NGHolder &g, som_type som, const CompileContext &cc) {
    if (!cc.grey.removeEdgeRedundancy) {
        return false;
    }

    /* Each pass is evaluated before the results are combined, so none of
     * them is skipped by short-circuiting. */
    const bool loops_fwd = removeEdgeRedundancyNearCyclesFwd(g, som);
    const bool loops_rev = removeEdgeRedundancyNearCyclesRev(g);
    const bool edges_fwd = removeEdgeRedundancyFwd(g, som);

    return loops_fwd || loops_rev || edges_fwd;
}
/** \brief Removes optional stuff from the front of floating patterns, since it's
 * redundant with startDs.
 *
 * For each non-special successor of startDs, remove every in-edge whose source
 * is not a special vertex. This allows us to prune redundant vertices at the
 * start of a pattern:
 *
 *      /(hat)?stand   -->   /stand/
 *
 * The doomed edges are collected first and removed together, after which
 * pruneUseless() is run on the graph.
 *
 * \param g Graph to modify in place.
 * \return true if any edge was removed.
 */
bool removeSiblingsOfStartDotStar(NGHolder &g) {
    vector<NFAEdge> dead;

    for (auto v : adjacent_vertices_range(g.startDs, g)) {
        DEBUG_PRINTF("checking %u\n", g[v].index);
        if (!is_special(v, g)) {
            for (const auto &e : in_edges_range(v, g)) {
                const NFAVertex u = source(e, g);
                if (!is_special(u, g)) {
                    DEBUG_PRINTF("removing %u->%u\n", g[u].index,
                                 g[v].index);
                    dead.push_back(e);
                }
            }
        }
    }

    if (!dead.empty()) {
        DEBUG_PRINTF("found %zu removable edges.\n", dead.size());
        remove_edges(dead, g);
        pruneUseless(g);
        return true;
    }

    return false;
}
/** Removes all edges into virtual starts other than those from start/startDs,
 * providing there is an edge from startDs. This operation is an optimisation
 * for SOM mode. (see UE-1544)
 *
 * Only successors of startDs whose assert_flags have POS_FLAG_VIRTUAL_START
 * set are considered. An in-edge is kept if is_any_start() accepts its source.
 * The doomed edges are collected first and removed together, after which
 * pruneUseless() is run on the graph.
 *
 * \param g Graph to modify in place.
 * \return true if any edge was removed.
 */
bool optimiseVirtualStarts(NGHolder &g) {
    vector<NFAEdge> dead;

    for (auto v : adjacent_vertices_range(g.startDs, g)) {
        const u32 flags = g[v].assert_flags;
        if ((flags & POS_FLAG_VIRTUAL_START) != 0) {
            for (const auto &e : in_edges_range(v, g)) {
                if (!is_any_start(source(e, g), g)) {
                    dead.push_back(e);
                }
            }
        }
    }

    if (!dead.empty()) {
        DEBUG_PRINTF("removing %zu edges into virtual starts\n", dead.size());
        remove_edges(dead, g);
        pruneUseless(g);
        return true;
    }

    return false;
}
} // namespace ue2