simple pass to pick up paths redundant with those from cyclic's succs

This commit is contained in:
Alex Coyte 2016-10-06 15:54:48 +11:00 committed by Matthew Barr
parent 8cadba0bdd
commit 47f53f63a7
13 changed files with 234 additions and 68 deletions

View File

@ -788,10 +788,8 @@ u32 getEffectiveAccelStates(const build_info &args,
if (!is_subset_of(h[v].reports, h[a].reports)) {
continue;
}
flat_set<NFAVertex> v_succ;
flat_set<NFAVertex> a_succ;
succ(h, v, &v_succ);
succ(h, a, &a_succ);
auto v_succ = succs(v, h);
auto a_succ = succs(a, h);
if (is_subset_of(v_succ, a_succ)) {
dominated_by[accel_id] |= 1U << accel_id_map[a];
}

View File

@ -421,6 +421,7 @@ bool NG::addGraph(NGWrapper &w) {
// Perform a reduction pass to merge sibling character classes together.
if (cc.grey.performGraphSimplification) {
removeRedundancy(w, som);
prunePathsRedundantWithSuccessorOfCyclics(w, som);
}
dumpDotWrapper(w, "04_reduced", cc.grey);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -134,8 +134,8 @@ void findLoopReachable(const GraphT &g, const NFAVertex srcVertex,
depth_first_search(g, visitor(be).root_vertex(srcVertex).vertex_index_map(
index_map));
AcyclicFilter<EdgeSet> af(&deadEdges);
filtered_graph<GraphT, AcyclicFilter<EdgeSet> > acyclic_g(g, af);
auto af = make_bad_edge_filter(&deadEdges);
auto acyclic_g = make_filtered_graph(g, af);
vector<NFAVertex> topoOrder; /* actually reverse topological order */
topoOrder.reserve(deadNodes.size());

View File

@ -382,8 +382,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) {
while (v != cyclic) {
DEBUG_PRINTF("vertex %u\n", g[v].index);
width++;
tie(ai, ae) = adjacent_vertices(v, g);
set<NFAVertex> succ(ai, ae);
auto succ = succs(v, g);
if (contains(succ, cyclic)) {
if (succ.size() == 1) {
v = cyclic;
@ -421,8 +420,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) {
while (!is_any_accept(v, g)) {
DEBUG_PRINTF("vertex %u\n", g[v].index);
width++;
tie(ai, ae) = adjacent_vertices(v, g);
set<NFAVertex> succ(ai, ae);
auto succ = succs(v, g);
if (succ.size() != 1) {
DEBUG_PRINTF("bad form\n");
return false;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -69,8 +69,12 @@
#include "util/charreach.h"
#include "util/container.h"
#include "util/graph_range.h"
#include "util/ue2_containers.h"
#include "ue2common.h"
#include <boost/graph/depth_first_search.hpp>
#include <boost/graph/filtered_graph.hpp>
#include <map>
#include <set>
#include <vector>
@ -94,8 +98,8 @@ void findCandidates(NGHolder &g, const vector<NFAVertex> &ordering,
// For `v' to be a candidate, its predecessors must all have the same
// successor set as `v'.
set<NFAVertex> succ_v, succ_u;
succ(g, v, &succ_v);
auto succ_v = succs(v, g);
flat_set<NFAVertex> succ_u;
for (auto u : inv_adjacent_vertices_range(v, g)) {
succ_u.clear();
@ -125,8 +129,8 @@ void findCandidates_rev(NGHolder &g, const vector<NFAVertex> &ordering,
// For `v' to be a candidate, its successors must all have the same
// predecessor set as `v'.
set<NFAVertex> pred_v, pred_u;
pred(g, v, &pred_v);
auto pred_v = preds(v, g);
flat_set<NFAVertex> pred_u;
for (auto u : adjacent_vertices_range(v, g)) {
pred_u.clear();
@ -172,8 +176,7 @@ void succCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) {
static
set<NFAVertex> findSustainSet(const NGHolder &g, NFAVertex p,
bool ignore_starts, const CharReach &new_cr) {
set<NFAVertex> cand;
pred(g, p, &cand);
auto cand = preds<set<NFAVertex>>(p, g);
if (ignore_starts) {
cand.erase(g.startDs);
}
@ -209,8 +212,7 @@ set<NFAVertex> findSustainSet(const NGHolder &g, NFAVertex p,
static
set<NFAVertex> findSustainSet_rev(const NGHolder &g, NFAVertex p,
const CharReach &new_cr) {
set<NFAVertex> cand;
succ(g, p, &cand);
auto cand = succs<set<NFAVertex>>(p, g);
/* remove elements from cand until the sustain set property holds */
bool changed;
do {
@ -546,4 +548,139 @@ bool mergeCyclicDotStars(NGHolder &g) {
return true;
}
/**
 * Returns the set of vertices that cannot be on if v is not on.
 *
 * The result is computed by exclusion: every vertex found by a depth-first
 * walk from a "bad" root is marked bad, and whatever is left over is returned.
 * The roots are g.start and every vertex whose reach is not a subset of v's
 * reach. The walk does not follow edges s->t where t is a successor of v and
 * s has an edge to v.
 */
static
flat_set<NFAVertex> findDependentVertices(const NGHolder &g, NFAVertex v) {
    /* We need to exclude any vertex that may be reached on a path which is
     * incompatible with the vertex v being on. */

    /* A vertex u is bad if:
     * 1) its reach may be incompatible with v (not a subset)
     * 2) there is an edge from a bad vertex b and there is either not an
     *    edge v->u or not an edge b->v.
     * Note: 2) means v is never bad as it has a selfloop
     *
     * Can do this with a DFS from all the initial bad states with a conditional
     * check down edges. Alternately can just filter these edges out of the
     * graph first.
     */

    /* Edges s->t with both v->t and s->v present: badness must not be
     * propagated along these (see rule 2 above). */
    flat_set<NFAEdge> no_explore;
    for (NFAVertex t : adjacent_vertices_range(v, g)) {
        for (NFAEdge e : in_edges_range(t, g)) {
            NFAVertex s = source(e, g);
            if (edge(s, v, g).second) {
                no_explore.insert(e);
            }
        }
    }

    auto filtered_g = make_filtered_graph(g.g,
                                          make_bad_edge_filter(&no_explore));

    /* One colour map is shared by all the walks below, so a vertex already
     * visited from an earlier root is not walked again. */
    vector<boost::default_color_type> color_raw(num_vertices(g));
    auto color = make_iterator_property_map(color_raw.begin(),
                                    get(&NFAGraphVertexProps::index, g.g));

    flat_set<NFAVertex> bad;
    for (NFAVertex b : vertices_range(g)) {
        /* g.start is always used as a root; any other vertex only if its
         * reach is not contained in v's reach. */
        if (b != g.start && g[b].char_reach.isSubsetOf(g[v].char_reach)) {
            continue;
        }
        boost::depth_first_visit(filtered_g, b, make_vertex_recorder(bad),
                                 color);
    }

    /* Everything that was never marked bad depends on v. */
    flat_set<NFAVertex> rv;
    for (NFAVertex u : vertices_range(g)) {
        if (!contains(bad, u)) {
            DEBUG_PRINTF("%u is good\n", g[u].index);
            rv.insert(u);
        }
    }
    return rv;
}
/**
 * Uses the cyclic vertex u as a base and each of its direct successors v as a
 * "killer" to prune work made redundant by the path u->v.
 *
 * For every edge p->s, where s is a successor of v and p is a vertex dependent
 * on u (see findDependentVertices):
 *  - if p's reach is a subset of v's reach, the edge p->s is removed;
 *  - otherwise, if p's successors are a subset of u's successors, v's reach is
 *    subtracted from p's reach.
 *
 * When som is set, virtual starts and startDs are not used as a base and
 * virtual starts are not used as killers.
 *
 * \return true if any edge was removed or any reach was narrowed.
 */
static
bool pruneUsingSuccessors(NGHolder &g, NFAVertex u, som_type som) {
    if (som && (is_virtual_start(u, g) || u == g.startDs)) {
        return false;
    }

    bool changed = false;
    DEBUG_PRINTF("using cyclic %u as base\n", g[u].index);
    auto children = findDependentVertices(g, u);

    /* Only edges whose source is not u are ever removed below, so the
     * successor set of u is loop invariant: build it once up front. */
    const auto u_succ_set = succs(u, g);

    vector<NFAVertex> u_succs;
    for (NFAVertex v : adjacent_vertices_range(u, g)) {
        if (som && is_virtual_start(v, g)) {
            /* as v is virtual start, its som has been reset so can not override
             * existing in progress matches. */
            continue;
        }
        u_succs.push_back(v);
    }

    /* Widest reach first. The key is only the reach count, so use a stable
     * sort: ties keep their adjacency order and the pruning result does not
     * depend on how the library's sort happens to order equal elements. */
    stable_sort(u_succs.begin(), u_succs.end(),
                [&](NFAVertex a, NFAVertex b) {
                    return g[a].char_reach.count() > g[b].char_reach.count();
                });

    for (NFAVertex v : u_succs) {
        DEBUG_PRINTF(" using %u as killer\n", g[v].index);
        /* Edges are collected and removed after the walk so the in-edge
         * ranges being iterated are not modified underneath us. */
        set<NFAEdge> dead;
        for (NFAVertex s : adjacent_vertices_range(v, g)) {
            DEBUG_PRINTF(" looking at preds of %u\n", g[s].index);
            for (NFAEdge e : in_edges_range(s, g)) {
                NFAVertex p = source(e, g);
                if (!contains(children, p) || p == v || p == u
                    || p == g.accept) {
                    DEBUG_PRINTF("%u not a cand\n", g[p].index);
                    continue;
                }
                if (is_any_accept(s, g) && g[p].reports != g[v].reports) {
                    DEBUG_PRINTF("%u bad reports\n", g[p].index);
                    continue;
                }
                if (g[p].char_reach.isSubsetOf(g[v].char_reach)) {
                    dead.insert(e);
                    changed = true;
                    DEBUG_PRINTF("removing edge %u->%u\n", g[p].index,
                                 g[s].index);
                } else if (is_subset_of(succs(p, g), u_succ_set)) {
                    if (is_match_vertex(p, g)
                        && !is_subset_of(g[p].reports, g[v].reports)) {
                        continue;
                    }
                    DEBUG_PRINTF("updating reach on %u\n", g[p].index);
                    changed |= (g[p].char_reach & g[v].char_reach).any();
                    g[p].char_reach &= ~g[v].char_reach;
                }
            }
        }
        remove_edges(dead, g);
    }

    DEBUG_PRINTF("changed %d\n", (int)changed);
    return changed;
}
/* Runs pruneUsingSuccessors() from every vertex that has a self-loop and a
 * full character reach. If any of those runs changed the graph, the graph is
 * tidied with pruneUseless() and clearReports(). Returns true if the graph
 * was changed. */
bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &g, som_type som) {
    /* TODO: the reverse form of this is also possible */
    bool any_change = false;

    for (NFAVertex cyclic : vertices_range(g)) {
        if (!hasSelfLoop(cyclic, g)) {
            continue;
        }
        if (!g[cyclic].char_reach.all()) {
            continue;
        }
        if (pruneUsingSuccessors(g, cyclic, som)) {
            any_change = true;
        }
    }

    if (!any_change) {
        return false;
    }

    pruneUseless(g);
    clearReports(g);
    return true;
}
} // namespace ue2

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -72,6 +72,13 @@ std::vector<CharReach> reduced_cr(const NGHolder &g,
/** Remove cyclic stars connected to start */
bool mergeCyclicDotStars(NGHolder &g);
/**
 * Given a cyclic state 'c' with a broad reach and a later state 'v' that is
 * only reachable if c is still on, then any edges to a successor of a direct
 * successor of c with reach a superset of v are redundant.
 *
 * Only vertices that have a self-loop and a full character reach are used as
 * the cyclic state 'c'.
 *
 * \return true if the graph was modified.
 */
bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &h, som_type som);
} // namespace ue2
#endif

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -71,7 +71,7 @@ using namespace std;
namespace ue2 {
typedef ue2::unordered_set<NFAEdge> BackEdgeSet;
typedef boost::filtered_graph<NFAGraph, AcyclicFilter<BackEdgeSet>>
typedef boost::filtered_graph<NFAGraph, bad_edge_filter<BackEdgeSet>>
AcyclicGraph;
namespace {
@ -454,7 +454,7 @@ ue2::unordered_map<NFAVertex, u32> assignRegions(const NGHolder &g) {
.color_map(make_iterator_property_map(
colours.begin(), get(&NFAGraphVertexProps::index, g.g))));
AcyclicFilter<BackEdgeSet> af(&deadEdges);
auto af = make_bad_edge_filter(&deadEdges);
AcyclicGraph acyclic_g(g.g, af);
// Build a (reverse) topological ordering.

View File

@ -138,8 +138,7 @@ void buildTopoOrder(const Graph &g, vector<NFAVertex> &topoOrder) {
depth_first_search(g, visitor(BackEdges<EdgeSet>(deadEdges)).
color_map(make_assoc_property_map(colours)));
AcyclicFilter<EdgeSet> af(&deadEdges);
boost::filtered_graph<Graph, AcyclicFilter<EdgeSet> > acyclic_g(g, af);
auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&deadEdges));
topological_sort(acyclic_g, back_inserter(topoOrder),
color_map(make_assoc_property_map(colours)));

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -187,10 +187,9 @@ bool expandCyclic(NGHolder &h, NFAVertex v) {
DEBUG_PRINTF("inspecting %u\n", h[v].index);
bool changes = false;
set<NFAVertex> v_preds;
set<NFAVertex> v_succs;
pred(h, v, &v_preds);
succ(h, v, &v_succs);
auto v_preds = preds(v, h);
auto v_succs = succs(v, h);
set<NFAVertex> start_siblings;
set<NFAVertex> end_siblings;
@ -199,8 +198,7 @@ bool expandCyclic(NGHolder &h, NFAVertex v) {
/* We need to find start vertices which have all of our preds.
* As we have a self loop, it must be one of our succs. */
for (auto a : adjacent_vertices_range(v, h)) {
set<NFAVertex> a_preds;
pred(h, a, &a_preds);
auto a_preds = preds(a, h);
if (a_preds == v_preds && isutf8start(h[a].char_reach)) {
DEBUG_PRINTF("%u is a start v\n", h[a].index);
@ -211,8 +209,7 @@ bool expandCyclic(NGHolder &h, NFAVertex v) {
/* We also need to find full cont vertices which have all our own succs;
* As we have a self loop, it must be one of our preds. */
for (auto a : inv_adjacent_vertices_range(v, h)) {
set<NFAVertex> a_succs;
succ(h, a, &a_succs);
auto a_succs = succs(a, h);
if (a_succs == v_succs && h[a].char_reach == UTF_CONT_CR) {
DEBUG_PRINTF("%u is a full tail cont\n", h[a].index);

View File

@ -403,8 +403,7 @@ vector<NFAVertex> getTopoOrdering(const NGHolder &g) {
colour.begin(), index_map))
.vertex_index_map(index_map));
AcyclicFilter<EdgeSet> af(&be.backEdges);
filtered_graph<NFAGraph, AcyclicFilter<EdgeSet>> acyclic_g(g.g, af);
auto acyclic_g = make_filtered_graph(g.g, make_bad_edge_filter(&backEdges));
vector<NFAVertex> ordering;
ordering.reserve(num_verts);
@ -435,9 +434,7 @@ void mustBeSetBefore_int(NFAVertex u, const NGHolder &g,
}
}
// The AcyclicFilter is badly named, it's really just an edge-set filter.
filtered_graph<NFAGraph, AcyclicFilter<set<NFAEdge>>> prefix(g.g,
AcyclicFilter<set<NFAEdge>>(&dead));
auto prefix = make_filtered_graph(g.g, make_bad_edge_filter(&dead));
depth_first_visit(
prefix, g.start, make_dfs_visitor(boost::null_visitor()),

View File

@ -70,6 +70,13 @@ void succ(const NGHolder &g, NFAVertex v, U *s) {
s->insert(ai, ae);
}
/** Returns the successors of u as a freshly built container (a flat_set
 * unless another container type is requested); filled in by succ(). */
template<class ContTemp = flat_set<NFAVertex>>
ContTemp succs(NFAVertex u, const NGHolder &g) {
    ContTemp successors;
    succ(g, u, &successors);
    return successors;
}
/** adds predecessors of v to s */
template<class U>
static really_inline
@ -79,6 +86,13 @@ void pred(const NGHolder &g, NFAVertex v, U *p) {
p->insert(it, ite);
}
/** Returns the predecessors of u as a freshly built container (a flat_set
 * unless another container type is requested); filled in by pred(). */
template<class ContTemp = flat_set<NFAVertex>>
ContTemp preds(NFAVertex u, const NGHolder &g) {
    ContTemp predecessors;
    pred(g, u, &predecessors);
    return predecessors;
}
/** returns a vertex with an out edge from v and is not v.
* v must have exactly one out-edge excluding self-loops.
* will return NGHolder::null_vertex() if the preconditions don't hold.
@ -88,6 +102,30 @@ NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex v);
/** Like getSoleDestVertex but for in-edges */
NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex v);
/** \brief edge filtered graph.
 *
 * This will give you a view over the graph that has none of the edges from
 * the provided set included.
 *
 * If this is provided with the back edges of the graph, this will result in an
 * acyclic subgraph view. This is useful for topological_sort and other
 * algorithms that require a DAG.
 *
 * The filter only stores a pointer to the edge set: the set is not copied and
 * must outlive the filter. A default-constructed filter has no edge set and
 * keeps every edge.
 */
template<typename EdgeSet>
struct bad_edge_filter {
    bad_edge_filter() {}
    explicit bad_edge_filter(const EdgeSet *bad_e) : bad_edges(bad_e) {}

    /** Returns true if edge \a e should be kept in the filtered view. */
    bool operator()(const typename EdgeSet::value_type &e) const {
        if (!bad_edges) {
            /* no bad set supplied: nothing to filter out, and the null
             * pointer must not be dereferenced */
            return true;
        }
        return !contains(*bad_edges, e); /* keep edges not in the bad set */
    }

    const EdgeSet *bad_edges = nullptr; /* not owned */
};
/** Builds a bad_edge_filter over the edge set \a e, deducing the set type so
 * the caller does not have to spell it out. */
template<typename EdgeSet>
bad_edge_filter<EdgeSet> make_bad_edge_filter(const EdgeSet *e) {
    bad_edge_filter<EdgeSet> filter(e);
    return filter;
}
/** Visitor that records back edges */
template <typename BackEdgeSet>
class BackEdges : public boost::default_dfs_visitor {
@ -100,23 +138,6 @@ public:
BackEdgeSet &backEdges;
};
/** \brief Acyclic filtered graph.
 *
 * This will give you a view over the graph that is directed and acyclic:
 * useful for topological_sort and other algorithms that require a DAG.
 *
 * The filter only stores a pointer to the back edge set: the set is not copied
 * and must outlive the filter. A default-constructed filter has no back edge
 * set and keeps every edge.
 */
template <typename BackEdgeSet>
struct AcyclicFilter {
    AcyclicFilter() {}
    explicit AcyclicFilter(const BackEdgeSet *edges) : backEdges(edges) {}

    /** Returns true if edge \a e should be kept in the filtered view. */
    template <typename EdgeT>
    bool operator()(const EdgeT &e) const {
        if (!backEdges) {
            /* no back edge set supplied: nothing to filter out, and the null
             * pointer must not be dereferenced */
            return true;
        }
        // Only keep edges that aren't in the back edge set.
        return (backEdges->find(e) == backEdges->end());
    }

    const BackEdgeSet *backEdges = nullptr; /* not owned */
};
/**
* Generic code to renumber all the vertices in a graph. Assumes that we're
* using a vertex_index property of type u32, and that we always have

View File

@ -733,10 +733,8 @@ bool handleStartDsPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
u32 repeatCount = 0;
NFAVertex hu = h.startDs;
set<NFAVertex> start_succ;
set<NFAVertex> startds_succ;
succ(h, h.start, &start_succ);
succ(h, h.startDs, &startds_succ);
auto start_succ = succs<set<NFAVertex>>(h.start, h);
auto startds_succ = succs<set<NFAVertex>>(h.startDs, h);
if (!is_subset_of(start_succ, startds_succ)) {
DEBUG_PRINTF("not a simple chain\n");
@ -790,10 +788,8 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
NFAVertex base = anchored ? h.start : h.startDs;
if (!anchored) {
set<NFAVertex> start_succ;
set<NFAVertex> startds_succ;
succ(h, h.start, &start_succ);
succ(h, h.startDs, &startds_succ);
auto start_succ = succs<set<NFAVertex>>(h.start, h);
auto startds_succ = succs<set<NFAVertex>>(h.startDs, h);
if (!is_subset_of(start_succ, startds_succ)) {
DEBUG_PRINTF("not a simple chain\n");
@ -852,8 +848,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
exits = exits_and_repeat_verts;
erase_all(&exits, rep_verts);
set<NFAVertex> base_succ;
succ(h, base, &base_succ);
auto base_succ = succs<set<NFAVertex>>(base, h);
base_succ.erase(h.startDs);
if (is_subset_of(base_succ, rep_verts)) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Intel Corporation
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@ -291,6 +291,22 @@ bool is_dag(const Graph &g, bool ignore_self_loops = false) {
return true;
}
/**
 * DFS visitor that inserts each discovered vertex into a container supplied
 * by the caller. Only a reference to the container is held, so the container
 * must outlive the visitor.
 */
template<typename Cont>
class vertex_recorder : public boost::default_dfs_visitor {
public:
    explicit vertex_recorder(Cont &o) : out(o) {}

    /** Discovery hook: records \a vertex in the output container. */
    template<class GraphT>
    void discover_vertex(typename Cont::value_type vertex, const GraphT &) {
        out.insert(vertex);
    }

    Cont &out; /* caller's container; not owned */
};
/** Builds a vertex_recorder writing into \a o, deducing the container type so
 * the caller does not have to spell it out. */
template<typename Cont>
vertex_recorder<Cont> make_vertex_recorder(Cont &o) {
    vertex_recorder<Cont> recorder(o);
    return recorder;
}
template <class Graph>
std::pair<typename Graph::edge_descriptor, bool>
add_edge_if_not_present(typename Graph::vertex_descriptor u,