diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp index 2c164090..b7ea93d9 100644 --- a/src/nfa/limex_compile.cpp +++ b/src/nfa/limex_compile.cpp @@ -788,10 +788,8 @@ u32 getEffectiveAccelStates(const build_info &args, if (!is_subset_of(h[v].reports, h[a].reports)) { continue; } - flat_set v_succ; - flat_set a_succ; - succ(h, v, &v_succ); - succ(h, a, &a_succ); + auto v_succ = succs(v, h); + auto a_succ = succs(a, h); if (is_subset_of(v_succ, a_succ)) { dominated_by[accel_id] |= 1U << accel_id_map[a]; } diff --git a/src/nfagraph/ng.cpp b/src/nfagraph/ng.cpp index deca3fd5..071e5c63 100644 --- a/src/nfagraph/ng.cpp +++ b/src/nfagraph/ng.cpp @@ -421,6 +421,7 @@ bool NG::addGraph(NGWrapper &w) { // Perform a reduction pass to merge sibling character classes together. if (cc.grey.performGraphSimplification) { removeRedundancy(w, som); + prunePathsRedundantWithSuccessorOfCyclics(w, som); } dumpDotWrapper(w, "04_reduced", cc.grey); diff --git a/src/nfagraph/ng_depth.cpp b/src/nfagraph/ng_depth.cpp index d7945be9..8afa644a 100644 --- a/src/nfagraph/ng_depth.cpp +++ b/src/nfagraph/ng_depth.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -134,8 +134,8 @@ void findLoopReachable(const GraphT &g, const NFAVertex srcVertex, depth_first_search(g, visitor(be).root_vertex(srcVertex).vertex_index_map( index_map)); - AcyclicFilter af(&deadEdges); - filtered_graph > acyclic_g(g, af); + auto af = make_bad_edge_filter(&deadEdges); + auto acyclic_g = make_filtered_graph(g, af); vector topoOrder; /* actually reverse topological order */ topoOrder.reserve(deadNodes.size()); diff --git a/src/nfagraph/ng_extparam.cpp b/src/nfagraph/ng_extparam.cpp index bc101df2..eeb15299 100644 --- a/src/nfagraph/ng_extparam.cpp +++ b/src/nfagraph/ng_extparam.cpp @@ -382,8 
+382,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { while (v != cyclic) { DEBUG_PRINTF("vertex %u\n", g[v].index); width++; - tie(ai, ae) = adjacent_vertices(v, g); - set succ(ai, ae); + auto succ = succs(v, g); if (contains(succ, cyclic)) { if (succ.size() == 1) { v = cyclic; @@ -421,8 +420,7 @@ bool transformMinLengthToRepeat(const ReportManager &rm, NGWrapper &g) { while (!is_any_accept(v, g)) { DEBUG_PRINTF("vertex %u\n", g[v].index); width++; - tie(ai, ae) = adjacent_vertices(v, g); - set succ(ai, ae); + auto succ = succs(v, g); if (succ.size() != 1) { DEBUG_PRINTF("bad form\n"); return false; diff --git a/src/nfagraph/ng_misc_opt.cpp b/src/nfagraph/ng_misc_opt.cpp index 2e02933a..3f685226 100644 --- a/src/nfagraph/ng_misc_opt.cpp +++ b/src/nfagraph/ng_misc_opt.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -69,8 +69,12 @@ #include "util/charreach.h" #include "util/container.h" #include "util/graph_range.h" +#include "util/ue2_containers.h" #include "ue2common.h" +#include +#include + #include #include #include @@ -94,8 +98,8 @@ void findCandidates(NGHolder &g, const vector &ordering, // For `v' to be a candidate, its predecessors must all have the same // successor set as `v'. - set succ_v, succ_u; - succ(g, v, &succ_v); + auto succ_v = succs(v, g); + flat_set succ_u; for (auto u : inv_adjacent_vertices_range(v, g)) { succ_u.clear(); @@ -125,8 +129,8 @@ void findCandidates_rev(NGHolder &g, const vector &ordering, // For `v' to be a candidate, its predecessors must all have the same // successor set as `v'. 
- set pred_v, pred_u; - pred(g, v, &pred_v); + auto pred_v = preds(v, g); + flat_set pred_u; for (auto u : adjacent_vertices_range(v, g)) { pred_u.clear(); @@ -172,8 +176,7 @@ void succCRIntersection(const NGHolder &g, NFAVertex v, CharReach &add) { static set findSustainSet(const NGHolder &g, NFAVertex p, bool ignore_starts, const CharReach &new_cr) { - set cand; - pred(g, p, &cand); + auto cand = preds>(p, g); if (ignore_starts) { cand.erase(g.startDs); } @@ -209,8 +212,7 @@ set findSustainSet(const NGHolder &g, NFAVertex p, static set findSustainSet_rev(const NGHolder &g, NFAVertex p, const CharReach &new_cr) { - set cand; - succ(g, p, &cand); + auto cand = succs>(p, g); /* remove elements from cand until the sustain set property holds */ bool changed; do { @@ -546,4 +548,139 @@ bool mergeCyclicDotStars(NGHolder &g) { return true; } +/** + * Returns the set of vertices that cannot be on if v is not on. + */ +static +flat_set findDependentVertices(const NGHolder &g, NFAVertex v) { + auto v_pred = preds(v, g); + flat_set may_be_on; + + /* We need to exclude any vertex that may be reached on a path which is + * incompatible with the vertex v being on. */ + + /* A vertex u is bad if: + * 1) its reach may be incompatible with v (not a subset) + * 2) there is an edge from a bad vertex b and there is either not an + * edge v->u or not an edge b->v. + * Note: 2) means v is never bad as it has a selfloop + * + * Can do this with a DFS from all the initial bad states with a conditional + * check down edges. Alternately can just filter these edges out of the + * graph first. 
+ */ + flat_set no_explore; + for (NFAVertex t : adjacent_vertices_range(v, g)) { + for (NFAEdge e : in_edges_range(t, g)) { + NFAVertex s = source(e, g); + if (edge(s, v, g).second) { + no_explore.insert(e); + } + } + } + + auto filtered_g = make_filtered_graph(g.g, + make_bad_edge_filter(&no_explore)); + + vector color_raw(num_vertices(g)); + auto color = make_iterator_property_map(color_raw.begin(), + get(&NFAGraphVertexProps::index, g.g)); + flat_set bad; + for (NFAVertex b : vertices_range(g)) { + if (b != g.start && g[b].char_reach.isSubsetOf(g[v].char_reach)) { + continue; + } + boost::depth_first_visit(filtered_g, b, make_vertex_recorder(bad), + color); + } + + flat_set rv; + for (NFAVertex u : vertices_range(g)) { + if (!contains(bad, u)) { + DEBUG_PRINTF("%u is good\n", g[u].index); + rv.insert(u); + } + } + return rv; +} + +static +bool pruneUsingSuccessors(NGHolder &g, NFAVertex u, som_type som) { + if (som && (is_virtual_start(u, g) || u == g.startDs)) { + return false; + } + + bool changed = false; + DEBUG_PRINTF("using cyclic %u as base\n", g[u].index); + auto children = findDependentVertices(g, u); + vector u_succs; + for (NFAVertex v : adjacent_vertices_range(u, g)) { + if (som && is_virtual_start(v, g)) { + /* as v is virtual start, its som has been reset so can not override + * existing in progress matches. 
*/ + continue; + } + u_succs.push_back(v); + } + sort(u_succs.begin(), u_succs.end(), + [&](NFAVertex a, NFAVertex b) { + return g[a].char_reach.count() > g[b].char_reach.count(); + }); + for (NFAVertex v : u_succs) { + DEBUG_PRINTF(" using %u as killer\n", g[v].index); + set dead; + for (NFAVertex s : adjacent_vertices_range(v, g)) { + DEBUG_PRINTF(" looking at preds of %u\n", g[s].index); + for (NFAEdge e : in_edges_range(s, g)) { + NFAVertex p = source(e, g); + if (!contains(children, p) || p == v || p == u + || p == g.accept) { + DEBUG_PRINTF("%u not a cand\n", g[p].index); + continue; + } + if (is_any_accept(s, g) && g[p].reports != g[v].reports) { + DEBUG_PRINTF("%u bad reports\n", g[p].index); + continue; + } + if (g[p].char_reach.isSubsetOf(g[v].char_reach)) { + dead.insert(e); + changed = true; + DEBUG_PRINTF("removing edge %u->%u\n", g[p].index, + g[s].index); + } else if (is_subset_of(succs(p, g), succs(u, g))) { + if (is_match_vertex(p, g) + && !is_subset_of(g[p].reports, g[v].reports)) { + continue; + } + DEBUG_PRINTF("updating reach on %u\n", g[p].index); + changed |= (g[p].char_reach & g[v].char_reach).any(); + g[p].char_reach &= ~g[v].char_reach; + } + + } + } + remove_edges(dead, g); + } + + DEBUG_PRINTF("changed %d\n", (int)changed); + return changed; +} + +bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &g, som_type som) { + /* TODO: the reverse form of this is also possible */ + bool changed = false; + for (NFAVertex v : vertices_range(g)) { + if (hasSelfLoop(v, g) && g[v].char_reach.all()) { + changed |= pruneUsingSuccessors(g, v, som); + } + } + + if (changed) { + pruneUseless(g); + clearReports(g); + } + + return changed; +} + } // namespace ue2 diff --git a/src/nfagraph/ng_misc_opt.h b/src/nfagraph/ng_misc_opt.h index 4955c7af..5ed089dc 100644 --- a/src/nfagraph/ng_misc_opt.h +++ b/src/nfagraph/ng_misc_opt.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution 
and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -72,6 +72,13 @@ std::vector reduced_cr(const NGHolder &g, /** Remove cyclic stars connected to start */ bool mergeCyclicDotStars(NGHolder &g); +/** + * Given a cyclic state 'c' with a broad reach and a later state 'v' that is + * only reachable if c is still on, then any edges to a successor of a direct + * successor of c with reach a superset of v are redundant. + */ +bool prunePathsRedundantWithSuccessorOfCyclics(NGHolder &h, som_type som); + } // namespace ue2 #endif diff --git a/src/nfagraph/ng_region.cpp b/src/nfagraph/ng_region.cpp index 124e9fa5..c7472e0d 100644 --- a/src/nfagraph/ng_region.cpp +++ b/src/nfagraph/ng_region.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -71,7 +71,7 @@ using namespace std; namespace ue2 { typedef ue2::unordered_set BackEdgeSet; -typedef boost::filtered_graph> +typedef boost::filtered_graph> AcyclicGraph; namespace { @@ -454,7 +454,7 @@ ue2::unordered_map assignRegions(const NGHolder &g) { .color_map(make_iterator_property_map( colours.begin(), get(&NFAGraphVertexProps::index, g.g)))); - AcyclicFilter af(&deadEdges); + auto af = make_bad_edge_filter(&deadEdges); AcyclicGraph acyclic_g(g.g, af); // Build a (reverse) topological ordering. diff --git a/src/nfagraph/ng_repeat.cpp b/src/nfagraph/ng_repeat.cpp index 5bff21b0..6eb2a9d7 100644 --- a/src/nfagraph/ng_repeat.cpp +++ b/src/nfagraph/ng_repeat.cpp @@ -138,8 +138,7 @@ void buildTopoOrder(const Graph &g, vector &topoOrder) { depth_first_search(g, visitor(BackEdges(deadEdges)). 
color_map(make_assoc_property_map(colours))); - AcyclicFilter af(&deadEdges); - boost::filtered_graph > acyclic_g(g, af); + auto acyclic_g = make_filtered_graph(g, make_bad_edge_filter(&deadEdges)); topological_sort(acyclic_g, back_inserter(topoOrder), color_map(make_assoc_property_map(colours))); diff --git a/src/nfagraph/ng_utf8.cpp b/src/nfagraph/ng_utf8.cpp index 719e42e2..352359f2 100644 --- a/src/nfagraph/ng_utf8.cpp +++ b/src/nfagraph/ng_utf8.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -187,10 +187,9 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { DEBUG_PRINTF("inspecting %u\n", h[v].index); bool changes = false; - set v_preds; - set v_succs; - pred(h, v, &v_preds); - succ(h, v, &v_succs); + auto v_preds = preds(v, h); + auto v_succs = succs(v, h); + set start_siblings; set end_siblings; @@ -199,8 +198,7 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { /* We need to find start vertices which have all of our preds. * As we have a self loop, it must be one of our succs. */ for (auto a : adjacent_vertices_range(v, h)) { - set a_preds; - pred(h, a, &a_preds); + auto a_preds = preds(a, h); if (a_preds == v_preds && isutf8start(h[a].char_reach)) { DEBUG_PRINTF("%u is a start v\n", h[a].index); @@ -211,8 +209,7 @@ bool expandCyclic(NGHolder &h, NFAVertex v) { /* We also need to find full cont vertices which have all our own succs; * As we have a self loop, it must be one of our preds. 
*/ for (auto a : inv_adjacent_vertices_range(v, h)) { - set a_succs; - succ(h, a, &a_succs); + auto a_succs = succs(a, h); if (a_succs == v_succs && h[a].char_reach == UTF_CONT_CR) { DEBUG_PRINTF("%u is a full tail cont\n", h[a].index); diff --git a/src/nfagraph/ng_util.cpp b/src/nfagraph/ng_util.cpp index 71eef7eb..de4ca656 100644 --- a/src/nfagraph/ng_util.cpp +++ b/src/nfagraph/ng_util.cpp @@ -403,8 +403,7 @@ vector getTopoOrdering(const NGHolder &g) { colour.begin(), index_map)) .vertex_index_map(index_map)); - AcyclicFilter af(&be.backEdges); - filtered_graph> acyclic_g(g.g, af); + auto acyclic_g = make_filtered_graph(g.g, make_bad_edge_filter(&backEdges)); vector ordering; ordering.reserve(num_verts); @@ -435,9 +434,7 @@ void mustBeSetBefore_int(NFAVertex u, const NGHolder &g, } } - // The AcyclicFilter is badly named, it's really just an edge-set filter. - filtered_graph>> prefix(g.g, - AcyclicFilter>(&dead)); + auto prefix = make_filtered_graph(g.g, make_bad_edge_filter(&dead)); depth_first_visit( prefix, g.start, make_dfs_visitor(boost::null_visitor()), diff --git a/src/nfagraph/ng_util.h b/src/nfagraph/ng_util.h index 6c6907a3..6b5090ce 100644 --- a/src/nfagraph/ng_util.h +++ b/src/nfagraph/ng_util.h @@ -70,6 +70,13 @@ void succ(const NGHolder &g, NFAVertex v, U *s) { s->insert(ai, ae); } +template> +ContTemp succs(NFAVertex u, const NGHolder &g) { + ContTemp rv; + succ(g, u, &rv); + return rv; +} + /** adds predecessors of v to s */ template static really_inline @@ -79,6 +86,13 @@ void pred(const NGHolder &g, NFAVertex v, U *p) { p->insert(it, ite); } +template> +ContTemp preds(NFAVertex u, const NGHolder &g) { + ContTemp rv; + pred(g, u, &rv); + return rv; +} + /** returns a vertex with an out edge from v and is not v. * v must have exactly one out-edge excluding self-loops. * will return NGHolder::null_vertex() if the preconditions don't hold. 
@@ -88,6 +102,30 @@ NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex v); /** Like getSoleDestVertex but for in-edges */ NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex v); +/** \brief edge filtered graph. + * + * This will give you a view over the graph that has none of the edges from + * the provided set included. + * + * If this is provided with the back edges of the graph, this will result in an + * acyclic subgraph view. This is useful for topological_sort and other + * algorithms that require a DAG. + */ +template +struct bad_edge_filter { + bad_edge_filter() {} + explicit bad_edge_filter(const EdgeSet *bad_e) : bad_edges(bad_e) {} + bool operator()(const typename EdgeSet::value_type &e) const { + return !contains(*bad_edges, e); /* keep edges not in the bad set */ + } + const EdgeSet *bad_edges = nullptr; +}; + +template +bad_edge_filter make_bad_edge_filter(const EdgeSet *e) { + return bad_edge_filter(e); +} + /** Visitor that records back edges */ template class BackEdges : public boost::default_dfs_visitor { @@ -100,23 +138,6 @@ public: BackEdgeSet &backEdges; }; -/** \brief Acyclic filtered graph. - * - * This will give you a view over the graph that is directed and acyclic: - * useful for topological_sort and other algorithms that require a DAG. - */ -template -struct AcyclicFilter { - AcyclicFilter() {} - explicit AcyclicFilter(const BackEdgeSet *edges) : backEdges(edges) {} - template - bool operator()(const EdgeT &e) const { - // Only keep edges that aren't in the back edge set. - return (backEdges->find(e) == backEdges->end()); - } - const BackEdgeSet *backEdges = nullptr; -}; - /** * Generic code to renumber all the vertices in a graph. 
Assumes that we're * using a vertex_index property of type u32, and that we always have diff --git a/src/rose/rose_build_convert.cpp b/src/rose/rose_build_convert.cpp index d3fa1ac6..dfc0ed23 100644 --- a/src/rose/rose_build_convert.cpp +++ b/src/rose/rose_build_convert.cpp @@ -733,10 +733,8 @@ bool handleStartDsPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, u32 repeatCount = 0; NFAVertex hu = h.startDs; - set start_succ; - set startds_succ; - succ(h, h.start, &start_succ); - succ(h, h.startDs, &startds_succ); + auto start_succ = succs>(h.start, h); + auto startds_succ = succs>(h.startDs, h); if (!is_subset_of(start_succ, startds_succ)) { DEBUG_PRINTF("not a simple chain\n"); @@ -790,10 +788,8 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, NFAVertex base = anchored ? h.start : h.startDs; if (!anchored) { - set start_succ; - set startds_succ; - succ(h, h.start, &start_succ); - succ(h, h.startDs, &startds_succ); + auto start_succ = succs>(h.start, h); + auto startds_succ = succs>(h.startDs, h); if (!is_subset_of(start_succ, startds_succ)) { DEBUG_PRINTF("not a simple chain\n"); @@ -852,8 +848,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, exits = exits_and_repeat_verts; erase_all(&exits, rep_verts); - set base_succ; - succ(h, base, &base_succ); + auto base_succ = succs>(base, h); base_succ.erase(h.startDs); if (is_subset_of(base_succ, rep_verts)) { diff --git a/src/util/graph.h b/src/util/graph.h index 90589f14..d15e77aa 100644 --- a/src/util/graph.h +++ b/src/util/graph.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -291,6 +291,22 @@ bool is_dag(const Graph &g, bool ignore_self_loops = false) { return true; } +template +class vertex_recorder : public boost::default_dfs_visitor 
{ +public: + explicit vertex_recorder(Cont &o) : out(o) {} + template + void discover_vertex(typename Cont::value_type v, const G &) { + out.insert(v); + } + Cont &out; +}; + +template +vertex_recorder make_vertex_recorder(Cont &o) { + return vertex_recorder(o); +} + template std::pair add_edge_if_not_present(typename Graph::vertex_descriptor u,