/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Region analysis.
 *
 * Definition: a \a region is a subset of vertices in a graph such that:
 *  - the edges entering the region are a cutset of the graph
 *  - for every in-edge (u, v) to the region there exist edges (u, w) for all
 *    w in {w : w in region and w has an in-edge}
 *  - the regions in a graph partition the graph
 *
 * Note:
 *  - we partition a graph into the maximal number of regions
 *  - similar properties for exit edges should hold as a consequence
 *  - graph == sequence of regions
 *  - a region is considered to have an epsilon vertex to allow jumps
 *  - vertices which only lead to back edges need to be floated up in the topo
 *    order
 *
 * Algorithm overview:
 *  -# topo-order over the DAG skeleton;
 *  -# incrementally add vertices to the current region until the boundary
 *     edges form a valid cut-set;
 *  -# for each back-edge, if the source and target are in different regions,
 *     merge the regions (and all intervening regions) into a common region.
 */
#include "ng_region.h"

#include "ng_holder.h"
#include "ng_util.h"
#include "ue2common.h"
#include "util/container.h"
#include "util/ue2_containers.h"
#include "util/graph_range.h"

#include <algorithm>
#include <iterator>
#include <utility>
#include <vector>

#include <boost/graph/depth_first_search.hpp>
#include <boost/graph/filtered_graph.hpp>
#include <boost/graph/topological_sort.hpp>

using namespace std;

namespace ue2 {

/** Set of edges removed from the holder to obtain its acyclic skeleton. */
typedef ue2::unordered_set<NFAEdge> BackEdgeSet;

/** View of an NGHolder with the edges in a BackEdgeSet filtered out. */
typedef boost::filtered_graph<NGHolder, bad_edge_filter<BackEdgeSet>>
    AcyclicGraph;

namespace {
/** A vertex of the candidate region which has successors outside it. */
struct exit_info {
    explicit exit_info(NFAVertex v) : exit(v) {}

    NFAVertex exit; /**< the vertex inside the candidate region */
    flat_set<NFAVertex> open; /**< successors of exit not in the region */
};
}

/**
 * Appends an exit_info for \p v to \p exits if \p v has at least one successor
 * (in \p g) that is not a member of \p r; otherwise leaves \p exits untouched.
 */
static
void checkAndAddExitCandidate(const AcyclicGraph &g,
                              const ue2::unordered_set<NFAVertex> &r,
                              NFAVertex v, vector<exit_info> &exits) {
    exit_info v_exit(v);
    auto &open = v_exit.open;

    /* find the set of vertices reachable from v which are not in r */
    for (auto w : adjacent_vertices_range(v, g)) {
        if (!contains(r, w)) {
            open.insert(w);
        }
    }

    if (!open.empty()) {
        DEBUG_PRINTF("exit %zu\n", g[v].index);
        exits.push_back(std::move(v_exit));
    }
}

/** Computes, from scratch, the exits of the vertex set \p r. */
static
vector<exit_info> findExits(const AcyclicGraph &g,
                            const ue2::unordered_set<NFAVertex> &r) {
    vector<exit_info> exits;

    for (auto v : r) {
        checkAndAddExitCandidate(g, r, v, exits);
    }

    return exits;
}

/**
 * Incrementally updates \p exits after \p new_v has been added to \p r:
 * \p new_v is removed from every open set, exits whose open set becomes empty
 * are dropped, and \p new_v itself is considered as a new exit.
 */
static
void refineExits(const AcyclicGraph &g,
                 const ue2::unordered_set<NFAVertex> &r, NFAVertex new_v,
                 vector<exit_info> &exits) {
    /* new_v is no longer an open edge */
    for (auto &exit : exits) {
        exit.open.erase(new_v);
    }

    /* no open edges: no longer an exit */
    exits.erase(remove_if(exits.begin(), exits.end(),
                          [&](const exit_info &exit) {
                              return exit.open.empty();
                          }),
                exits.end());

    checkAndAddExitCandidate(g, r, new_v, exits);
}

/**
 * Returns true if the exits of a candidate region allow the region to be
 * closed. The exits are accepted when any of the following holds:
 *  - there are no exits at all;
 *  - there is exactly one exit and \p open_jumps is empty;
 *  - there is exactly one exit and it is the only member of \p open_jumps;
 *  - every exit has the same open set and, if \p open_jumps is non-empty,
 *    that open set is equal to \p open_jumps.
 *
 * \p g is only used for debug output.
 */
static
bool exitValid(UNUSED const AcyclicGraph &g, const vector<exit_info> &exits,
               const flat_set<NFAVertex> &open_jumps) {
    if (exits.empty() || (exits.size() < 2 && open_jumps.empty())) {
        return true;
    }

    if (exits.size() == 1 && open_jumps.size() == 1) {
        DEBUG_PRINTF("oj %zu, e %zu\n", g[*open_jumps.begin()].index,
                     g[exits[0].exit].index);
        if (*open_jumps.begin() == exits[0].exit) {
            return true;
        }
    }

    assert(!exits.empty());
    const auto &enters = exits.front().open;

    if (!open_jumps.empty() && enters != open_jumps) {
        return false;
    }

    for (auto it = begin(exits) + 1; it != end(exits); ++it) {
        if (it->open != enters) {
            return false;
        }
    }

    return true;
}

/** Assigns region id \p rid to every vertex in \p r. */
static
void setRegion(const ue2::unordered_set<NFAVertex> &r, u32 rid,
               ue2::unordered_map<NFAVertex, u32> &regions) {
    for (auto v : r) {
        regions[v] = rid;
    }
}

/**
 * Starts a new candidate region at \p it (advancing \p it past the vertices
 * consumed).
 *
 * \param g           acyclic view of the graph
 * \param it          in/out: current position in the topo order
 * \param ite         end of the topo order
 * \param candidate   out: vertices of the new candidate region
 * \param exits       in: exits of the previous region;
 *                    out: exits from the new candidate
 * \param open_jumps  in/out: vertices entered from the previous region which
 *                    have not yet been added to the candidate
 */
static
void buildInitialCandidate(const AcyclicGraph &g,
                    vector<NFAVertex>::const_reverse_iterator &it,
                    const vector<NFAVertex>::const_reverse_iterator &ite,
                    ue2::unordered_set<NFAVertex> *candidate,
                    /* in exits of prev region;
                     * out exits from candidate */
                    vector<exit_info> *exits,
                    flat_set<NFAVertex> *open_jumps) {
    if (it == ite) {
        /* nothing left to place */
        candidate->clear();
        exits->clear();
        return;
    }

    if (exits->empty()) {
        /* previous region had no exits: seed with a single vertex */
        DEBUG_PRINTF("odd\n");
        candidate->clear();
        DEBUG_PRINTF("adding %zu to initial\n", g[*it].index);
        candidate->insert(*it);
        open_jumps->erase(*it);
        checkAndAddExitCandidate(g, *candidate, *it, *exits);
        ++it;
        return;
    }

    /* copy: *exits is overwritten at the end of this function */
    auto enters = (*exits)[0].open;
    candidate->clear();

    /* consume vertices up to and including the first one which is entered
     * from the previous region */
    for (; it != ite; ++it) {
        DEBUG_PRINTF("adding %zu to initial\n", g[*it].index);
        candidate->insert(*it);
        if (contains(enters, *it)) {
            break;
        }
    }

    if (it != ite) {
        /* the remaining entered vertices are still to be reached */
        enters.erase(*it);
        *open_jumps = std::move(enters);
        DEBUG_PRINTF("oj size = %zu\n", open_jumps->size());
        ++it;
    } else {
        open_jumps->clear();
    }

    *exits = findExits(g, *candidate);
}

/**
 * Walks \p topo from its last element to its first, greedily growing a
 * candidate region until its exits are valid (see exitValid()), and writes the
 * resulting region ids into \p regions.
 *
 * The first candidate is seeded with the last two elements of \p topo
 * (reorderSpecials() places start and startDs there).
 */
static
void findDagLeaders(const NGHolder &h, const AcyclicGraph &g,
                    const vector<NFAVertex> &topo,
                    ue2::unordered_map<NFAVertex, u32> &regions) {
    assert(!topo.empty());
    u32 curr_id = 0;
    vector<NFAVertex>::const_reverse_iterator t_it = topo.rbegin();
    ue2::unordered_set<NFAVertex> candidate;
    flat_set<NFAVertex> open_jumps;

    /* check the iterator before it is dereferenced */
    assert(t_it != topo.rend());
    DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index);
    candidate.insert(*t_it++);

    assert(t_it != topo.rend());
    DEBUG_PRINTF("adding %zu to current\n", g[*t_it].index);
    candidate.insert(*t_it++);

    auto exits = findExits(g, candidate);

    while (t_it != topo.rend()) {
        assert(!candidate.empty());

        if (exitValid(g, exits, open_jumps)) {
            if (contains(candidate, h.accept) && !open_jumps.empty()) {
                /* we have tried to make an optional region containing accept
                 * as we have an open jump to eod. This candidate region needs
                 * to be put in with the previous region. */
                curr_id--;
                DEBUG_PRINTF("merging in with region %u\n", curr_id);
            } else {
                DEBUG_PRINTF("setting region %u\n", curr_id);
            }
            setRegion(candidate, curr_id++, regions);
            buildInitialCandidate(g, t_it, topo.rend(), &candidate, &exits,
                                  &open_jumps);
        } else {
            NFAVertex curr = *t_it;
            DEBUG_PRINTF("adding %zu to current\n", g[curr].index);
            candidate.insert(curr);
            open_jumps.erase(curr);
            refineExits(g, candidate, curr, exits);
            DEBUG_PRINTF("  open jumps %zu exits %zu\n", open_jumps.size(),
                         exits.size());
            ++t_it;
        }
    }
    /* assert exits valid */
    setRegion(candidate, curr_id, regions);
}

/**
 * For every back edge (u, v) whose endpoints lie in different regions, folds
 * all regions in (region(v), region(u)] into region(v) and shifts the ids of
 * all later regions down by the number of regions removed.
 */
static
void mergeUnderBackEdges(const NGHolder &g, const vector<NFAVertex> &topo,
                         const BackEdgeSet &backEdges,
                         ue2::unordered_map<NFAVertex, u32> &regions) {
    for (const auto &e : backEdges) {
        NFAVertex u = source(e, g);
        NFAVertex v = target(e, g);

        u32 ru = regions[u];
        u32 rv = regions[v];
        if (ru == rv) {
            continue;
        }

        DEBUG_PRINTF("merging v = %zu(%u), u = %zu(%u)\n", g[v].index, rv,
                     g[u].index, ru);
        assert(rv < ru);

        for (auto t : topo) {
            u32 r = regions[t];
            if (r <= ru && r > rv) {
                /* inside the merged span */
                regions[t] = rv;
            } else if (r > ru) {
                /* after the merged span: renumber to stay contiguous */
                regions[t] = rv + r - ru;
            }
        }
    }
}

/**
 * Moves the special vertices of \p w to fixed positions in \p topoOrder:
 * start last, startDs second-to-last, acceptEod first and accept second.
 * accept is dropped from the ordering if it has no in-edges in \p acyclic_g.
 */
static
void reorderSpecials(const NGHolder &w, const AcyclicGraph &acyclic_g,
                     vector<NFAVertex> &topoOrder) {
    // Start is last element of reverse topo ordering.
    auto it = find(topoOrder.begin(), topoOrder.end(), w.start);
    if (it != topoOrder.end() - 1) {
        DEBUG_PRINTF("repositioning start\n");
        assert(it != topoOrder.end());
        topoOrder.erase(it);
        topoOrder.insert(topoOrder.end(), w.start);
    }

    // StartDs is second-to-last element of reverse topo ordering.
    it = find(topoOrder.begin(), topoOrder.end(), w.startDs);
    if (it != topoOrder.end() - 2) {
        DEBUG_PRINTF("repositioning start ds\n");
        assert(it != topoOrder.end());
        topoOrder.erase(it);
        topoOrder.insert(topoOrder.end() - 1, w.startDs);
    }

    // AcceptEOD is first element of reverse topo ordering.
    it = find(topoOrder.begin(), topoOrder.end(), w.acceptEod);
    if (it != topoOrder.begin()) {
        DEBUG_PRINTF("repositioning accept\n");
        assert(it != topoOrder.end());
        topoOrder.erase(it);
        topoOrder.insert(topoOrder.begin(), w.acceptEod);
    }

    // Accept is second element of reverse topo ordering, if it's connected.
    it = find(topoOrder.begin(), topoOrder.end(), w.accept);
    if (it != topoOrder.begin() + 1) {
        DEBUG_PRINTF("repositioning accept\n");
        assert(it != topoOrder.end());
        topoOrder.erase(it);
        if (in_degree(w.accept, acyclic_g) != 0) {
            topoOrder.insert(topoOrder.begin() + 1, w.accept);
        }
    }
}

/**
 * Finds the non-special vertices of \p acyclic_g which are leaves, or whose
 * successors are all such vertices ("sinks"), and moves each of them within
 * \p topoOrder so that it sits next to one of its parents.
 */
static
void liftSinks(const AcyclicGraph &acyclic_g, vector<NFAVertex> &topoOrder) {
    ue2::unordered_set<NFAVertex> sinks;
    for (auto v : vertices_range(acyclic_g)) {
        if (is_special(v, acyclic_g)) {
            continue;
        }

        if (isLeafNode(v, acyclic_g)) {
            DEBUG_PRINTF("sink found %zu\n", acyclic_g[v].index);
            sinks.insert(NFAVertex(v));
        }
    }

    if (sinks.empty()) {
        DEBUG_PRINTF("no sinks found\n");
        return;
    }

    /* iterate to a fixed point: a vertex whose successors are all sinks is
     * itself a sink */
    bool changed;
    do {
        DEBUG_PRINTF("look\n");
        changed = false;
        for (auto v : vertices_range(acyclic_g)) {
            if (is_special(v, acyclic_g) || contains(sinks, NFAVertex(v))) {
                continue;
            }

            for (auto w : adjacent_vertices_range(v, acyclic_g)) {
                if (!contains(sinks, NFAVertex(w))) {
                    goto next;
                }
            }

            DEBUG_PRINTF("sink found %zu\n", acyclic_g[v].index);
            sinks.insert(NFAVertex(v));
            changed = true;
        next:;
        }
    } while (changed);

    for (auto ri = topoOrder.rbegin() + 1; ri != topoOrder.rend(); ++ri) {
        if (!contains(sinks, *ri)) {
            continue;
        }
        NFAVertex s = *ri;
        DEBUG_PRINTF("handling sink %zu\n", acyclic_g[s].index);
        ue2::unordered_set<NFAVertex> parents;
        for (const auto &e : in_edges_range(s, acyclic_g)) {
            parents.insert(NFAVertex(source(e, acyclic_g)));
        }

        /* vertex has no children not reachable on a back edge, bubble the
         * vertex up the topo order to be near its parents */
        vector<NFAVertex>::reverse_iterator rj = ri;
        --rj;
        while (rj != topoOrder.rbegin() && !contains(parents, *rj)) {
            /* sink is in rj + 1 */
            assert(*(rj + 1) == s);
            DEBUG_PRINTF("lifting\n");
            using std::swap;
            swap(*rj, *(rj + 1));
            --rj;
        }
    }
}

/** Build a reverse topo ordering (with only the specials that are in use). We
 * also want to ensure vertices which only lead to back edges are placed near
 * their parents.
 *
 * \param w          the holder being analysed
 * \param acyclic_g  view of \p w without its back edges
 * \param colours    colour map storage (indexed by vertex index) handed to
 *                   the topological sort
 */
static
vector<NFAVertex> buildTopoOrder(const NGHolder &w,
                                 const AcyclicGraph &acyclic_g,
                                 vector<boost::default_color_type> &colours) {
    vector<NFAVertex> topoOrder;
    topoOrder.reserve(num_vertices(w));

    topological_sort(acyclic_g, back_inserter(topoOrder),
                     color_map(make_iterator_property_map(
                         colours.begin(), get(vertex_index, acyclic_g))));

    /* reorderSpecials() does iterator arithmetic relative to end(), so bail
     * out on an empty ordering before calling it */
    if (topoOrder.empty()) {
        return topoOrder;
    }

    reorderSpecials(w, acyclic_g, topoOrder);

    if (topoOrder.empty()) {
        return topoOrder;
    }

    liftSinks(acyclic_g, topoOrder);

    DEBUG_PRINTF("TOPO ORDER\n");
    for (auto ri = topoOrder.rbegin(); ri != topoOrder.rend(); ++ri) {
        DEBUG_PRINTF("[%zu]\n", acyclic_g[*ri].index);
    }
    DEBUG_PRINTF("----------\n");

    return topoOrder;
}

/**
 * Assigns a region id to every vertex of \p g and returns the vertex-to-region
 * map. Vertex indices of \p g must be correctly numbered (asserted).
 */
ue2::unordered_map<NFAVertex, u32> assignRegions(const NGHolder &g) {
    assert(hasCorrectlyNumberedVertices(g));
    const u32 numVertices = num_vertices(g);
    DEBUG_PRINTF("assigning regions for %u vertices in holder\n", numVertices);

    vector<boost::default_color_type> colours(numVertices);

    // Build an acyclic graph for this NGHolder.
    BackEdgeSet deadEdges;
    depth_first_search(g,
                       visitor(BackEdges<BackEdgeSet>(deadEdges))
                           .root_vertex(g.start)
                           .color_map(make_iterator_property_map(
                               colours.begin(), get(vertex_index, g))));

    auto af = make_bad_edge_filter(&deadEdges);
    AcyclicGraph acyclic_g(g, af);

    // Build a (reverse) topological ordering.
    vector<NFAVertex> topoOrder = buildTopoOrder(g, acyclic_g, colours);

    // Everybody starts in region 0.
    ue2::unordered_map<NFAVertex, u32> regions;
    regions.reserve(numVertices);
    for (auto v : vertices_range(g)) {
        regions.emplace(v, 0);
    }

    findDagLeaders(g, acyclic_g, topoOrder, regions);
    mergeUnderBackEdges(g, topoOrder, deadEdges, regions);

    return regions;
}

} // namespace ue2