From 06cde4c94dfa7524b8d7eeb75ac1b446f5fea9eb Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 29 Nov 2016 14:49:01 +1100 Subject: [PATCH] ng_literal_analysis: use ue2_graph This reduces compile time ~10% on a number of large cases. --- src/nfagraph/ng_literal_analysis.cpp | 160 +++++++++++++-------------- 1 file changed, 76 insertions(+), 84 deletions(-) diff --git a/src/nfagraph/ng_literal_analysis.cpp b/src/nfagraph/ng_literal_analysis.cpp index 68c1bdd6..a5f3468b 100644 --- a/src/nfagraph/ng_literal_analysis.cpp +++ b/src/nfagraph/ng_literal_analysis.cpp @@ -40,17 +40,16 @@ #include "util/depth.h" #include "util/graph.h" #include "util/graph_range.h" +#include "util/ue2_graph.h" #include "util/ue2string.h" #include #include #include -#include #include using namespace std; -using boost::vertex_index; namespace ue2 { @@ -65,24 +64,29 @@ namespace { /* Small literal graph type used for the suffix tree used in * compressAndScore. */ - struct LitGraphVertexProps { - LitGraphVertexProps() {} - explicit LitGraphVertexProps(const ue2_literal::elem &c_in) : c(c_in) {} + LitGraphVertexProps() = default; + explicit LitGraphVertexProps(ue2_literal::elem c_in) : c(move(c_in)) {} ue2_literal::elem c; // string element (char + bool) + size_t index; // managed by ue2_graph }; struct LitGraphEdgeProps { - LitGraphEdgeProps() {} + LitGraphEdgeProps() = default; explicit LitGraphEdgeProps(u64a score_in) : score(score_in) {} u64a score = NO_LITERAL_AT_EDGE_SCORE; - size_t index; /* only initialised when the reverse edges are added. */ + size_t index; // managed by ue2_graph +}; + +struct LitGraph + : public ue2_graph { + + LitGraph() : root(add_vertex(*this)), sink(add_vertex(*this)) {} + + const vertex_descriptor root; + const vertex_descriptor sink; }; -/* keep edgeList = listS as you cannot remove edges if edgeList = vecS */ -typedef boost::adjacency_list LitGraph; typedef LitGraph::vertex_descriptor LitVertex; typedef LitGraph::edge_descriptor LitEdge; @@ -95,17 +99,16 @@ typedef std::queue LitVertexQ; /** \brief Dump the literal graph in Graphviz format. */ static UNUSED -void dumpGraph(const char *filename, const LitGraph &lg, const LitVertex &root, - const LitVertex &sink) { +void dumpGraph(const char *filename, const LitGraph &lg) { ofstream fout(filename); fout << "digraph G {" << endl; for (auto v : vertices_range(lg)) { - fout << boost::get(vertex_index, lg, v); - if (v == root) { + fout << lg[v].index; + if (v == lg.root) { fout << "[label=\"ROOT\"];"; - } else if (v == sink) { + } else if (v == lg.sink) { fout << "[label=\"SINK\"];"; } else { ue2_literal s; @@ -117,10 +120,9 @@ void dumpGraph(const char *filename, const LitGraph &lg, const LitVertex &root, for (const auto &e : edges_range(lg)) { LitVertex u = source(e, lg), v = target(e, lg); - fout << boost::get(vertex_index, lg, u) << " -> " << - boost::get(vertex_index, lg, v) << - "[label=\"" << lg[e].score << "\"]" << - ";" << endl; + fout << lg[u].index << " -> " << lg[v].index << "[label=\"" + << lg[e].score << "\"]" + << ";" << endl; } fout << "}" << endl; @@ -142,11 +144,11 @@ bool allowExpand(size_t numItems, size_t totalPathsSoFar) { } static -LitVertex addToLitGraph(LitGraph &lg, LitVertex sink, - LitVertex pred, const ue2_literal::elem &c) { +LitVertex addToLitGraph(LitGraph &lg, LitVertex pred, + const ue2_literal::elem &c) { // Check if we already have this in the graph. for (auto v : adjacent_vertices_range(pred, lg)) { - if (v == sink) { + if (v == lg.sink) { continue; } if (lg[v].c == c) { @@ -160,9 +162,10 @@ LitVertex addToLitGraph(LitGraph &lg, LitVertex sink, } static -void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex sink, - LitVertex pred, const CharReach &cr, NFAVertex v) { - for (size_t i = cr.find_first(); i != CharReach::npos; i = cr.find_next(i)) { +void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex pred, + const CharReach &cr, NFAVertex v) { + for (size_t i = cr.find_first(); i != CharReach::npos; + i = cr.find_next(i)) { if (myisupper(i) && cr.test(mytolower(i))) { // ignore upper half of a nocase pair continue; @@ -170,14 +173,14 @@ void addToQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex sink, bool nocase = myislower(i) && cr.test(mytoupper(i)); ue2_literal::elem c((char)i, nocase); - LitVertex lv = addToLitGraph(lg, sink, pred, c); + LitVertex lv = addToLitGraph(lg, pred, c); workQ.push(VertexPair(lv, v)); } } static -void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex root, - LitVertex sink, const NGHolder &g, const NFAEdge &e) { +void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, const NGHolder &g, + const NFAEdge &e) { NFAVertex u = source(e, g); NFAVertex v = target(e, g); const CharReach &cr = g[v].char_reach; @@ -186,7 +189,7 @@ void initWorkQueue(LitVertexQ &workQ, LitGraph &lg, LitVertex root, return; } - addToQueue(workQ, lg, sink, root, cr, u); + addToQueue(workQ, lg, lg.root, cr, u); } static @@ -198,7 +201,8 @@ u32 crCardinality(const CharReach &cr) { } u32 rv = 0; - for (size_t i = cr.find_first(); i != CharReach::npos; i = cr.find_next(i)) { + for (size_t i = cr.find_first(); i != CharReach::npos; + i = cr.find_next(i)) { if (myisupper(i) && cr.test(mytolower(i))) { // ignore upper half of a nocase pair continue; @@ -213,10 +217,10 @@ u32 crCardinality(const CharReach &cr) { * identifying vertices connected to the sink and removing their other * out-edges. */ static -void filterLitGraph(LitGraph &lg, const LitVertex sink) { - for (auto v : inv_adjacent_vertices_range(sink, lg)) { - remove_out_edge_if(v, [&lg, &sink](const LitEdge &e) { - return target(e, lg) != sink; +void filterLitGraph(LitGraph &lg) { + for (auto v : inv_adjacent_vertices_range(lg.sink, lg)) { + remove_out_edge_if(v, [&lg](const LitEdge &e) { + return target(e, lg) != lg.sink; }, lg); } @@ -229,13 +233,12 @@ void filterLitGraph(LitGraph &lg, const LitVertex sink) { * from each predecessor of the sink (note: it's a suffix tree except for this * convenience) towards the source, storing each string as we go. */ static -void extractLiterals(const LitGraph &lg, const LitVertex root, - const LitVertex sink, set &s) { +void extractLiterals(const LitGraph &lg, set &s) { ue2_literal lit; - for (auto u : inv_adjacent_vertices_range(sink, lg)) { + for (auto u : inv_adjacent_vertices_range(lg.sink, lg)) { lit.clear(); - while (u != root) { + while (u != lg.root) { lit.push_back(lg[u].c); assert(in_degree(u, lg) <= 1); LitGraph::inv_adjacency_iterator ai2, ae2; @@ -277,11 +280,9 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e, } LitGraph lg; - LitVertex root = add_vertex(lg); - LitVertex sink = add_vertex(lg); LitVertexQ workQ; - initWorkQueue(workQ, lg, root, sink, g, e); + initWorkQueue(workQ, lg, g, e); while (!workQ.empty()) { const LitVertex lv = workQ.front().first; @@ -290,18 +291,18 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e, u32 cr_card = crCardinality(cr); size_t numItems = cr_card * in_degree(t, g); - size_t committed_count = workQ.size() + in_degree(sink, lg) - 1; + size_t committed_count = workQ.size() + in_degree(lg.sink, lg) - 1; if (g[t].index == NODE_START) { // reached start, add to literal set - add_edge_if_not_present(lv, sink, lg); + add_edge_if_not_present(lv, lg.sink, lg); goto next_work_elem; } // Expand next vertex if (allowExpand(numItems, committed_count)) { for (auto u : inv_adjacent_vertices_range(t, g)) { - addToQueue(workQ, lg, sink, lv, cr, u); + addToQueue(workQ, lg, lv, cr, u); } goto next_work_elem; } @@ -317,21 +318,21 @@ void processWorkQueue(const NGHolder &g, const NFAEdge &e, bool nocase = myislower(i) && cr.test(mytoupper(i)); ue2_literal::elem c((char)i, nocase); - LitVertex lt = addToLitGraph(lg, sink, lv, c); - add_edge_if_not_present(lt, sink, lg); + LitVertex lt = addToLitGraph(lg, lv, c); + add_edge_if_not_present(lt, lg.sink, lg); } goto next_work_elem; } // add to literal set - add_edge_if_not_present(lv, sink, lg); + add_edge_if_not_present(lv, lg.sink, lg); next_work_elem: workQ.pop(); } - filterLitGraph(lg, sink); - //dumpGraph("litgraph.dot", lg, root, sink); - extractLiterals(lg, root, sink, s); + filterLitGraph(lg); + //dumpGraph("litgraph.dot", lg); + extractLiterals(lg, s); // Our literal set should contain no literal that is a suffix of another. assert(!hasSuffixLiterals(s)); @@ -410,16 +411,15 @@ u64a calculateScore(const ue2_literal &s) { /** Adds a literal in reverse order, building up a suffix tree. */ static -void addReversedLiteral(const ue2_literal &lit, LitGraph &lg, - const LitVertex &root, const LitVertex &sink) { +void addReversedLiteral(const ue2_literal &lit, LitGraph &lg) { DEBUG_PRINTF("literal: '%s'\n", escapeString(lit).c_str()); ue2_literal suffix; - LitVertex v = root; + LitVertex v = lg.root; for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { suffix.push_back(*it); LitVertex w; for (auto v2 : adjacent_vertices_range(v, lg)) { - if (v2 != sink && lg[v2].c == *it) { + if (v2 != lg.sink && lg[v2].c == *it) { w = v2; goto next_char; } @@ -431,17 +431,18 @@ next_char: } // Wire the last vertex to the sink. - add_edge(v, sink, lg); + add_edge(v, lg.sink, lg); } static void extractLiterals(const vector &cutset, const LitGraph &lg, - const LitVertex &root, set &s) { + set &s) { for (const auto &e : cutset) { - LitVertex u = source(e, lg), v = target(e, lg); + LitVertex u = source(e, lg); + LitVertex v = target(e, lg); ue2_literal lit; lit.push_back(lg[v].c); - while (u != root) { + while (u != lg.root) { lit.push_back(lg[u].c); assert(in_degree(u, lg) == 1); LitGraph::inv_adjacency_iterator ai, ae; @@ -488,10 +489,7 @@ const char *describeColor(boost::default_color_type c) { static vector add_reverse_edges_and_index(LitGraph &lg) { vector fwd_edges; - - size_t next_index = 0; for (const auto &e : edges_range(lg)) { - lg[e].index = next_index++; fwd_edges.push_back(e); } @@ -503,9 +501,7 @@ vector add_reverse_edges_and_index(LitGraph &lg) { assert(!edge(v, u, lg).second); - LitEdge rev = add_edge(v, u, lg).first; - lg[rev].score = 0; - lg[rev].index = next_index++; + LitEdge rev = add_edge(v, u, LitGraphEdgeProps(0), lg).first; rev_map[lg[e].index] = rev; rev_map[lg[rev].index] = e; } @@ -514,20 +510,19 @@ vector add_reverse_edges_and_index(LitGraph &lg) { } static -void findMinCut(LitGraph &lg, const LitVertex &root, const LitVertex &sink, - vector &cutset) { +void findMinCut(LitGraph &lg, vector &cutset) { cutset.clear(); - //dumpGraph("litgraph.dot", lg, root, sink); + //dumpGraph("litgraph.dot", lg); - assert(!in_degree(root, lg)); - assert(!out_degree(sink, lg)); + assert(!in_degree(lg.root, lg)); + assert(!out_degree(lg.sink, lg)); size_t num_real_edges = num_edges(lg); // Add reverse edges for the convenience of the BGL's max flow algorithm. vector rev_edges = add_reverse_edges_and_index(lg); - const auto v_index_map = get(vertex_index, lg); + const auto v_index_map = get(&LitGraphVertexProps::index, lg); const auto e_index_map = get(&LitGraphEdgeProps::index, lg); const size_t num_verts = num_vertices(lg); vector colors(num_verts); @@ -542,7 +537,7 @@ void findMinCut(LitGraph &lg, const LitVertex &root, const LitVertex &sink, make_iterator_property_map(predecessors.begin(), v_index_map), make_iterator_property_map(colors.begin(), v_index_map), make_iterator_property_map(distances.begin(), v_index_map), - v_index_map, root, sink); + v_index_map, lg.root, lg.sink); DEBUG_PRINTF("done, flow = %llu\n", flow); /* remove reverse edges */ @@ -555,21 +550,20 @@ void findMinCut(LitGraph &lg, const LitVertex &root, const LitVertex &sink, for (const auto &e : edges_range(lg)) { const LitVertex u = source(e, lg), v = target(e, lg); - const auto ucolor = colors[boost::get(vertex_index, lg, u)]; - const auto vcolor = colors[boost::get(vertex_index, lg, v)]; + const auto ucolor = colors[lg[u].index]; + const auto vcolor = colors[lg[v].index]; - DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n", - boost::get(vertex_index, lg, u), describeColor(ucolor), - boost::get(vertex_index, lg, v), describeColor(vcolor), + DEBUG_PRINTF("edge %zu:%s -> %zu:%s score %llu\n", lg[u].index, + describeColor(ucolor), lg[v].index, describeColor(vcolor), lg[e].score); if (ucolor != boost::white_color && vcolor == boost::white_color) { - assert(target(e, lg) != sink); + assert(v != lg.sink); white_cut.push_back(e); white_flow += lg[e].score; } if (ucolor == boost::black_color && vcolor != boost::black_color) { - assert(target(e, lg) != sink); + assert(v != lg.sink); black_cut.push_back(e); black_flow += lg[e].score; } @@ -609,21 +603,19 @@ u64a compressAndScore(set &s) { initialScore); LitGraph lg; - const LitVertex root = add_vertex(lg); - const LitVertex sink = add_vertex(lg); for (const auto &lit : s) { - addReversedLiteral(lit, lg, root, sink); + addReversedLiteral(lit, lg); } DEBUG_PRINTF("suffix tree has %zu vertices and %zu edges\n", num_vertices(lg), num_edges(lg)); vector cutset; - findMinCut(lg, root, sink, cutset); + findMinCut(lg, cutset); s.clear(); - extractLiterals(cutset, lg, root, s); + extractLiterals(cutset, lg, s); u64a score = scoreSet(s); DEBUG_PRINTF("compressed score is %llu\n", score);