diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 6bd76381..04d5e7d0 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -118,303 +118,6 @@ size_t small_rose_threshold(const CompileContext &cc) { : SMALL_ROSE_THRESHOLD_BLOCK; } -namespace { -struct DupeLeafKey { - explicit DupeLeafKey(const RoseVertexProps &litv) - : literals(litv.literals), reports(litv.reports), - eod_accept(litv.eod_accept), suffix(litv.suffix), left(litv.left), - som_adjust(litv.som_adjust) { - DEBUG_PRINTF("eod_accept %d\n", (int)eod_accept); - DEBUG_PRINTF("report %u\n", left.leftfix_report); - DEBUG_PRINTF("lag %u\n", left.lag); - } - - bool operator<(const DupeLeafKey &b) const { - const DupeLeafKey &a = *this; - ORDER_CHECK(literals); - ORDER_CHECK(eod_accept); - ORDER_CHECK(suffix); - ORDER_CHECK(reports); - ORDER_CHECK(som_adjust); - ORDER_CHECK(left.leftfix_report); - ORDER_CHECK(left.lag); - return false; - } - - flat_set literals; - flat_set reports; - bool eod_accept; - suffix_id suffix; - LeftEngInfo left; - u32 som_adjust; -}; - -struct UncalcLeafKey { - UncalcLeafKey(const RoseGraph &g, RoseVertex v) - : literals(g[v].literals), rose(g[v].left) { - for (const auto &e : in_edges_range(v, g)) { - RoseVertex u = source(e, g); - preds.insert(make_pair(u, g[e])); - } - } - - bool operator<(const UncalcLeafKey &b) const { - const UncalcLeafKey &a = *this; - ORDER_CHECK(literals); - ORDER_CHECK(preds); - ORDER_CHECK(rose); - return false; - } - - flat_set literals; - flat_set> preds; - LeftEngInfo rose; -}; -} // namespace - -/** - * This function merges leaf vertices with the same literals and report - * id/suffix. The leaf vertices of the graph are inspected and a mapping of - * leaf vertex properties to vertices is built. If the same set of leaf - * properties has already been seen when we inspect a vertex, we attempt to - * merge the vertex in with the previously seen vertex. 
This process can fail - * if the vertices share a common predecessor vertex but have a differing, - * incompatible relationship (different bounds or infix) with the predecessor. - * - * This takes place after \ref dedupeSuffixes to increase effectiveness as the - * same suffix is required for a merge to occur. - */ -void mergeDupeLeaves(RoseBuildImpl &tbi) { - map leaves; - vector changed; - - RoseGraph &g = tbi.g; - for (auto v : vertices_range(g)) { - if (in_degree(v, g) == 0) { - assert(tbi.isAnyStart(v)); - continue; - } - - DEBUG_PRINTF("inspecting vertex index=%zu in_degree %zu " - "out_degree %zu\n", g[v].index, in_degree(v, g), - out_degree(v, g)); - - // Vertex must be a reporting leaf node - if (g[v].reports.empty() || !isLeafNode(v, g)) { - continue; - } - - // At the moment, we ignore all successors of root or anchored_root, - // since many parts of our runtime assume that these have in-degree 1. - if (tbi.isRootSuccessor(v)) { - continue; - } - - DupeLeafKey dupe(g[v]); - if (leaves.find(dupe) == leaves.end()) { - leaves.insert(make_pair(dupe, v)); - continue; - } - - RoseVertex t = leaves.find(dupe)->second; - DEBUG_PRINTF("found two leaf dupe roles, index=%zu,%zu\n", g[v].index, - g[t].index); - - vector deadEdges; - for (const auto &e : in_edges_range(v, g)) { - RoseVertex u = source(e, g); - DEBUG_PRINTF("u index=%zu\n", g[u].index); - if (RoseEdge et = edge(u, t, g)) { - if (g[et].minBound <= g[e].minBound - && g[et].maxBound >= g[e].maxBound) { - DEBUG_PRINTF("remove more constrained edge\n"); - deadEdges.push_back(e); - } - } else { - DEBUG_PRINTF("rehome edge: add %zu->%zu\n", g[u].index, - g[t].index); - add_edge(u, t, g[e], g); - deadEdges.push_back(e); - } - } - - if (!deadEdges.empty()) { - for (auto &e : deadEdges) { - remove_edge(e, g); - } - changed.push_back(v); - g[t].min_offset = min(g[t].min_offset, g[v].min_offset); - g[t].max_offset = max(g[t].max_offset, g[v].max_offset); - } - } - DEBUG_PRINTF("find loop done\n"); - - // Remove 
any vertices that now have no in-edges. - size_t countRemovals = 0; - for (size_t i = 0; i < changed.size(); i++) { - RoseVertex v = changed[i]; - if (in_degree(v, g) == 0) { - DEBUG_PRINTF("remove vertex\n"); - if (!tbi.isVirtualVertex(v)) { - for (u32 lit_id : g[v].literals) { - tbi.literal_info[lit_id].vertices.erase(v); - } - } - remove_vertex(v, g); - countRemovals++; - } - } - - // if we've removed anything, we need to renumber vertices - if (countRemovals) { - renumber_vertices(g); - DEBUG_PRINTF("removed %zu vertices.\n", countRemovals); - } -} - -/** Merges the suffixes on the (identical) vertices in \a vcluster, used by - * \ref uncalcLeaves. */ -static -void mergeCluster(RoseGraph &g, const ReportManager &rm, - const vector &vcluster, - vector &dead, const CompileContext &cc) { - if (vcluster.size() <= 1) { - return; // No merge to perform. - } - - // Note that we batch merges up fairly crudely for performance reasons. - vector::const_iterator it = vcluster.begin(), it2; - while (it != vcluster.end()) { - vector cluster; - map rev; - - for (it2 = it; - it2 != vcluster.end() && cluster.size() < MERGE_GROUP_SIZE_MAX; - ++it2) { - RoseVertex v = *it2; - NGHolder *h = g[v].suffix.graph.get(); - assert(!g[v].suffix.haig); /* should not be here if haig */ - rev[h] = v; - cluster.push_back(h); - } - it = it2; - - DEBUG_PRINTF("merging cluster %zu\n", cluster.size()); - auto merged = mergeNfaCluster(cluster, &rm, cc); - DEBUG_PRINTF("done\n"); - - for (const auto &m : merged) { - NGHolder *h_victim = m.first; // mergee - NGHolder *h_winner = m.second; - RoseVertex victim = rev[h_victim]; - RoseVertex winner = rev[h_winner]; - - LIMIT_TO_AT_MOST(&g[winner].min_offset, g[victim].min_offset); - ENSURE_AT_LEAST(&g[winner].max_offset, g[victim].max_offset); - insert(&g[winner].reports, g[victim].reports); - - dead.push_back(victim); - } - } -} - -static -void findUncalcLeavesCandidates(RoseBuildImpl &tbi, - map > &clusters, - deque &ordered) { - const RoseGraph &g = 
tbi.g; - - vector suffix_vertices; // vertices with suffix graphs - unordered_map fcount; // ref count per graph - - for (auto v : vertices_range(g)) { - if (g[v].suffix) { - if (!g[v].suffix.graph) { - continue; /* cannot uncalc (haig/mcclellan); TODO */ - } - - assert(g[v].suffix.graph->kind == NFA_SUFFIX); - - // Ref count all suffixes, as we don't want to merge a suffix - // that happens to be shared with a non-leaf vertex somewhere. - DEBUG_PRINTF("vertex %zu has suffix %p\n", g[v].index, - g[v].suffix.graph.get()); - fcount[g[v].suffix.graph.get()]++; - - // Vertex must be a reporting pseudo accept - if (!isLeafNode(v, g)) { - continue; - } - - suffix_vertices.push_back(v); - } - } - - for (auto v : suffix_vertices) { - if (in_degree(v, g) == 0) { - assert(tbi.isAnyStart(v)); - continue; - } - - const NGHolder *h = g[v].suffix.graph.get(); - assert(h); - DEBUG_PRINTF("suffix %p\n", h); - - // We can't easily merge suffixes shared with other vertices, and - // creating a unique copy to do so may just mean we end up tracking - // more NFAs. Better to leave shared suffixes alone. - if (fcount[h] != 1) { - DEBUG_PRINTF("skipping shared suffix\n"); - continue; - } - - UncalcLeafKey key(g, v); - vector &vec = clusters[key]; - if (vec.empty()) { - - ordered.push_back(key); - } - vec.push_back(v); - } - - DEBUG_PRINTF("find loop done\n"); -} - -/** - * This function attempts to combine identical roles (same literals, same - * predecessors, etc) with different suffixes into a single role which - * activates a larger suffix. The leaf vertices of the graph with a suffix are - * grouped into clusters which have members triggered by identical roles. The - * \ref mergeNfaCluster function (from ng_uncalc_components) is then utilised - * to build a set of larger (and still implementable) suffixes. The graph is - * then updated to point to the new suffixes and any unneeded roles are - * removed. 
- * - * Note: suffixes which are shared amongst multiple roles are not considered - * for this pass as the individual suffixes would have to continue to exist for - * the other roles to trigger resulting in the transformation not producing any - * savings. - * - * Note: as \ref mergeNfaCluster is slow when the cluster sizes are large, - * clusters of more than \ref MERGE_GROUP_SIZE_MAX roles are split into smaller - * chunks for processing. - */ -void uncalcLeaves(RoseBuildImpl &tbi) { - DEBUG_PRINTF("uncalcing\n"); - - map > clusters; - deque ordered; - findUncalcLeavesCandidates(tbi, clusters, ordered); - - vector dead; - - for (const auto &key : ordered) { - DEBUG_PRINTF("cluster of size %zu\n", clusters[key].size()); - mergeCluster(tbi.g, tbi.rm, clusters[key], dead, tbi.cc); - } - tbi.removeVertices(dead); -} - /** * Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that * reports should not contribute to the hash. diff --git a/src/rose/rose_build_merge.h b/src/rose/rose_build_merge.h index 0f765bff..6de6c778 100644 --- a/src/rose/rose_build_merge.h +++ b/src/rose/rose_build_merge.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,8 +27,8 @@ */ /** \file - * \brief Rose Build: functions for reducing the size of the Rose graph - * through merging. + * \brief Rose Build: functions for reducing the number of engines in a Rose + * graph through merging or deduplicating engines. 
*/ #ifndef ROSE_BUILD_MERGE_H @@ -44,9 +44,6 @@ namespace ue2 { class NGHolder; class RoseBuildImpl; -void mergeDupeLeaves(RoseBuildImpl &tbi); -void uncalcLeaves(RoseBuildImpl &tbi); - bool dedupeLeftfixes(RoseBuildImpl &tbi); void mergeLeftfixesVariableLag(RoseBuildImpl &tbi); void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi); diff --git a/src/rose/rose_build_role_aliasing.cpp b/src/rose/rose_build_role_aliasing.cpp index ba71a3ea..22581caf 100644 --- a/src/rose/rose_build_role_aliasing.cpp +++ b/src/rose/rose_build_role_aliasing.cpp @@ -62,6 +62,8 @@ using boost::adaptors::map_values; namespace ue2 { +static constexpr size_t MERGE_GROUP_SIZE_MAX = 200; + namespace { // Used for checking edge sets (both in- and out-) against each other. struct EdgeAndVertex { @@ -2026,4 +2028,304 @@ void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { assert(canImplementGraphs(build)); } +namespace { +struct DupeLeafKey { + explicit DupeLeafKey(const RoseVertexProps &litv) + : literals(litv.literals), reports(litv.reports), + eod_accept(litv.eod_accept), suffix(litv.suffix), left(litv.left), + som_adjust(litv.som_adjust) { + DEBUG_PRINTF("eod_accept %d\n", (int)eod_accept); + DEBUG_PRINTF("report %u\n", left.leftfix_report); + DEBUG_PRINTF("lag %u\n", left.lag); + } + + bool operator<(const DupeLeafKey &b) const { + const DupeLeafKey &a = *this; + ORDER_CHECK(literals); + ORDER_CHECK(eod_accept); + ORDER_CHECK(suffix); + ORDER_CHECK(reports); + ORDER_CHECK(som_adjust); + ORDER_CHECK(left.leftfix_report); + ORDER_CHECK(left.lag); + return false; + } + + flat_set literals; + flat_set reports; + bool eod_accept; + suffix_id suffix; + LeftEngInfo left; + u32 som_adjust; +}; + +struct UncalcLeafKey { + UncalcLeafKey(const RoseGraph &g, RoseVertex v) + : literals(g[v].literals), rose(g[v].left) { + for (const auto &e : in_edges_range(v, g)) { + RoseVertex u = source(e, g); + preds.insert(make_pair(u, g[e])); + } + } + + bool operator<(const UncalcLeafKey &b) const { + const 
UncalcLeafKey &a = *this; + ORDER_CHECK(literals); + ORDER_CHECK(preds); + ORDER_CHECK(rose); + return false; + } + + flat_set literals; + flat_set> preds; + LeftEngInfo rose; +}; +} // namespace + +/** + * This function merges leaf vertices with the same literals and report + * id/suffix. The leaf vertices of the graph are inspected and a mapping of + * leaf vertex properties to vertices is built. If the same set of leaf + * properties has already been seen when we inspect a vertex, we attempt to + * merge the vertex in with the previously seen vertex. This process can fail + * if the vertices share a common predecessor vertex but have a differing, + * incompatible relationship (different bounds or infix) with the predecessor. + * + * This takes place after \ref dedupeSuffixes to increase effectiveness as the + * same suffix is required for a merge to occur. + * + * TODO: work out if this is a subset of role aliasing (and if it can be eliminated) + * or clearly document cases that would not be covered by role aliasing. + */ +void mergeDupeLeaves(RoseBuildImpl &build) { + map leaves; + vector changed; + + RoseGraph &g = build.g; + for (auto v : vertices_range(g)) { + if (in_degree(v, g) == 0) { + assert(build.isAnyStart(v)); + continue; + } + + DEBUG_PRINTF("inspecting vertex index=%zu in_degree %zu " + "out_degree %zu\n", g[v].index, in_degree(v, g), + out_degree(v, g)); + + // Vertex must be a reporting leaf node + if (g[v].reports.empty() || !isLeafNode(v, g)) { + continue; + } + + // At the moment, we ignore all successors of root or anchored_root, + // since many parts of our runtime assume that these have in-degree 1. 
+ if (build.isRootSuccessor(v)) { + continue; + } + + DupeLeafKey dupe(g[v]); + if (leaves.find(dupe) == leaves.end()) { + leaves.insert(make_pair(dupe, v)); + continue; + } + + RoseVertex t = leaves.find(dupe)->second; + DEBUG_PRINTF("found two leaf dupe roles, index=%zu,%zu\n", g[v].index, + g[t].index); + + vector deadEdges; + for (const auto &e : in_edges_range(v, g)) { + RoseVertex u = source(e, g); + DEBUG_PRINTF("u index=%zu\n", g[u].index); + if (RoseEdge et = edge(u, t, g)) { + if (g[et].minBound <= g[e].minBound + && g[et].maxBound >= g[e].maxBound) { + DEBUG_PRINTF("remove more constrained edge\n"); + deadEdges.push_back(e); + } + } else { + DEBUG_PRINTF("rehome edge: add %zu->%zu\n", g[u].index, + g[t].index); + add_edge(u, t, g[e], g); + deadEdges.push_back(e); + } + } + + if (!deadEdges.empty()) { + for (auto &e : deadEdges) { + remove_edge(e, g); + } + changed.push_back(v); + g[t].min_offset = min(g[t].min_offset, g[v].min_offset); + g[t].max_offset = max(g[t].max_offset, g[v].max_offset); + } + } + DEBUG_PRINTF("find loop done\n"); + + // Remove any vertices that now have no in-edges. + size_t countRemovals = 0; + for (size_t i = 0; i < changed.size(); i++) { + RoseVertex v = changed[i]; + if (in_degree(v, g) == 0) { + DEBUG_PRINTF("remove vertex\n"); + if (!build.isVirtualVertex(v)) { + for (u32 lit_id : g[v].literals) { + build.literal_info[lit_id].vertices.erase(v); + } + } + remove_vertex(v, g); + countRemovals++; + } + } + + // if we've removed anything, we need to renumber vertices + if (countRemovals) { + renumber_vertices(g); + DEBUG_PRINTF("removed %zu vertices.\n", countRemovals); + } +} + +/** Merges the suffixes on the (identical) vertices in \a vcluster, used by + * \ref uncalcLeaves. */ +static +void mergeCluster(RoseGraph &g, const ReportManager &rm, + const vector &vcluster, + vector &dead, const CompileContext &cc) { + if (vcluster.size() <= 1) { + return; // No merge to perform. 
+ } + + // Note that we batch merges up fairly crudely for performance reasons. + vector::const_iterator it = vcluster.begin(), it2; + while (it != vcluster.end()) { + vector cluster; + map rev; + + for (it2 = it; + it2 != vcluster.end() && cluster.size() < MERGE_GROUP_SIZE_MAX; + ++it2) { + RoseVertex v = *it2; + NGHolder *h = g[v].suffix.graph.get(); + assert(!g[v].suffix.haig); /* should not be here if haig */ + rev[h] = v; + cluster.push_back(h); + } + it = it2; + + DEBUG_PRINTF("merging cluster %zu\n", cluster.size()); + auto merged = mergeNfaCluster(cluster, &rm, cc); + DEBUG_PRINTF("done\n"); + + for (const auto &m : merged) { + NGHolder *h_victim = m.first; // mergee + NGHolder *h_winner = m.second; + RoseVertex victim = rev[h_victim]; + RoseVertex winner = rev[h_winner]; + + LIMIT_TO_AT_MOST(&g[winner].min_offset, g[victim].min_offset); + ENSURE_AT_LEAST(&g[winner].max_offset, g[victim].max_offset); + insert(&g[winner].reports, g[victim].reports); + + dead.push_back(victim); + } + } +} + +static +void findUncalcLeavesCandidates(RoseBuildImpl &build, + map > &clusters, + deque &ordered) { + const RoseGraph &g = build.g; + + vector suffix_vertices; // vertices with suffix graphs + unordered_map fcount; // ref count per graph + + for (auto v : vertices_range(g)) { + if (g[v].suffix) { + if (!g[v].suffix.graph) { + continue; /* cannot uncalc (haig/mcclellan); TODO */ + } + + assert(g[v].suffix.graph->kind == NFA_SUFFIX); + + // Ref count all suffixes, as we don't want to merge a suffix + // that happens to be shared with a non-leaf vertex somewhere. 
+ DEBUG_PRINTF("vertex %zu has suffix %p\n", g[v].index, + g[v].suffix.graph.get()); + fcount[g[v].suffix.graph.get()]++; + + // Vertex must be a reporting pseudo accept + if (!isLeafNode(v, g)) { + continue; + } + + suffix_vertices.push_back(v); + } + } + + for (auto v : suffix_vertices) { + if (in_degree(v, g) == 0) { + assert(build.isAnyStart(v)); + continue; + } + + const NGHolder *h = g[v].suffix.graph.get(); + assert(h); + DEBUG_PRINTF("suffix %p\n", h); + + // We can't easily merge suffixes shared with other vertices, and + // creating a unique copy to do so may just mean we end up tracking + // more NFAs. Better to leave shared suffixes alone. + if (fcount[h] != 1) { + DEBUG_PRINTF("skipping shared suffix\n"); + continue; + } + + UncalcLeafKey key(g, v); + vector &vec = clusters[key]; + if (vec.empty()) { + + ordered.push_back(key); + } + vec.push_back(v); + } + + DEBUG_PRINTF("find loop done\n"); +} + +/** + * This function attempts to combine identical roles (same literals, same + * predecessors, etc) with different suffixes into a single role which + * activates a larger suffix. The leaf vertices of the graph with a suffix are + * grouped into clusters which have members triggered by identical roles. The + * \ref mergeNfaCluster function (from ng_uncalc_components) is then utilised + * to build a set of larger (and still implementable) suffixes. The graph is + * then updated to point to the new suffixes and any unneeded roles are + * removed. + * + * Note: suffixes which are shared amongst multiple roles are not considered + * for this pass as the individual suffixes would have to continue to exist for + * the other roles to trigger resulting in the transformation not producing any + * savings. + * + * Note: as \ref mergeNfaCluster is slow when the cluster sizes are large, + * clusters of more than \ref MERGE_GROUP_SIZE_MAX roles are split into smaller + * chunks for processing. 
+ */ +void uncalcLeaves(RoseBuildImpl &build) { + DEBUG_PRINTF("uncalcing\n"); + + map > clusters; + deque ordered; + findUncalcLeavesCandidates(build, clusters, ordered); + + vector dead; + + for (const auto &key : ordered) { + DEBUG_PRINTF("cluster of size %zu\n", clusters[key].size()); + mergeCluster(build.g, build.rm, clusters[key], dead, build.cc); + } + build.removeVertices(dead); +} + } // namespace ue2 diff --git a/src/rose/rose_build_role_aliasing.h b/src/rose/rose_build_role_aliasing.h index 274b76f9..4655f10d 100644 --- a/src/rose/rose_build_role_aliasing.h +++ b/src/rose/rose_build_role_aliasing.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,8 +26,13 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef ROSE_BUILD_ROLE_ALIASING -#define ROSE_BUILD_ROLE_ALIASING +#ifndef ROSE_BUILD_ROLE_ALIASING_H +#define ROSE_BUILD_ROLE_ALIASING_H + +/** \file + * \brief Rose Build: functions for reducing the size of the Rose graph + * through merging roles (RoseVertices) together. + */ namespace ue2 { @@ -35,6 +40,9 @@ class RoseBuildImpl; void aliasRoles(RoseBuildImpl &build, bool mergeRoses); +void mergeDupeLeaves(RoseBuildImpl &build); +void uncalcLeaves(RoseBuildImpl &build); + } // namespace ue2 #endif diff --git a/unit/internal/rose_build_merge.cpp b/unit/internal/rose_build_merge.cpp index ed7c2bdc..5029f0a5 100644 --- a/unit/internal/rose_build_merge.cpp +++ b/unit/internal/rose_build_merge.cpp @@ -34,6 +34,7 @@ #include "rose/rose_build.h" #include "rose/rose_build_impl.h" #include "rose/rose_build_merge.h" +#include "rose/rose_build_role_aliasing.h" #include "util/report_manager.h" #include "util/boundary_reports.h" #include "util/compile_context.h"