move mergeDupeLeaves() and uncalcLeaves() to rose_build_role_aliasing

Unlike the rest of rose_build_merge, these functions relate to merging roles/vertices rather than merging engines.
2025-06-28 16:41:01 +03:00 · 2017-08-10 15:42:26 +10:00 · 2017-08-10 15:42:26 +10:00 · 47e64646b4
commit 47e64646b4
parent a97cdba8cc
5 changed files with 317 additions and 306 deletions
--- a/src/rose/rose_build_merge.cpp
+++ b/src/rose/rose_build_merge.cpp
@ -118,303 +118,6 @@ size_t small_rose_threshold(const CompileContext &cc) {
                        : SMALL_ROSE_THRESHOLD_BLOCK;
 }

-namespace {
-struct DupeLeafKey {
-    explicit DupeLeafKey(const RoseVertexProps &litv)
-        : literals(litv.literals), reports(litv.reports),
-          eod_accept(litv.eod_accept), suffix(litv.suffix), left(litv.left),
-          som_adjust(litv.som_adjust) {
-        DEBUG_PRINTF("eod_accept %d\n", (int)eod_accept);
-        DEBUG_PRINTF("report %u\n", left.leftfix_report);
-        DEBUG_PRINTF("lag %u\n", left.lag);
-    }
-
-    bool operator<(const DupeLeafKey &b) const {
-        const DupeLeafKey &a = *this;
-        ORDER_CHECK(literals);
-        ORDER_CHECK(eod_accept);
-        ORDER_CHECK(suffix);
-        ORDER_CHECK(reports);
-        ORDER_CHECK(som_adjust);
-        ORDER_CHECK(left.leftfix_report);
-        ORDER_CHECK(left.lag);
-        return false;
-    }
-
-    flat_set<u32> literals;
-    flat_set<ReportID> reports;
-    bool eod_accept;
-    suffix_id suffix;
-    LeftEngInfo left;
-    u32 som_adjust;
-};
-
-struct UncalcLeafKey {
-    UncalcLeafKey(const RoseGraph &g, RoseVertex v)
-        : literals(g[v].literals), rose(g[v].left) {
-        for (const auto &e : in_edges_range(v, g)) {
-            RoseVertex u = source(e, g);
-            preds.insert(make_pair(u, g[e]));
-        }
-    }
-
-    bool operator<(const UncalcLeafKey &b) const {
-        const UncalcLeafKey &a = *this;
-        ORDER_CHECK(literals);
-        ORDER_CHECK(preds);
-        ORDER_CHECK(rose);
-        return false;
-    }
-
-    flat_set<u32> literals;
-    flat_set<pair<RoseVertex, RoseEdgeProps>> preds;
-    LeftEngInfo rose;
-};
-} // namespace
-
-/**
- * This function merges leaf vertices with the same literals and report
- * id/suffix. The leaf vertices of the graph are inspected and a mapping of
- * leaf vertex properties to vertices is built. If the same set of leaf
- * properties has already been seen when we inspect a vertex, we attempt to
- * merge the vertex in with the previously seen vertex. This process can fail
- * if the vertices share a common predecessor vertex but have a differing,
- * incompatible relationship (different bounds or infix) with the predecessor.
- *
- * This takes place after \ref dedupeSuffixes to increase effectiveness as the
- * same suffix is required for a merge to occur.
- */
-void mergeDupeLeaves(RoseBuildImpl &tbi) {
-    map<DupeLeafKey, RoseVertex> leaves;
-    vector<RoseVertex> changed;
-
-    RoseGraph &g = tbi.g;
-    for (auto v : vertices_range(g)) {
-        if (in_degree(v, g) == 0) {
-            assert(tbi.isAnyStart(v));
-            continue;
-        }
-
-        DEBUG_PRINTF("inspecting vertex index=%zu in_degree %zu "
-                     "out_degree %zu\n", g[v].index, in_degree(v, g),
-                     out_degree(v, g));
-
-        // Vertex must be a reporting leaf node
-        if (g[v].reports.empty() || !isLeafNode(v, g)) {
-            continue;
-        }
-
-        // At the moment, we ignore all successors of root or anchored_root,
-        // since many parts of our runtime assume that these have in-degree 1.
-        if (tbi.isRootSuccessor(v)) {
-            continue;
-        }
-
-        DupeLeafKey dupe(g[v]);
-        if (leaves.find(dupe) == leaves.end()) {
-            leaves.insert(make_pair(dupe, v));
-            continue;
-        }
-
-        RoseVertex t = leaves.find(dupe)->second;
-        DEBUG_PRINTF("found two leaf dupe roles, index=%zu,%zu\n", g[v].index,
-                     g[t].index);
-
-        vector<RoseEdge> deadEdges;
-        for (const auto &e : in_edges_range(v, g)) {
-            RoseVertex u = source(e, g);
-            DEBUG_PRINTF("u index=%zu\n", g[u].index);
-            if (RoseEdge et = edge(u, t, g)) {
-                if (g[et].minBound <= g[e].minBound
-                    && g[et].maxBound >= g[e].maxBound) {
-                    DEBUG_PRINTF("remove more constrained edge\n");
-                    deadEdges.push_back(e);
-                }
-            } else {
-                DEBUG_PRINTF("rehome edge: add %zu->%zu\n", g[u].index,
-                             g[t].index);
-                add_edge(u, t, g[e], g);
-                deadEdges.push_back(e);
-            }
-        }
-
-        if (!deadEdges.empty()) {
-            for (auto &e : deadEdges) {
-                remove_edge(e, g);
-            }
-            changed.push_back(v);
-            g[t].min_offset = min(g[t].min_offset, g[v].min_offset);
-            g[t].max_offset = max(g[t].max_offset, g[v].max_offset);
-        }
-    }
-    DEBUG_PRINTF("find loop done\n");
-
-    // Remove any vertices that now have no in-edges.
-    size_t countRemovals = 0;
-    for (size_t i = 0; i < changed.size(); i++) {
-        RoseVertex v = changed[i];
-        if (in_degree(v, g) == 0) {
-            DEBUG_PRINTF("remove vertex\n");
-            if (!tbi.isVirtualVertex(v)) {
-                for (u32 lit_id : g[v].literals) {
-                    tbi.literal_info[lit_id].vertices.erase(v);
-                }
-            }
-            remove_vertex(v, g);
-            countRemovals++;
-        }
-    }
-
-    // if we've removed anything, we need to renumber vertices
-    if (countRemovals) {
-        renumber_vertices(g);
-        DEBUG_PRINTF("removed %zu vertices.\n", countRemovals);
-    }
-}
-
-/** Merges the suffixes on the (identical) vertices in \a vcluster, used by
- * \ref uncalcLeaves. */
-static
-void mergeCluster(RoseGraph &g, const ReportManager &rm,
-                  const vector<RoseVertex> &vcluster,
-                  vector<RoseVertex> &dead, const CompileContext &cc) {
-    if (vcluster.size() <= 1) {
-        return; // No merge to perform.
-    }
-
-    // Note that we batch merges up fairly crudely for performance reasons.
-    vector<RoseVertex>::const_iterator it = vcluster.begin(), it2;
-    while (it != vcluster.end()) {
-        vector<NGHolder *> cluster;
-        map<NGHolder *, RoseVertex> rev;
-
-        for (it2 = it;
-             it2 != vcluster.end() && cluster.size() < MERGE_GROUP_SIZE_MAX;
-             ++it2) {
-            RoseVertex v = *it2;
-            NGHolder *h = g[v].suffix.graph.get();
-            assert(!g[v].suffix.haig); /* should not be here if haig */
-            rev[h] = v;
-            cluster.push_back(h);
-        }
-        it = it2;
-
-        DEBUG_PRINTF("merging cluster %zu\n", cluster.size());
-        auto merged = mergeNfaCluster(cluster, &rm, cc);
-        DEBUG_PRINTF("done\n");
-
-        for (const auto &m : merged) {
-            NGHolder *h_victim = m.first; // mergee
-            NGHolder *h_winner = m.second;
-            RoseVertex victim = rev[h_victim];
-            RoseVertex winner = rev[h_winner];
-
-            LIMIT_TO_AT_MOST(&g[winner].min_offset, g[victim].min_offset);
-            ENSURE_AT_LEAST(&g[winner].max_offset, g[victim].max_offset);
-            insert(&g[winner].reports, g[victim].reports);
-
-            dead.push_back(victim);
-        }
-    }
-}
-
-static
-void findUncalcLeavesCandidates(RoseBuildImpl &tbi,
-                           map<UncalcLeafKey, vector<RoseVertex> > &clusters,
-                           deque<UncalcLeafKey> &ordered) {
-    const RoseGraph &g = tbi.g;
-
-    vector<RoseVertex> suffix_vertices; // vertices with suffix graphs
-    unordered_map<const NGHolder *, u32> fcount; // ref count per graph
-
-    for (auto v : vertices_range(g)) {
-        if (g[v].suffix) {
-            if (!g[v].suffix.graph) {
-                continue; /* cannot uncalc (haig/mcclellan); TODO */
-            }
-
-            assert(g[v].suffix.graph->kind == NFA_SUFFIX);
-
-            // Ref count all suffixes, as we don't want to merge a suffix
-            // that happens to be shared with a non-leaf vertex somewhere.
-            DEBUG_PRINTF("vertex %zu has suffix %p\n", g[v].index,
-                         g[v].suffix.graph.get());
-            fcount[g[v].suffix.graph.get()]++;
-
-            // Vertex must be a reporting pseudo accept
-            if (!isLeafNode(v, g)) {
-                continue;
-            }
-
-            suffix_vertices.push_back(v);
-        }
-    }
-
-    for (auto v : suffix_vertices) {
-        if (in_degree(v, g) == 0) {
-            assert(tbi.isAnyStart(v));
-            continue;
-        }
-
-        const NGHolder *h = g[v].suffix.graph.get();
-        assert(h);
-        DEBUG_PRINTF("suffix %p\n", h);
-
-        // We can't easily merge suffixes shared with other vertices, and
-        // creating a unique copy to do so may just mean we end up tracking
-        // more NFAs. Better to leave shared suffixes alone.
-        if (fcount[h] != 1) {
-            DEBUG_PRINTF("skipping shared suffix\n");
-            continue;
-        }
-
-        UncalcLeafKey key(g, v);
-        vector<RoseVertex> &vec = clusters[key];
-        if (vec.empty()) {
-
-            ordered.push_back(key);
-        }
-        vec.push_back(v);
-    }
-
-    DEBUG_PRINTF("find loop done\n");
-}
-
-/**
- * This function attempts to combine identical roles (same literals, same
- * predecessors, etc) with different suffixes into a single role which
- * activates a larger suffix. The leaf vertices of the graph with a suffix are
- * grouped into clusters which have members triggered by identical roles. The
- * \ref mergeNfaCluster function (from ng_uncalc_components) is then utilised
- * to build a set of larger (and still implementable) suffixes. The graph is
- * then updated to point to the new suffixes and any unneeded roles are
- * removed.
- *
- * Note: suffixes which are shared amongst multiple roles are not considered
- * for this pass as the individual suffixes would have to continue to exist for
- * the other roles to trigger resulting in the transformation not producing any
- * savings.
- *
- * Note: as \ref mergeNfaCluster is slow when the cluster sizes are large,
- * clusters of more than \ref MERGE_GROUP_SIZE_MAX roles are split into smaller
- * chunks for processing.
- */
-void uncalcLeaves(RoseBuildImpl &tbi) {
-    DEBUG_PRINTF("uncalcing\n");
-
-    map<UncalcLeafKey, vector<RoseVertex> > clusters;
-    deque<UncalcLeafKey> ordered;
-    findUncalcLeavesCandidates(tbi, clusters, ordered);
-
-    vector<RoseVertex> dead;
-
-    for (const auto &key : ordered) {
-        DEBUG_PRINTF("cluster of size %zu\n", clusters[key].size());
-        mergeCluster(tbi.g, tbi.rm, clusters[key], dead, tbi.cc);
-    }
-    tbi.removeVertices(dead);
-}
-
 /**
 * Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that
 * reports should not contribute to the hash.
--- a/src/rose/rose_build_merge.h
+++ b/src/rose/rose_build_merge.h
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -27,8 +27,8 @@
 */

 /** \file
- * \brief Rose Build: functions for reducing the size of the Rose graph
- * through merging.
+ * \brief Rose Build: functions for reducing the number of engines in a Rose
+ * graph through merging or deduplicating engines.
 */

 #ifndef ROSE_BUILD_MERGE_H
@ -44,9 +44,6 @@ namespace ue2 {
 class NGHolder;
 class RoseBuildImpl;

-void mergeDupeLeaves(RoseBuildImpl &tbi);
-void uncalcLeaves(RoseBuildImpl &tbi);
-
 bool dedupeLeftfixes(RoseBuildImpl &tbi);
 void mergeLeftfixesVariableLag(RoseBuildImpl &tbi);
 void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi);
--- a/src/rose/rose_build_role_aliasing.cpp
+++ b/src/rose/rose_build_role_aliasing.cpp
@ -62,6 +62,8 @@ using boost::adaptors::map_values;

 namespace ue2 {

+static constexpr size_t MERGE_GROUP_SIZE_MAX = 200;
+
 namespace {
 // Used for checking edge sets (both in- and out-) against each other.
 struct EdgeAndVertex {
@ -2026,4 +2028,304 @@ void aliasRoles(RoseBuildImpl &build, bool mergeRoses) {
    assert(canImplementGraphs(build));
 }

+namespace {
+struct DupeLeafKey {
+    explicit DupeLeafKey(const RoseVertexProps &litv)
+        : literals(litv.literals), reports(litv.reports),
+          eod_accept(litv.eod_accept), suffix(litv.suffix), left(litv.left),
+          som_adjust(litv.som_adjust) {
+        DEBUG_PRINTF("eod_accept %d\n", (int)eod_accept);
+        DEBUG_PRINTF("report %u\n", left.leftfix_report);
+        DEBUG_PRINTF("lag %u\n", left.lag);
+    }
+
+    bool operator<(const DupeLeafKey &b) const {
+        const DupeLeafKey &a = *this;
+        ORDER_CHECK(literals);
+        ORDER_CHECK(eod_accept);
+        ORDER_CHECK(suffix);
+        ORDER_CHECK(reports);
+        ORDER_CHECK(som_adjust);
+        ORDER_CHECK(left.leftfix_report);
+        ORDER_CHECK(left.lag);
+        return false;
+    }
+
+    flat_set<u32> literals;
+    flat_set<ReportID> reports;
+    bool eod_accept;
+    suffix_id suffix;
+    LeftEngInfo left;
+    u32 som_adjust;
+};
+
+struct UncalcLeafKey {
+    UncalcLeafKey(const RoseGraph &g, RoseVertex v)
+        : literals(g[v].literals), rose(g[v].left) {
+        for (const auto &e : in_edges_range(v, g)) {
+            RoseVertex u = source(e, g);
+            preds.insert(make_pair(u, g[e]));
+        }
+    }
+
+    bool operator<(const UncalcLeafKey &b) const {
+        const UncalcLeafKey &a = *this;
+        ORDER_CHECK(literals);
+        ORDER_CHECK(preds);
+        ORDER_CHECK(rose);
+        return false;
+    }
+
+    flat_set<u32> literals;
+    flat_set<pair<RoseVertex, RoseEdgeProps>> preds;
+    LeftEngInfo rose;
+};
+} // namespace
+
+/**
+ * This function merges leaf vertices with the same literals and report
+ * id/suffix. The leaf vertices of the graph are inspected and a mapping of
+ * leaf vertex properties to vertices is built. If the same set of leaf
+ * properties has already been seen when we inspect a vertex, we attempt to
+ * merge the vertex in with the previously seen vertex. This process can fail
+ * if the vertices share a common predecessor vertex but have a differing,
+ * incompatible relationship (different bounds or infix) with the predecessor.
+ *
+ * This takes place after \ref dedupeSuffixes to increase effectiveness as the
+ * same suffix is required for a merge to occur.
+ *
+ * TODO: work out if this is a subset of role aliasing (and can be eliminated)
+ * or clearly document cases that would not be covered by role aliasing.
+ */
+void mergeDupeLeaves(RoseBuildImpl &build) {
+    map<DupeLeafKey, RoseVertex> leaves;
+    vector<RoseVertex> changed;
+
+    RoseGraph &g = build.g;
+    for (auto v : vertices_range(g)) {
+        if (in_degree(v, g) == 0) {
+            assert(build.isAnyStart(v));
+            continue;
+        }
+
+        DEBUG_PRINTF("inspecting vertex index=%zu in_degree %zu "
+                     "out_degree %zu\n", g[v].index, in_degree(v, g),
+                     out_degree(v, g));
+
+        // Vertex must be a reporting leaf node
+        if (g[v].reports.empty() || !isLeafNode(v, g)) {
+            continue;
+        }
+
+        // At the moment, we ignore all successors of root or anchored_root,
+        // since many parts of our runtime assume that these have in-degree 1.
+        if (build.isRootSuccessor(v)) {
+            continue;
+        }
+
+        DupeLeafKey dupe(g[v]);
+        if (leaves.find(dupe) == leaves.end()) {
+            leaves.insert(make_pair(dupe, v));
+            continue;
+        }
+
+        RoseVertex t = leaves.find(dupe)->second;
+        DEBUG_PRINTF("found two leaf dupe roles, index=%zu,%zu\n", g[v].index,
+                     g[t].index);
+
+        vector<RoseEdge> deadEdges;
+        for (const auto &e : in_edges_range(v, g)) {
+            RoseVertex u = source(e, g);
+            DEBUG_PRINTF("u index=%zu\n", g[u].index);
+            if (RoseEdge et = edge(u, t, g)) {
+                if (g[et].minBound <= g[e].minBound
+                    && g[et].maxBound >= g[e].maxBound) {
+                    DEBUG_PRINTF("remove more constrained edge\n");
+                    deadEdges.push_back(e);
+                }
+            } else {
+                DEBUG_PRINTF("rehome edge: add %zu->%zu\n", g[u].index,
+                             g[t].index);
+                add_edge(u, t, g[e], g);
+                deadEdges.push_back(e);
+            }
+        }
+
+        if (!deadEdges.empty()) {
+            for (auto &e : deadEdges) {
+                remove_edge(e, g);
+            }
+            changed.push_back(v);
+            g[t].min_offset = min(g[t].min_offset, g[v].min_offset);
+            g[t].max_offset = max(g[t].max_offset, g[v].max_offset);
+        }
+    }
+    DEBUG_PRINTF("find loop done\n");
+
+    // Remove any vertices that now have no in-edges.
+    size_t countRemovals = 0;
+    for (size_t i = 0; i < changed.size(); i++) {
+        RoseVertex v = changed[i];
+        if (in_degree(v, g) == 0) {
+            DEBUG_PRINTF("remove vertex\n");
+            if (!build.isVirtualVertex(v)) {
+                for (u32 lit_id : g[v].literals) {
+                    build.literal_info[lit_id].vertices.erase(v);
+                }
+            }
+            remove_vertex(v, g);
+            countRemovals++;
+        }
+    }
+
+    // if we've removed anything, we need to renumber vertices
+    if (countRemovals) {
+        renumber_vertices(g);
+        DEBUG_PRINTF("removed %zu vertices.\n", countRemovals);
+    }
+}
+
+/** Merges the suffixes on the (identical) vertices in \a vcluster, used by
+ * \ref uncalcLeaves. */
+static
+void mergeCluster(RoseGraph &g, const ReportManager &rm,
+                  const vector<RoseVertex> &vcluster,
+                  vector<RoseVertex> &dead, const CompileContext &cc) {
+    if (vcluster.size() <= 1) {
+        return; // No merge to perform.
+    }
+
+    // Note that we batch merges up fairly crudely for performance reasons.
+    vector<RoseVertex>::const_iterator it = vcluster.begin(), it2;
+    while (it != vcluster.end()) {
+        vector<NGHolder *> cluster;
+        map<NGHolder *, RoseVertex> rev;
+
+        for (it2 = it;
+             it2 != vcluster.end() && cluster.size() < MERGE_GROUP_SIZE_MAX;
+             ++it2) {
+            RoseVertex v = *it2;
+            NGHolder *h = g[v].suffix.graph.get();
+            assert(!g[v].suffix.haig); /* should not be here if haig */
+            rev[h] = v;
+            cluster.push_back(h);
+        }
+        it = it2;
+
+        DEBUG_PRINTF("merging cluster %zu\n", cluster.size());
+        auto merged = mergeNfaCluster(cluster, &rm, cc);
+        DEBUG_PRINTF("done\n");
+
+        for (const auto &m : merged) {
+            NGHolder *h_victim = m.first; // mergee
+            NGHolder *h_winner = m.second;
+            RoseVertex victim = rev[h_victim];
+            RoseVertex winner = rev[h_winner];
+
+            LIMIT_TO_AT_MOST(&g[winner].min_offset, g[victim].min_offset);
+            ENSURE_AT_LEAST(&g[winner].max_offset, g[victim].max_offset);
+            insert(&g[winner].reports, g[victim].reports);
+
+            dead.push_back(victim);
+        }
+    }
+}
+
+static
+void findUncalcLeavesCandidates(RoseBuildImpl &build,
+                           map<UncalcLeafKey, vector<RoseVertex> > &clusters,
+                           deque<UncalcLeafKey> &ordered) {
+    const RoseGraph &g = build.g;
+
+    vector<RoseVertex> suffix_vertices; // vertices with suffix graphs
+    unordered_map<const NGHolder *, u32> fcount; // ref count per graph
+
+    for (auto v : vertices_range(g)) {
+        if (g[v].suffix) {
+            if (!g[v].suffix.graph) {
+                continue; /* cannot uncalc (haig/mcclellan); TODO */
+            }
+
+            assert(g[v].suffix.graph->kind == NFA_SUFFIX);
+
+            // Ref count all suffixes, as we don't want to merge a suffix
+            // that happens to be shared with a non-leaf vertex somewhere.
+            DEBUG_PRINTF("vertex %zu has suffix %p\n", g[v].index,
+                         g[v].suffix.graph.get());
+            fcount[g[v].suffix.graph.get()]++;
+
+            // Vertex must be a reporting pseudo accept
+            if (!isLeafNode(v, g)) {
+                continue;
+            }
+
+            suffix_vertices.push_back(v);
+        }
+    }
+
+    for (auto v : suffix_vertices) {
+        if (in_degree(v, g) == 0) {
+            assert(build.isAnyStart(v));
+            continue;
+        }
+
+        const NGHolder *h = g[v].suffix.graph.get();
+        assert(h);
+        DEBUG_PRINTF("suffix %p\n", h);
+
+        // We can't easily merge suffixes shared with other vertices, and
+        // creating a unique copy to do so may just mean we end up tracking
+        // more NFAs. Better to leave shared suffixes alone.
+        if (fcount[h] != 1) {
+            DEBUG_PRINTF("skipping shared suffix\n");
+            continue;
+        }
+
+        UncalcLeafKey key(g, v);
+        vector<RoseVertex> &vec = clusters[key];
+        if (vec.empty()) {
+
+            ordered.push_back(key);
+        }
+        vec.push_back(v);
+    }
+
+    DEBUG_PRINTF("find loop done\n");
+}
+
+/**
+ * This function attempts to combine identical roles (same literals, same
+ * predecessors, etc) with different suffixes into a single role which
+ * activates a larger suffix. The leaf vertices of the graph with a suffix are
+ * grouped into clusters which have members triggered by identical roles. The
+ * \ref mergeNfaCluster function (from ng_uncalc_components) is then utilised
+ * to build a set of larger (and still implementable) suffixes. The graph is
+ * then updated to point to the new suffixes and any unneeded roles are
+ * removed.
+ *
+ * Note: suffixes which are shared amongst multiple roles are not considered
+ * for this pass as the individual suffixes would have to continue to exist for
+ * the other roles to trigger resulting in the transformation not producing any
+ * savings.
+ *
+ * Note: as \ref mergeNfaCluster is slow when the cluster sizes are large,
+ * clusters of more than \ref MERGE_GROUP_SIZE_MAX roles are split into smaller
+ * chunks for processing.
+ */
+void uncalcLeaves(RoseBuildImpl &build) {
+    DEBUG_PRINTF("uncalcing\n");
+
+    map<UncalcLeafKey, vector<RoseVertex> > clusters;
+    deque<UncalcLeafKey> ordered;
+    findUncalcLeavesCandidates(build, clusters, ordered);
+
+    vector<RoseVertex> dead;
+
+    for (const auto &key : ordered) {
+        DEBUG_PRINTF("cluster of size %zu\n", clusters[key].size());
+        mergeCluster(build.g, build.rm, clusters[key], dead, build.cc);
+    }
+    build.removeVertices(dead);
+}
+
 } // namespace ue2
--- a/src/rose/rose_build_role_aliasing.h
+++ b/src/rose/rose_build_role_aliasing.h
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@ -26,8 +26,13 @@
 * POSSIBILITY OF SUCH DAMAGE.
 */

-#ifndef ROSE_BUILD_ROLE_ALIASING
-#define ROSE_BUILD_ROLE_ALIASING
+#ifndef ROSE_BUILD_ROLE_ALIASING_H
+#define ROSE_BUILD_ROLE_ALIASING_H
+
+/** \file
+ * \brief Rose Build: functions for reducing the size of the Rose graph
+ * through merging roles (RoseVertices) together.
+ */

 namespace ue2 {

@ -35,6 +40,9 @@ class RoseBuildImpl;

 void aliasRoles(RoseBuildImpl &build, bool mergeRoses);

+void mergeDupeLeaves(RoseBuildImpl &build);
+void uncalcLeaves(RoseBuildImpl &build);
+
 } // namespace ue2

 #endif
--- a/unit/internal/rose_build_merge.cpp
+++ b/unit/internal/rose_build_merge.cpp
@ -34,6 +34,7 @@
 #include "rose/rose_build.h"
 #include "rose/rose_build_impl.h"
 #include "rose/rose_build_merge.h"
+#include "rose/rose_build_role_aliasing.h"
 #include "util/report_manager.h"
 #include "util/boundary_reports.h"
 #include "util/compile_context.h"