mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
move mergeDupeLeaves() and uncalcLeaves() to rose_build_role_aliasing
Unlike the rest of rose_build_merge, these functions relate to merging roles/vertices rather than merging engines.
This commit is contained in:
parent
a97cdba8cc
commit
47e64646b4
@ -118,303 +118,6 @@ size_t small_rose_threshold(const CompileContext &cc) {
|
|||||||
: SMALL_ROSE_THRESHOLD_BLOCK;
|
: SMALL_ROSE_THRESHOLD_BLOCK;
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
|
||||||
struct DupeLeafKey {
|
|
||||||
explicit DupeLeafKey(const RoseVertexProps &litv)
|
|
||||||
: literals(litv.literals), reports(litv.reports),
|
|
||||||
eod_accept(litv.eod_accept), suffix(litv.suffix), left(litv.left),
|
|
||||||
som_adjust(litv.som_adjust) {
|
|
||||||
DEBUG_PRINTF("eod_accept %d\n", (int)eod_accept);
|
|
||||||
DEBUG_PRINTF("report %u\n", left.leftfix_report);
|
|
||||||
DEBUG_PRINTF("lag %u\n", left.lag);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator<(const DupeLeafKey &b) const {
|
|
||||||
const DupeLeafKey &a = *this;
|
|
||||||
ORDER_CHECK(literals);
|
|
||||||
ORDER_CHECK(eod_accept);
|
|
||||||
ORDER_CHECK(suffix);
|
|
||||||
ORDER_CHECK(reports);
|
|
||||||
ORDER_CHECK(som_adjust);
|
|
||||||
ORDER_CHECK(left.leftfix_report);
|
|
||||||
ORDER_CHECK(left.lag);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
flat_set<u32> literals;
|
|
||||||
flat_set<ReportID> reports;
|
|
||||||
bool eod_accept;
|
|
||||||
suffix_id suffix;
|
|
||||||
LeftEngInfo left;
|
|
||||||
u32 som_adjust;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct UncalcLeafKey {
|
|
||||||
UncalcLeafKey(const RoseGraph &g, RoseVertex v)
|
|
||||||
: literals(g[v].literals), rose(g[v].left) {
|
|
||||||
for (const auto &e : in_edges_range(v, g)) {
|
|
||||||
RoseVertex u = source(e, g);
|
|
||||||
preds.insert(make_pair(u, g[e]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator<(const UncalcLeafKey &b) const {
|
|
||||||
const UncalcLeafKey &a = *this;
|
|
||||||
ORDER_CHECK(literals);
|
|
||||||
ORDER_CHECK(preds);
|
|
||||||
ORDER_CHECK(rose);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
flat_set<u32> literals;
|
|
||||||
flat_set<pair<RoseVertex, RoseEdgeProps>> preds;
|
|
||||||
LeftEngInfo rose;
|
|
||||||
};
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This function merges leaf vertices with the same literals and report
|
|
||||||
* id/suffix. The leaf vertices of the graph are inspected and a mapping of
|
|
||||||
* leaf vertex properties to vertices is built. If the same set of leaf
|
|
||||||
* properties has already been seen when we inspect a vertex, we attempt to
|
|
||||||
* merge the vertex in with the previously seen vertex. This process can fail
|
|
||||||
* if the vertices share a common predecessor vertex but have a differing,
|
|
||||||
* incompatible relationship (different bounds or infix) with the predecessor.
|
|
||||||
*
|
|
||||||
* This takes place after \ref dedupeSuffixes to increase effectiveness as the
|
|
||||||
* same suffix is required for a merge to occur.
|
|
||||||
*/
|
|
||||||
void mergeDupeLeaves(RoseBuildImpl &tbi) {
|
|
||||||
map<DupeLeafKey, RoseVertex> leaves;
|
|
||||||
vector<RoseVertex> changed;
|
|
||||||
|
|
||||||
RoseGraph &g = tbi.g;
|
|
||||||
for (auto v : vertices_range(g)) {
|
|
||||||
if (in_degree(v, g) == 0) {
|
|
||||||
assert(tbi.isAnyStart(v));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG_PRINTF("inspecting vertex index=%zu in_degree %zu "
|
|
||||||
"out_degree %zu\n", g[v].index, in_degree(v, g),
|
|
||||||
out_degree(v, g));
|
|
||||||
|
|
||||||
// Vertex must be a reporting leaf node
|
|
||||||
if (g[v].reports.empty() || !isLeafNode(v, g)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// At the moment, we ignore all successors of root or anchored_root,
|
|
||||||
// since many parts of our runtime assume that these have in-degree 1.
|
|
||||||
if (tbi.isRootSuccessor(v)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
DupeLeafKey dupe(g[v]);
|
|
||||||
if (leaves.find(dupe) == leaves.end()) {
|
|
||||||
leaves.insert(make_pair(dupe, v));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
RoseVertex t = leaves.find(dupe)->second;
|
|
||||||
DEBUG_PRINTF("found two leaf dupe roles, index=%zu,%zu\n", g[v].index,
|
|
||||||
g[t].index);
|
|
||||||
|
|
||||||
vector<RoseEdge> deadEdges;
|
|
||||||
for (const auto &e : in_edges_range(v, g)) {
|
|
||||||
RoseVertex u = source(e, g);
|
|
||||||
DEBUG_PRINTF("u index=%zu\n", g[u].index);
|
|
||||||
if (RoseEdge et = edge(u, t, g)) {
|
|
||||||
if (g[et].minBound <= g[e].minBound
|
|
||||||
&& g[et].maxBound >= g[e].maxBound) {
|
|
||||||
DEBUG_PRINTF("remove more constrained edge\n");
|
|
||||||
deadEdges.push_back(e);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
DEBUG_PRINTF("rehome edge: add %zu->%zu\n", g[u].index,
|
|
||||||
g[t].index);
|
|
||||||
add_edge(u, t, g[e], g);
|
|
||||||
deadEdges.push_back(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!deadEdges.empty()) {
|
|
||||||
for (auto &e : deadEdges) {
|
|
||||||
remove_edge(e, g);
|
|
||||||
}
|
|
||||||
changed.push_back(v);
|
|
||||||
g[t].min_offset = min(g[t].min_offset, g[v].min_offset);
|
|
||||||
g[t].max_offset = max(g[t].max_offset, g[v].max_offset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
DEBUG_PRINTF("find loop done\n");
|
|
||||||
|
|
||||||
// Remove any vertices that now have no in-edges.
|
|
||||||
size_t countRemovals = 0;
|
|
||||||
for (size_t i = 0; i < changed.size(); i++) {
|
|
||||||
RoseVertex v = changed[i];
|
|
||||||
if (in_degree(v, g) == 0) {
|
|
||||||
DEBUG_PRINTF("remove vertex\n");
|
|
||||||
if (!tbi.isVirtualVertex(v)) {
|
|
||||||
for (u32 lit_id : g[v].literals) {
|
|
||||||
tbi.literal_info[lit_id].vertices.erase(v);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
remove_vertex(v, g);
|
|
||||||
countRemovals++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// if we've removed anything, we need to renumber vertices
|
|
||||||
if (countRemovals) {
|
|
||||||
renumber_vertices(g);
|
|
||||||
DEBUG_PRINTF("removed %zu vertices.\n", countRemovals);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Merges the suffixes on the (identical) vertices in \a vcluster, used by
|
|
||||||
* \ref uncalcLeaves. */
|
|
||||||
static
|
|
||||||
void mergeCluster(RoseGraph &g, const ReportManager &rm,
|
|
||||||
const vector<RoseVertex> &vcluster,
|
|
||||||
vector<RoseVertex> &dead, const CompileContext &cc) {
|
|
||||||
if (vcluster.size() <= 1) {
|
|
||||||
return; // No merge to perform.
|
|
||||||
}
|
|
||||||
|
|
||||||
// Note that we batch merges up fairly crudely for performance reasons.
|
|
||||||
vector<RoseVertex>::const_iterator it = vcluster.begin(), it2;
|
|
||||||
while (it != vcluster.end()) {
|
|
||||||
vector<NGHolder *> cluster;
|
|
||||||
map<NGHolder *, RoseVertex> rev;
|
|
||||||
|
|
||||||
for (it2 = it;
|
|
||||||
it2 != vcluster.end() && cluster.size() < MERGE_GROUP_SIZE_MAX;
|
|
||||||
++it2) {
|
|
||||||
RoseVertex v = *it2;
|
|
||||||
NGHolder *h = g[v].suffix.graph.get();
|
|
||||||
assert(!g[v].suffix.haig); /* should not be here if haig */
|
|
||||||
rev[h] = v;
|
|
||||||
cluster.push_back(h);
|
|
||||||
}
|
|
||||||
it = it2;
|
|
||||||
|
|
||||||
DEBUG_PRINTF("merging cluster %zu\n", cluster.size());
|
|
||||||
auto merged = mergeNfaCluster(cluster, &rm, cc);
|
|
||||||
DEBUG_PRINTF("done\n");
|
|
||||||
|
|
||||||
for (const auto &m : merged) {
|
|
||||||
NGHolder *h_victim = m.first; // mergee
|
|
||||||
NGHolder *h_winner = m.second;
|
|
||||||
RoseVertex victim = rev[h_victim];
|
|
||||||
RoseVertex winner = rev[h_winner];
|
|
||||||
|
|
||||||
LIMIT_TO_AT_MOST(&g[winner].min_offset, g[victim].min_offset);
|
|
||||||
ENSURE_AT_LEAST(&g[winner].max_offset, g[victim].max_offset);
|
|
||||||
insert(&g[winner].reports, g[victim].reports);
|
|
||||||
|
|
||||||
dead.push_back(victim);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
void findUncalcLeavesCandidates(RoseBuildImpl &tbi,
|
|
||||||
map<UncalcLeafKey, vector<RoseVertex> > &clusters,
|
|
||||||
deque<UncalcLeafKey> &ordered) {
|
|
||||||
const RoseGraph &g = tbi.g;
|
|
||||||
|
|
||||||
vector<RoseVertex> suffix_vertices; // vertices with suffix graphs
|
|
||||||
unordered_map<const NGHolder *, u32> fcount; // ref count per graph
|
|
||||||
|
|
||||||
for (auto v : vertices_range(g)) {
|
|
||||||
if (g[v].suffix) {
|
|
||||||
if (!g[v].suffix.graph) {
|
|
||||||
continue; /* cannot uncalc (haig/mcclellan); TODO */
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(g[v].suffix.graph->kind == NFA_SUFFIX);
|
|
||||||
|
|
||||||
// Ref count all suffixes, as we don't want to merge a suffix
|
|
||||||
// that happens to be shared with a non-leaf vertex somewhere.
|
|
||||||
DEBUG_PRINTF("vertex %zu has suffix %p\n", g[v].index,
|
|
||||||
g[v].suffix.graph.get());
|
|
||||||
fcount[g[v].suffix.graph.get()]++;
|
|
||||||
|
|
||||||
// Vertex must be a reporting pseudo accept
|
|
||||||
if (!isLeafNode(v, g)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
suffix_vertices.push_back(v);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (auto v : suffix_vertices) {
|
|
||||||
if (in_degree(v, g) == 0) {
|
|
||||||
assert(tbi.isAnyStart(v));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const NGHolder *h = g[v].suffix.graph.get();
|
|
||||||
assert(h);
|
|
||||||
DEBUG_PRINTF("suffix %p\n", h);
|
|
||||||
|
|
||||||
// We can't easily merge suffixes shared with other vertices, and
|
|
||||||
// creating a unique copy to do so may just mean we end up tracking
|
|
||||||
// more NFAs. Better to leave shared suffixes alone.
|
|
||||||
if (fcount[h] != 1) {
|
|
||||||
DEBUG_PRINTF("skipping shared suffix\n");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
UncalcLeafKey key(g, v);
|
|
||||||
vector<RoseVertex> &vec = clusters[key];
|
|
||||||
if (vec.empty()) {
|
|
||||||
|
|
||||||
ordered.push_back(key);
|
|
||||||
}
|
|
||||||
vec.push_back(v);
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG_PRINTF("find loop done\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This function attempts to combine identical roles (same literals, same
|
|
||||||
* predecessors, etc) with different suffixes into a single role which
|
|
||||||
* activates a larger suffix. The leaf vertices of the graph with a suffix are
|
|
||||||
* grouped into clusters which have members triggered by identical roles. The
|
|
||||||
* \ref mergeNfaCluster function (from ng_uncalc_components) is then utilised
|
|
||||||
* to build a set of larger (and still implementable) suffixes. The graph is
|
|
||||||
* then updated to point to the new suffixes and any unneeded roles are
|
|
||||||
* removed.
|
|
||||||
*
|
|
||||||
* Note: suffixes which are shared amongst multiple roles are not considered
|
|
||||||
* for this pass as the individual suffixes would have to continue to exist for
|
|
||||||
* the other roles to trigger resulting in the transformation not producing any
|
|
||||||
* savings.
|
|
||||||
*
|
|
||||||
* Note: as \ref mergeNfaCluster is slow when the cluster sizes are large,
|
|
||||||
* clusters of more than \ref MERGE_GROUP_SIZE_MAX roles are split into smaller
|
|
||||||
* chunks for processing.
|
|
||||||
*/
|
|
||||||
void uncalcLeaves(RoseBuildImpl &tbi) {
|
|
||||||
DEBUG_PRINTF("uncalcing\n");
|
|
||||||
|
|
||||||
map<UncalcLeafKey, vector<RoseVertex> > clusters;
|
|
||||||
deque<UncalcLeafKey> ordered;
|
|
||||||
findUncalcLeavesCandidates(tbi, clusters, ordered);
|
|
||||||
|
|
||||||
vector<RoseVertex> dead;
|
|
||||||
|
|
||||||
for (const auto &key : ordered) {
|
|
||||||
DEBUG_PRINTF("cluster of size %zu\n", clusters[key].size());
|
|
||||||
mergeCluster(tbi.g, tbi.rm, clusters[key], dead, tbi.cc);
|
|
||||||
}
|
|
||||||
tbi.removeVertices(dead);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that
|
* Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that
|
||||||
* reports should not contribute to the hash.
|
* reports should not contribute to the hash.
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -27,8 +27,8 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/** \file
|
/** \file
|
||||||
* \brief Rose Build: functions for reducing the size of the Rose graph
|
* \brief Rose Build: functions for reducing the number of engines in a Rose
|
||||||
* through merging.
|
* graph through merging or deduplicating engines.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef ROSE_BUILD_MERGE_H
|
#ifndef ROSE_BUILD_MERGE_H
|
||||||
@ -44,9 +44,6 @@ namespace ue2 {
|
|||||||
class NGHolder;
|
class NGHolder;
|
||||||
class RoseBuildImpl;
|
class RoseBuildImpl;
|
||||||
|
|
||||||
void mergeDupeLeaves(RoseBuildImpl &tbi);
|
|
||||||
void uncalcLeaves(RoseBuildImpl &tbi);
|
|
||||||
|
|
||||||
bool dedupeLeftfixes(RoseBuildImpl &tbi);
|
bool dedupeLeftfixes(RoseBuildImpl &tbi);
|
||||||
void mergeLeftfixesVariableLag(RoseBuildImpl &tbi);
|
void mergeLeftfixesVariableLag(RoseBuildImpl &tbi);
|
||||||
void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi);
|
void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi);
|
||||||
|
@ -62,6 +62,8 @@ using boost::adaptors::map_values;
|
|||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
|
// Largest number of suffix graphs that mergeCluster() passes to a single
// mergeNfaCluster() call; bigger clusters are processed in batches of this
// size.
static constexpr size_t MERGE_GROUP_SIZE_MAX = 200;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
// Used for checking edge sets (both in- and out-) against each other.
|
// Used for checking edge sets (both in- and out-) against each other.
|
||||||
struct EdgeAndVertex {
|
struct EdgeAndVertex {
|
||||||
@ -2026,4 +2028,304 @@ void aliasRoles(RoseBuildImpl &build, bool mergeRoses) {
|
|||||||
assert(canImplementGraphs(build));
|
assert(canImplementGraphs(build));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
struct DupeLeafKey {
|
||||||
|
explicit DupeLeafKey(const RoseVertexProps &litv)
|
||||||
|
: literals(litv.literals), reports(litv.reports),
|
||||||
|
eod_accept(litv.eod_accept), suffix(litv.suffix), left(litv.left),
|
||||||
|
som_adjust(litv.som_adjust) {
|
||||||
|
DEBUG_PRINTF("eod_accept %d\n", (int)eod_accept);
|
||||||
|
DEBUG_PRINTF("report %u\n", left.leftfix_report);
|
||||||
|
DEBUG_PRINTF("lag %u\n", left.lag);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator<(const DupeLeafKey &b) const {
|
||||||
|
const DupeLeafKey &a = *this;
|
||||||
|
ORDER_CHECK(literals);
|
||||||
|
ORDER_CHECK(eod_accept);
|
||||||
|
ORDER_CHECK(suffix);
|
||||||
|
ORDER_CHECK(reports);
|
||||||
|
ORDER_CHECK(som_adjust);
|
||||||
|
ORDER_CHECK(left.leftfix_report);
|
||||||
|
ORDER_CHECK(left.lag);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
flat_set<u32> literals;
|
||||||
|
flat_set<ReportID> reports;
|
||||||
|
bool eod_accept;
|
||||||
|
suffix_id suffix;
|
||||||
|
LeftEngInfo left;
|
||||||
|
u32 som_adjust;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct UncalcLeafKey {
|
||||||
|
UncalcLeafKey(const RoseGraph &g, RoseVertex v)
|
||||||
|
: literals(g[v].literals), rose(g[v].left) {
|
||||||
|
for (const auto &e : in_edges_range(v, g)) {
|
||||||
|
RoseVertex u = source(e, g);
|
||||||
|
preds.insert(make_pair(u, g[e]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator<(const UncalcLeafKey &b) const {
|
||||||
|
const UncalcLeafKey &a = *this;
|
||||||
|
ORDER_CHECK(literals);
|
||||||
|
ORDER_CHECK(preds);
|
||||||
|
ORDER_CHECK(rose);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
flat_set<u32> literals;
|
||||||
|
flat_set<pair<RoseVertex, RoseEdgeProps>> preds;
|
||||||
|
LeftEngInfo rose;
|
||||||
|
};
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function merges leaf vertices with the same literals and report
|
||||||
|
* id/suffix. The leaf vertices of the graph are inspected and a mapping of
|
||||||
|
* leaf vertex properties to vertices is built. If the same set of leaf
|
||||||
|
* properties has already been seen when we inspect a vertex, we attempt to
|
||||||
|
* merge the vertex in with the previously seen vertex. This process can fail
|
||||||
|
* if the vertices share a common predecessor vertex but have a differing,
|
||||||
|
* incompatible relationship (different bounds or infix) with the predecessor.
|
||||||
|
*
|
||||||
|
* This takes place after \ref dedupeSuffixes to increase effectiveness as the
|
||||||
|
* same suffix is required for a merge to occur.
|
||||||
|
*
|
||||||
|
* TODO: work if this is a subset of role aliasing (and if it can be eliminated)
|
||||||
|
* or clearly document cases that would not be covered by role aliasing.
|
||||||
|
*/
|
||||||
|
void mergeDupeLeaves(RoseBuildImpl &build) {
|
||||||
|
map<DupeLeafKey, RoseVertex> leaves;
|
||||||
|
vector<RoseVertex> changed;
|
||||||
|
|
||||||
|
RoseGraph &g = build.g;
|
||||||
|
for (auto v : vertices_range(g)) {
|
||||||
|
if (in_degree(v, g) == 0) {
|
||||||
|
assert(build.isAnyStart(v));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("inspecting vertex index=%zu in_degree %zu "
|
||||||
|
"out_degree %zu\n", g[v].index, in_degree(v, g),
|
||||||
|
out_degree(v, g));
|
||||||
|
|
||||||
|
// Vertex must be a reporting leaf node
|
||||||
|
if (g[v].reports.empty() || !isLeafNode(v, g)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// At the moment, we ignore all successors of root or anchored_root,
|
||||||
|
// since many parts of our runtime assume that these have in-degree 1.
|
||||||
|
if (build.isRootSuccessor(v)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
DupeLeafKey dupe(g[v]);
|
||||||
|
if (leaves.find(dupe) == leaves.end()) {
|
||||||
|
leaves.insert(make_pair(dupe, v));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
RoseVertex t = leaves.find(dupe)->second;
|
||||||
|
DEBUG_PRINTF("found two leaf dupe roles, index=%zu,%zu\n", g[v].index,
|
||||||
|
g[t].index);
|
||||||
|
|
||||||
|
vector<RoseEdge> deadEdges;
|
||||||
|
for (const auto &e : in_edges_range(v, g)) {
|
||||||
|
RoseVertex u = source(e, g);
|
||||||
|
DEBUG_PRINTF("u index=%zu\n", g[u].index);
|
||||||
|
if (RoseEdge et = edge(u, t, g)) {
|
||||||
|
if (g[et].minBound <= g[e].minBound
|
||||||
|
&& g[et].maxBound >= g[e].maxBound) {
|
||||||
|
DEBUG_PRINTF("remove more constrained edge\n");
|
||||||
|
deadEdges.push_back(e);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
DEBUG_PRINTF("rehome edge: add %zu->%zu\n", g[u].index,
|
||||||
|
g[t].index);
|
||||||
|
add_edge(u, t, g[e], g);
|
||||||
|
deadEdges.push_back(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!deadEdges.empty()) {
|
||||||
|
for (auto &e : deadEdges) {
|
||||||
|
remove_edge(e, g);
|
||||||
|
}
|
||||||
|
changed.push_back(v);
|
||||||
|
g[t].min_offset = min(g[t].min_offset, g[v].min_offset);
|
||||||
|
g[t].max_offset = max(g[t].max_offset, g[v].max_offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
DEBUG_PRINTF("find loop done\n");
|
||||||
|
|
||||||
|
// Remove any vertices that now have no in-edges.
|
||||||
|
size_t countRemovals = 0;
|
||||||
|
for (size_t i = 0; i < changed.size(); i++) {
|
||||||
|
RoseVertex v = changed[i];
|
||||||
|
if (in_degree(v, g) == 0) {
|
||||||
|
DEBUG_PRINTF("remove vertex\n");
|
||||||
|
if (!build.isVirtualVertex(v)) {
|
||||||
|
for (u32 lit_id : g[v].literals) {
|
||||||
|
build.literal_info[lit_id].vertices.erase(v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
remove_vertex(v, g);
|
||||||
|
countRemovals++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// if we've removed anything, we need to renumber vertices
|
||||||
|
if (countRemovals) {
|
||||||
|
renumber_vertices(g);
|
||||||
|
DEBUG_PRINTF("removed %zu vertices.\n", countRemovals);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Merges the suffixes on the (identical) vertices in \a vcluster, used by
|
||||||
|
* \ref uncalcLeaves. */
|
||||||
|
static
|
||||||
|
void mergeCluster(RoseGraph &g, const ReportManager &rm,
|
||||||
|
const vector<RoseVertex> &vcluster,
|
||||||
|
vector<RoseVertex> &dead, const CompileContext &cc) {
|
||||||
|
if (vcluster.size() <= 1) {
|
||||||
|
return; // No merge to perform.
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note that we batch merges up fairly crudely for performance reasons.
|
||||||
|
vector<RoseVertex>::const_iterator it = vcluster.begin(), it2;
|
||||||
|
while (it != vcluster.end()) {
|
||||||
|
vector<NGHolder *> cluster;
|
||||||
|
map<NGHolder *, RoseVertex> rev;
|
||||||
|
|
||||||
|
for (it2 = it;
|
||||||
|
it2 != vcluster.end() && cluster.size() < MERGE_GROUP_SIZE_MAX;
|
||||||
|
++it2) {
|
||||||
|
RoseVertex v = *it2;
|
||||||
|
NGHolder *h = g[v].suffix.graph.get();
|
||||||
|
assert(!g[v].suffix.haig); /* should not be here if haig */
|
||||||
|
rev[h] = v;
|
||||||
|
cluster.push_back(h);
|
||||||
|
}
|
||||||
|
it = it2;
|
||||||
|
|
||||||
|
DEBUG_PRINTF("merging cluster %zu\n", cluster.size());
|
||||||
|
auto merged = mergeNfaCluster(cluster, &rm, cc);
|
||||||
|
DEBUG_PRINTF("done\n");
|
||||||
|
|
||||||
|
for (const auto &m : merged) {
|
||||||
|
NGHolder *h_victim = m.first; // mergee
|
||||||
|
NGHolder *h_winner = m.second;
|
||||||
|
RoseVertex victim = rev[h_victim];
|
||||||
|
RoseVertex winner = rev[h_winner];
|
||||||
|
|
||||||
|
LIMIT_TO_AT_MOST(&g[winner].min_offset, g[victim].min_offset);
|
||||||
|
ENSURE_AT_LEAST(&g[winner].max_offset, g[victim].max_offset);
|
||||||
|
insert(&g[winner].reports, g[victim].reports);
|
||||||
|
|
||||||
|
dead.push_back(victim);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void findUncalcLeavesCandidates(RoseBuildImpl &build,
|
||||||
|
map<UncalcLeafKey, vector<RoseVertex> > &clusters,
|
||||||
|
deque<UncalcLeafKey> &ordered) {
|
||||||
|
const RoseGraph &g = build.g;
|
||||||
|
|
||||||
|
vector<RoseVertex> suffix_vertices; // vertices with suffix graphs
|
||||||
|
unordered_map<const NGHolder *, u32> fcount; // ref count per graph
|
||||||
|
|
||||||
|
for (auto v : vertices_range(g)) {
|
||||||
|
if (g[v].suffix) {
|
||||||
|
if (!g[v].suffix.graph) {
|
||||||
|
continue; /* cannot uncalc (haig/mcclellan); TODO */
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(g[v].suffix.graph->kind == NFA_SUFFIX);
|
||||||
|
|
||||||
|
// Ref count all suffixes, as we don't want to merge a suffix
|
||||||
|
// that happens to be shared with a non-leaf vertex somewhere.
|
||||||
|
DEBUG_PRINTF("vertex %zu has suffix %p\n", g[v].index,
|
||||||
|
g[v].suffix.graph.get());
|
||||||
|
fcount[g[v].suffix.graph.get()]++;
|
||||||
|
|
||||||
|
// Vertex must be a reporting pseudo accept
|
||||||
|
if (!isLeafNode(v, g)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
suffix_vertices.push_back(v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto v : suffix_vertices) {
|
||||||
|
if (in_degree(v, g) == 0) {
|
||||||
|
assert(build.isAnyStart(v));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const NGHolder *h = g[v].suffix.graph.get();
|
||||||
|
assert(h);
|
||||||
|
DEBUG_PRINTF("suffix %p\n", h);
|
||||||
|
|
||||||
|
// We can't easily merge suffixes shared with other vertices, and
|
||||||
|
// creating a unique copy to do so may just mean we end up tracking
|
||||||
|
// more NFAs. Better to leave shared suffixes alone.
|
||||||
|
if (fcount[h] != 1) {
|
||||||
|
DEBUG_PRINTF("skipping shared suffix\n");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
UncalcLeafKey key(g, v);
|
||||||
|
vector<RoseVertex> &vec = clusters[key];
|
||||||
|
if (vec.empty()) {
|
||||||
|
|
||||||
|
ordered.push_back(key);
|
||||||
|
}
|
||||||
|
vec.push_back(v);
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("find loop done\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function attempts to combine identical roles (same literals, same
|
||||||
|
* predecessors, etc) with different suffixes into a single role which
|
||||||
|
* activates a larger suffix. The leaf vertices of the graph with a suffix are
|
||||||
|
* grouped into clusters which have members triggered by identical roles. The
|
||||||
|
* \ref mergeNfaCluster function (from ng_uncalc_components) is then utilised
|
||||||
|
* to build a set of larger (and still implementable) suffixes. The graph is
|
||||||
|
* then updated to point to the new suffixes and any unneeded roles are
|
||||||
|
* removed.
|
||||||
|
*
|
||||||
|
* Note: suffixes which are shared amongst multiple roles are not considered
|
||||||
|
* for this pass as the individual suffixes would have to continue to exist for
|
||||||
|
* the other roles to trigger resulting in the transformation not producing any
|
||||||
|
* savings.
|
||||||
|
*
|
||||||
|
* Note: as \ref mergeNfaCluster is slow when the cluster sizes are large,
|
||||||
|
* clusters of more than \ref MERGE_GROUP_SIZE_MAX roles are split into smaller
|
||||||
|
* chunks for processing.
|
||||||
|
*/
|
||||||
|
void uncalcLeaves(RoseBuildImpl &build) {
|
||||||
|
DEBUG_PRINTF("uncalcing\n");
|
||||||
|
|
||||||
|
map<UncalcLeafKey, vector<RoseVertex> > clusters;
|
||||||
|
deque<UncalcLeafKey> ordered;
|
||||||
|
findUncalcLeavesCandidates(build, clusters, ordered);
|
||||||
|
|
||||||
|
vector<RoseVertex> dead;
|
||||||
|
|
||||||
|
for (const auto &key : ordered) {
|
||||||
|
DEBUG_PRINTF("cluster of size %zu\n", clusters[key].size());
|
||||||
|
mergeCluster(build.g, build.rm, clusters[key], dead, build.cc);
|
||||||
|
}
|
||||||
|
build.removeVertices(dead);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -26,8 +26,13 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef ROSE_BUILD_ROLE_ALIASING
|
#ifndef ROSE_BUILD_ROLE_ALIASING_H
|
||||||
#define ROSE_BUILD_ROLE_ALIASING
|
#define ROSE_BUILD_ROLE_ALIASING_H
|
||||||
|
|
||||||
|
/** \file
|
||||||
|
* \brief Rose Build: functions for reducing the size of the Rose graph
|
||||||
|
* through merging roles (RoseVertices) together.
|
||||||
|
*/
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
@ -35,6 +40,9 @@ class RoseBuildImpl;
|
|||||||
|
|
||||||
void aliasRoles(RoseBuildImpl &build, bool mergeRoses);
|
void aliasRoles(RoseBuildImpl &build, bool mergeRoses);
|
||||||
|
|
||||||
|
void mergeDupeLeaves(RoseBuildImpl &build);
|
||||||
|
void uncalcLeaves(RoseBuildImpl &build);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -34,6 +34,7 @@
|
|||||||
#include "rose/rose_build.h"
|
#include "rose/rose_build.h"
|
||||||
#include "rose/rose_build_impl.h"
|
#include "rose/rose_build_impl.h"
|
||||||
#include "rose/rose_build_merge.h"
|
#include "rose/rose_build_merge.h"
|
||||||
|
#include "rose/rose_build_role_aliasing.h"
|
||||||
#include "util/report_manager.h"
|
#include "util/report_manager.h"
|
||||||
#include "util/boundary_reports.h"
|
#include "util/boundary_reports.h"
|
||||||
#include "util/compile_context.h"
|
#include "util/compile_context.h"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user