mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
dedupeLeftfixesVariableLag: refactor, more blockmode deduping
This commit is contained in:
parent
d6c050abd6
commit
a1fdc3afcf
@ -254,6 +254,7 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
std::set<u32> all_tops(const left_id &r);
|
std::set<u32> all_tops(const left_id &r);
|
||||||
|
std::set<ReportID> all_reports(const left_id &left);
|
||||||
bool isAnchored(const left_id &r);
|
bool isAnchored(const left_id &r);
|
||||||
depth findMinWidth(const left_id &r);
|
depth findMinWidth(const left_id &r);
|
||||||
depth findMaxWidth(const left_id &r);
|
depth findMaxWidth(const left_id &r);
|
||||||
|
@ -124,17 +124,17 @@ size_t small_rose_threshold(const CompileContext &cc) {
|
|||||||
* reports should not contribute to the hash.
|
* reports should not contribute to the hash.
|
||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
size_t hashLeftfix(const LeftEngInfo &left) {
|
size_t hashLeftfix(const left_id &left) {
|
||||||
size_t val = 0;
|
size_t val = 0;
|
||||||
|
|
||||||
if (left.castle) {
|
if (left.castle()) {
|
||||||
hash_combine(val, left.castle->reach());
|
hash_combine(val, left.castle()->reach());
|
||||||
for (const auto &pr : left.castle->repeats) {
|
for (const auto &pr : left.castle()->repeats) {
|
||||||
hash_combine(val, pr.first); // top
|
hash_combine(val, pr.first); // top
|
||||||
hash_combine(val, pr.second.bounds);
|
hash_combine(val, pr.second.bounds);
|
||||||
}
|
}
|
||||||
} else if (left.graph) {
|
} else if (left.graph()) {
|
||||||
hash_combine(val, hash_holder(*left.graph));
|
hash_combine(val, hash_holder(*left.graph()));
|
||||||
}
|
}
|
||||||
|
|
||||||
return val;
|
return val;
|
||||||
@ -180,34 +180,25 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Trivial Rose comparator intended to find graphs that are identical except
|
* Intended to find graphs that are identical except for their report
|
||||||
* for their report IDs. Relies on vertex and edge indices to pick up graphs
|
* IDs. Relies on vertex and edge indices to pick up graphs that have been
|
||||||
* that have been messily put together in different orderings...
|
* messily put together in different orderings. Only implemented for castles and
|
||||||
|
* holders.
|
||||||
*/
|
*/
|
||||||
struct RoseComparator {
|
static
|
||||||
explicit RoseComparator(const RoseGraph &g_in) : g(g_in) {}
|
bool is_equal(const left_id &u_left, ReportID u_report,
|
||||||
|
const left_id &v_left, ReportID v_report) {
|
||||||
bool operator()(const RoseVertex u, const RoseVertex v) const {
|
if (u_left.castle() && v_left.castle()) {
|
||||||
const LeftEngInfo &u_left = g[u].left;
|
return is_equal(*u_left.castle(), u_report, *v_left.castle(), v_report);
|
||||||
const LeftEngInfo &v_left = g[v].left;
|
|
||||||
|
|
||||||
if (u_left.castle && v_left.castle) {
|
|
||||||
return is_equal(*u_left.castle, u_left.leftfix_report,
|
|
||||||
*v_left.castle, v_left.leftfix_report);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!u_left.graph || !v_left.graph) {
|
if (!u_left.graph() || !v_left.graph()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return is_equal(*u_left.graph, u_left.leftfix_report, *v_left.graph,
|
return is_equal(*u_left.graph(), u_report, *v_left.graph(), v_report);
|
||||||
v_left.leftfix_report);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
|
||||||
const RoseGraph &g;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -253,8 +244,6 @@ bool dedupeLeftfixes(RoseBuildImpl &tbi) {
|
|||||||
|
|
||||||
DEBUG_PRINTF("collected %zu rose groups\n", roses.size());
|
DEBUG_PRINTF("collected %zu rose groups\n", roses.size());
|
||||||
|
|
||||||
const RoseComparator rosecmp(g);
|
|
||||||
|
|
||||||
// Walk groups and dedupe the roses therein.
|
// Walk groups and dedupe the roses therein.
|
||||||
for (deque<RoseVertex> &verts : roses | map_values) {
|
for (deque<RoseVertex> &verts : roses | map_values) {
|
||||||
DEBUG_PRINTF("group has %zu vertices\n", verts.size());
|
DEBUG_PRINTF("group has %zu vertices\n", verts.size());
|
||||||
@ -272,7 +261,9 @@ bool dedupeLeftfixes(RoseBuildImpl &tbi) {
|
|||||||
|
|
||||||
// Scan the rest of the list for dupes.
|
// Scan the rest of the list for dupes.
|
||||||
for (auto kt = std::next(jt); kt != jte; ++kt) {
|
for (auto kt = std::next(jt); kt != jte; ++kt) {
|
||||||
if (g[v].left == g[*kt].left || !rosecmp(v, *kt)) {
|
if (g[v].left == g[*kt].left
|
||||||
|
|| !is_equal(g[v].left, g[v].left.leftfix_report,
|
||||||
|
g[*kt].left, g[*kt].left.leftfix_report)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1346,6 +1337,21 @@ void chunk(vector<T> in, vector<vector<T>> *out, size_t chunk_size) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) {
|
||||||
|
insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts;
|
||||||
|
for (auto v : vertices_range(g)) {
|
||||||
|
const auto &left = g[v].left;
|
||||||
|
if (!left) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
assert(contains(all_reports(left), left.leftfix_report));
|
||||||
|
eng_verts[left].push_back(v);
|
||||||
|
}
|
||||||
|
|
||||||
|
return eng_verts;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This pass attempts to merge prefix/infix engines which share a common set of
|
* This pass attempts to merge prefix/infix engines which share a common set of
|
||||||
* parent vertices.
|
* parent vertices.
|
||||||
@ -1377,19 +1383,11 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) {
|
|||||||
|
|
||||||
RoseGraph &g = build.g;
|
RoseGraph &g = build.g;
|
||||||
|
|
||||||
insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts;
|
|
||||||
|
|
||||||
DEBUG_PRINTF("-----\n");
|
DEBUG_PRINTF("-----\n");
|
||||||
DEBUG_PRINTF("entry\n");
|
DEBUG_PRINTF("entry\n");
|
||||||
DEBUG_PRINTF("-----\n");
|
DEBUG_PRINTF("-----\n");
|
||||||
|
|
||||||
for (auto v : vertices_range(g)) {
|
auto eng_verts = get_eng_verts(g);
|
||||||
const auto &left = g[v].left;
|
|
||||||
if (!left) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
eng_verts[left].push_back(v);
|
|
||||||
}
|
|
||||||
|
|
||||||
map<MergeKey, vector<left_id>> engine_groups;
|
map<MergeKey, vector<left_id>> engine_groups;
|
||||||
for (const auto &e : eng_verts) {
|
for (const auto &e : eng_verts) {
|
||||||
@ -1511,13 +1509,10 @@ namespace {
|
|||||||
* Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag path.
|
* Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag path.
|
||||||
*/
|
*/
|
||||||
struct DedupeLeftKey {
|
struct DedupeLeftKey {
|
||||||
DedupeLeftKey(const RoseBuildImpl &build, RoseVertex v)
|
DedupeLeftKey(const RoseBuildImpl &build,
|
||||||
: left_hash(hashLeftfix(build.g[v].left)),
|
flat_set<pair<size_t, u32>> preds_in, const left_id &left)
|
||||||
transient(contains(build.transient, build.g[v].left)) {
|
: left_hash(hashLeftfix(left)), preds(move(preds_in)),
|
||||||
const auto &g = build.g;
|
transient(contains(build.transient, left)) {
|
||||||
for (const auto &e : in_edges_range(v, g)) {
|
|
||||||
preds.emplace(g[source(e, g)].index, g[e].rose_top);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator<(const DedupeLeftKey &b) const {
|
bool operator<(const DedupeLeftKey &b) const {
|
||||||
@ -1531,7 +1526,7 @@ private:
|
|||||||
size_t left_hash;
|
size_t left_hash;
|
||||||
|
|
||||||
/** For each in-edge, the pair of (parent index, edge top). */
|
/** For each in-edge, the pair of (parent index, edge top). */
|
||||||
set<pair<size_t, u32>> preds;
|
flat_set<pair<size_t, u32>> preds;
|
||||||
|
|
||||||
/** We don't want to combine transient with non-transient. */
|
/** We don't want to combine transient with non-transient. */
|
||||||
bool transient;
|
bool transient;
|
||||||
@ -1539,6 +1534,15 @@ private:
|
|||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
static
|
||||||
|
flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) {
|
||||||
|
flat_set<pair<size_t, u32>> preds;
|
||||||
|
for (const auto &e : in_edges_range(v, g)) {
|
||||||
|
preds.emplace(g[source(e, g)].index, g[e].rose_top);
|
||||||
|
}
|
||||||
|
return preds;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This is a generalisation of \ref dedupeLeftfixes which relaxes two
|
* This is a generalisation of \ref dedupeLeftfixes which relaxes two
|
||||||
* restrictions: multiple predecessor roles are allowed and the delay used by
|
* restrictions: multiple predecessor roles are allowed and the delay used by
|
||||||
@ -1558,83 +1562,97 @@ private:
|
|||||||
*
|
*
|
||||||
* Note: this is unable to dedupe when delayed literals are involved unlike
|
* Note: this is unable to dedupe when delayed literals are involved unlike
|
||||||
* dedupeLeftfixes.
|
* dedupeLeftfixes.
|
||||||
*
|
|
||||||
* Note: in block mode we restrict the dedupe of prefixes further as some of
|
|
||||||
* logic checks are shared with the mergeLeftfix functions.
|
|
||||||
*/
|
*/
|
||||||
void dedupeLeftfixesVariableLag(RoseBuildImpl &build) {
|
void dedupeLeftfixesVariableLag(RoseBuildImpl &build) {
|
||||||
map<DedupeLeftKey, RoseBouquet> roseGrouping;
|
|
||||||
|
|
||||||
DEBUG_PRINTF("entry\n");
|
DEBUG_PRINTF("entry\n");
|
||||||
|
|
||||||
RoseGraph &g = build.g;
|
RoseGraph &g = build.g;
|
||||||
for (auto v : vertices_range(g)) {
|
auto eng_verts = get_eng_verts(g);
|
||||||
if (!g[v].left) {
|
|
||||||
|
map<DedupeLeftKey, vector<left_id>> engine_groups;
|
||||||
|
for (const auto &e : eng_verts) {
|
||||||
|
const left_id &left = e.first;
|
||||||
|
const auto &verts = e.second;
|
||||||
|
|
||||||
|
/* There should only be one report on an engine as no merges have
|
||||||
|
* happened yet. (aside from eod prefixes) */
|
||||||
|
if (all_reports(left).size() != 1) {
|
||||||
|
assert(any_of_in(adjacent_vertices_range(verts.front(), g),
|
||||||
|
[&](RoseVertex w) { return g[w].eod_accept; }));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const left_id leftfix(g[v].left);
|
if (left.haig()) {
|
||||||
|
/* TODO: allow deduping of identical haigs */
|
||||||
if (leftfix.haig()) {
|
|
||||||
/* TODO: allow merging of identical haigs */
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (leftfix.graph()) {
|
if (left.graph()) {
|
||||||
/* we should not have merged yet */
|
/* we should not have merged yet */
|
||||||
assert(!is_triggered(*leftfix.graph())
|
assert(!is_triggered(*left.graph()) || onlyOneTop(*left.graph()));
|
||||||
|| onlyOneTop(*leftfix.graph()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
roseGrouping[DedupeLeftKey(build, v)].insert(leftfix, v);
|
auto preds = get_pred_tops(verts.front(), g);
|
||||||
|
for (RoseVertex v : verts) {
|
||||||
|
if (preds != get_pred_tops(v, g)) {
|
||||||
|
DEBUG_PRINTF("distinct pred sets\n");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
engine_groups[DedupeLeftKey(build, move(preds), left)].push_back(left);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (RoseBouquet &roses : roseGrouping | map_values) {
|
/* We don't bother chunking as we expect deduping to be successful if the
|
||||||
DEBUG_PRINTF("group of %zu roses\n", roses.size());
|
* hashes match */
|
||||||
|
|
||||||
if (roses.size() < 2) {
|
for (auto &group : engine_groups | map_values) {
|
||||||
|
DEBUG_PRINTF("group of %zu roses\n", group.size());
|
||||||
|
|
||||||
|
if (group.size() < 2) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const RoseComparator rosecmp(g);
|
for (auto it = group.begin(); it != group.end(); ++it) {
|
||||||
|
|
||||||
for (auto it = roses.begin(); it != roses.end(); ++it) {
|
|
||||||
left_id r1 = *it;
|
left_id r1 = *it;
|
||||||
const deque<RoseVertex> &verts1 = roses.vertices(r1);
|
vector<RoseVertex> &verts1 = eng_verts[r1];
|
||||||
|
assert(!verts1.empty()); /* cleared engines should be behind us */
|
||||||
|
|
||||||
for (auto jt = next(it); jt != roses.end(); ++jt) {
|
assert(all_reports(r1).size() == 1);
|
||||||
|
ReportID r1_report = *all_reports(r1).begin();
|
||||||
|
|
||||||
|
for (auto jt = next(it); jt != group.end(); ++jt) {
|
||||||
left_id r2 = *jt;
|
left_id r2 = *jt;
|
||||||
const deque<RoseVertex> &verts2 = roses.vertices(r2);
|
vector<RoseVertex> &verts2 = eng_verts[r2];
|
||||||
|
assert(!verts2.empty());
|
||||||
|
assert(all_reports(r2).size() == 1);
|
||||||
|
ReportID r2_report = *all_reports(r2).begin();
|
||||||
|
|
||||||
if (!rosecmp(verts1.front(), verts2.front())) {
|
if (!is_equal(r1, r1_report, r2, r2_report)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!mergeableRoseVertices(build, verts1, verts2)) {
|
if (!checkVerticesOkForLeftfixMerge(build, verts1, verts2)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("%p and %p are dupes\n", r1.graph(), r2.graph());
|
DEBUG_PRINTF("%p and %p are dupes\n", r1.graph(), r2.graph());
|
||||||
|
|
||||||
// Replace h1 with h2.
|
// Replace r1 with r2.
|
||||||
|
|
||||||
const LeftEngInfo &v2_left = g[verts2.front()].left;
|
|
||||||
assert(v2_left.graph.get() == r2.graph());
|
|
||||||
|
|
||||||
for (auto v : verts1) {
|
for (auto v : verts1) {
|
||||||
DEBUG_PRINTF("replacing report %u with %u on %zu\n",
|
DEBUG_PRINTF("replacing report %u with %u on %zu\n",
|
||||||
g[v].left.leftfix_report,
|
r2_report, r1_report, g[v].index);
|
||||||
v2_left.leftfix_report, g[v].index);
|
|
||||||
u32 orig_lag = g[v].left.lag;
|
u32 orig_lag = g[v].left.lag;
|
||||||
g[v].left = v2_left;
|
g[v].left = g[verts2.front()].left;
|
||||||
g[v].left.lag = orig_lag;
|
g[v].left.lag = orig_lag;
|
||||||
}
|
}
|
||||||
roses.insert(r2, verts1);
|
|
||||||
|
insert(&verts2, verts2.end(), verts1);
|
||||||
|
verts1.clear();
|
||||||
|
|
||||||
/* remove stale entry from transient set, if present */
|
/* remove stale entry from transient set, if present */
|
||||||
build.transient.erase(r1);
|
build.transient.erase(r1);
|
||||||
|
|
||||||
// no need to erase h1 from roses, that would invalidate `it'.
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -750,6 +750,19 @@ set<u32> all_tops(const left_id &r) {
|
|||||||
return {0};
|
return {0};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the set of reports for the engine wrapped by \p left.
 *
 * The call is forwarded to the all_reports() overload for whichever
 * underlying engine is present, checked in the order graph, castle, dfa and
 * finally haig. Exactly which engine kinds may coexist is not visible here;
 * the assert only requires that at least one of them is set.
 *
 * \param left Leftfix identifier to query.
 * \return Reports of the first engine found in the order above.
 */
set<u32> all_reports(const left_id &left) {
    assert(left.graph() || left.castle() || left.haig() || left.dfa());

    if (left.graph()) {
        return all_reports(*left.graph());
    }

    if (left.castle()) {
        return all_reports(*left.castle());
    }

    if (left.dfa()) {
        return all_reports(*left.dfa());
    }

    /* only the haig remains */
    return all_reports(*left.haig());
}
|
||||||
|
|
||||||
u32 num_tops(const left_id &r) {
|
u32 num_tops(const left_id &r) {
|
||||||
return all_tops(r).size();
|
return all_tops(r).size();
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user