Rework literal overlap checks for merging engines

Also increase the size of chunks we consider merging for castles.
This commit is contained in:
Alex Coyte 2015-12-02 15:15:02 +11:00 committed by Matthew Barr
parent 05beadf52f
commit b9c5d65f0e

View File

@ -93,6 +93,7 @@ static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64;
static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32;
static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10;
static const size_t MERGE_GROUP_SIZE_MAX = 200;
static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000;
/** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */
static const size_t DFA_CHUNK_SIZE_MAX = 200;
@ -799,47 +800,69 @@ static void chunkBouquets(const Bouquet<EngineRef> &in,
}
}
static
bool stringsCanFinishAtSameSpot(const ue2_literal &u,
ue2_literal::const_iterator v_b,
ue2_literal::const_iterator v_e) {
ue2_literal::const_iterator u_e = u.end();
ue2_literal::const_iterator u_b = u.begin();
while (u_e != u_b && v_e != v_b) {
--u_e;
--v_e;
if (!overlaps(*u_e, *v_e)) {
return false;
}
}
return true;
}
/**
* Prefix analysis: For lit1 with delay1 and lit2 with delay2, let L be the
* length of the largest suffix of lit1 that is a prefix of lit2. A merge is
* bad if L - delay1 > len(lit2) - delay2.
* Check that if after u has been seen, that it is impossible for the arrival of
* v to require the inspection of an engine earlier than u did.
*
* OR if we would have to check 2 literals of differing lags at the same
* point.
* Let delta be the earliest that v can be seen after u (may be zero)
*
* ie, we require u_loc - ulag <= v_loc - vlag (v_loc = u_loc + delta)
* ==> - ulag <= delta - vlag
* ==> vlag - ulag <= delta
*/
static
bool checkPrefix(const rose_literal_id &ul, const u32 ulag,
const rose_literal_id &vl, const u32 vlag) {
DEBUG_PRINTF("%s %s\n", escapeString(ul.s).c_str(),
escapeString(vl.s).c_str());
if (ulag != vlag && (vl.delay || ul.delay || isSuffix(ul.s, vl.s))) {
/* rose literals should not be delayed anyway */
DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag,
escapeString(vl.s).c_str(), vlag);
if (vl.delay || ul.delay) {
/* engine related literals should not be delayed anyway */
return false;
}
// Note that maxOverlap also picks up infixes.
size_t overlap = maxOverlap(ul, vl);
if (overlap < ulag) {
return true; /* avoiding underflow */
if (ulag >= vlag) {
assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag);
return true;
}
return overlap - ulag <= vl.elength() - vlag;
size_t min_allowed_delta = vlag - ulag;
DEBUG_PRINTF("min allow distace %zu\n", min_allowed_delta);
for (size_t i = 0; i < min_allowed_delta; i++) {
if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
return false;
}
}
DEBUG_PRINTF("OK\n");
return true;
}
bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
RoseVertex v) {
assert(u != v);
const auto &ulits = tbi.g[u].literals;
const auto &vlits = tbi.g[v].literals;
// We cannot merge roses that prefix literals in different tables.
if (tbi.literals.right.at(*ulits.begin()).table !=
tbi.literals.right.at(*vlits.begin()).table) {
DEBUG_PRINTF("literals in different tables\n");
return false;
}
const left_id u_left(tbi.g[u].left), v_left(tbi.g[v].left);
static
bool hasSameEngineType(const RoseVertexProps &u_prop,
const RoseVertexProps &v_prop) {
const left_id u_left(u_prop.left), v_left(v_prop.left);
if (u_left.haig() || v_left.haig()) {
if (u_left.graph() != v_left.graph()) {
@ -859,11 +882,68 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
}
}
return true;
}
static
bool compatibleLiteralsForMerge(
const vector<pair<const rose_literal_id *, u32>> &ulits,
const vector<pair<const rose_literal_id *, u32>> &vlits) {
assert(!ulits.empty());
assert(!vlits.empty());
// We cannot merge engines that prefix literals in different tables.
if (ulits[0].first->table != vlits[0].first->table) {
DEBUG_PRINTF("literals in different tables\n");
return false;
}
/* An engine requires that all accesses to it are ordered by offsets. (ie,
we can not check an engine's state at offset Y, if we have already
checked its status at offset X and X > Y). If we can not establish that
the literals used for triggering will statisfy this property, then it is
not safe to merge the engine. */
for (const auto &ue : ulits) {
const rose_literal_id &ul = *ue.first;
u32 ulag = ue.second;
if (ul.delay) {
return false; // We don't handle delayed cases yet.
}
for (const auto &ve : vlits) {
const rose_literal_id &vl = *ve.first;
u32 vlag = ve.second;
if (vl.delay) {
return false; // We don't handle delayed cases yet.
}
if (!checkPrefix(ul, ulag, vl, vlag)
|| !checkPrefix(vl, vlag, ul, ulag)) {
DEBUG_PRINTF("prefix check failed\n");
return false;
}
}
}
return true;
}
bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
RoseVertex v) {
assert(u != v);
if (!hasSameEngineType(tbi.g[u], tbi.g[v])) {
return false;
}
// UE-1675: in block mode, we want to be a little more selective -- only
// merge prefix roses when the literal sets are the same.
if (!tbi.cc.streaming && tbi.isRootSuccessor(u)) {
assert(tbi.isRootSuccessor(v));
if (ulits != vlits) {
if (tbi.g[u].literals != tbi.g[v].literals) {
DEBUG_PRINTF("literals aren't identical (block mode prefix)\n");
return false;
}
@ -883,88 +963,104 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
}
}
// We accept any pair of literal sets A and B where no literal in A
// contains a literal in B and no literal in B contains a literal in A.
const u32 ulag = tbi.g[u].left.lag;
const u32 vlag = tbi.g[v].left.lag;
for (const u32 &ulit : ulits) {
const rose_literal_id &ul = tbi.literals.right.at(ulit);
if (ul.delay) {
return false; // We don't handle delayed cases here.
}
for (const u32 &vlit : vlits) {
const rose_literal_id &vl = tbi.literals.right.at(vlit);
if (vl.delay) {
return false; // We don't handle delayed cases here.
}
if (!checkPrefix(ul, ulag, vl, vlag) ||
!checkPrefix(vl, vlag, ul, ulag)) {
DEBUG_PRINTF("prefix check failed\n");
return false;
}
}
u32 ulag = tbi.g[u].left.lag;
vector<pair<const rose_literal_id *, u32>> ulits;
ulits.reserve(tbi.g[u].literals.size());
for (u32 id : tbi.g[u].literals) {
ulits.push_back(make_pair(&tbi.literals.right.at(id), ulag));
}
DEBUG_PRINTF("roses on %zu and %zu are mergeable\n",
tbi.g[u].idx, tbi.g[v].idx);
u32 vlag = tbi.g[v].left.lag;
vector<pair<const rose_literal_id *, u32>> vlits;
vlits.reserve(tbi.g[v].literals.size());
for (u32 id : tbi.g[v].literals) {
vlits.push_back(make_pair(&tbi.literals.right.at(id), vlag));
}
if (!compatibleLiteralsForMerge(ulits, vlits)) {
return false;
}
DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].idx,
tbi.g[v].idx);
return true;
}
/* We cannot merge an engine, if a trigger literal and a post literal overlap
* in such a way that engine status needs to be check at a point before the
* engine's current location.
*
* i.e., for a trigger literal u and a pos literal v,
* where delta is the earliest v can appear after t,
* we require that v_loc - v_lag >= u_loc
* ==> u_loc + delta - v_lag >= u_loc
* ==> delta >= v_lag
*
*/
static
bool mergeableDelays(const RoseBuildImpl &tbi, const flat_set<u32> &ulits,
const flat_set<u32> &vlits, u32 vlag) {
for (const u32 &ulit : ulits) {
const rose_literal_id &ul = tbi.literals.right.at(ulit);
assert(!ul.delay); // this should never have got this far?
for (const u32 vlit : vlits) {
const rose_literal_id &vl = tbi.literals.right.at(vlit);
assert(!vl.delay); // this should never have got this far?
bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl,
u32 vlag) {
DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(),
escapeString(vl.s).c_str(), vlag);
DEBUG_PRINTF("%s %s (lag %u, overlap %zu)\n",
escapeString(ul.s).c_str(),
escapeString(vl.s).c_str(), vlag,
maxOverlap(ul, vl));
size_t l = vl.elength() - maxOverlap(ul, vl);
if (vlag > l) {
DEBUG_PRINTF("failed lag check!\n");
return false;
}
for (size_t i = 0; i < vlag; i++) {
if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
return false;
}
}
DEBUG_PRINTF("OK\n");
return true;
}
static
static never_inline
bool checkPredDelays(const RoseBuildImpl &tbi, const deque<RoseVertex> &v1,
const deque<RoseVertex> &v2) {
set<RoseVertex> preds;
flat_set<RoseVertex> preds;
for (auto v : v1) {
insert(&preds, inv_adjacent_vertices(v, tbi.g));
}
flat_set<u32> pred_lits;
/* No need to examine delays of a common pred - as it must already have
* survived the delay checks.
*
* This is important when the pred is in the anchored table as
* the literal is no longer available. */
flat_set<RoseVertex> known_good_preds;
for (auto v : v2) {
insert(&known_good_preds, inv_adjacent_vertices(v, tbi.g));
}
for (auto u : preds) {
const auto &pred_lits = tbi.g[u].literals;
for (auto v : v2) {
u32 vlag = tbi.g[v].left.lag;
DEBUG_PRINTF("consider (%zu, %zu) lag=%u\n", tbi.g[u].idx,
tbi.g[v].idx, vlag);
if (edge_by_target(u, v, tbi.g).second) {
/* no need to examine delays as it is a common pred - so checks
* must already have survived the delay checks.
* This is important when the pred is in the anchored table as
* the literal is no longer available. */
DEBUG_PRINTF("ok, also %zu is also a pred of %zu\n",
tbi.g[u].idx, tbi.g[v].idx);
continue;
}
if (!mergeableDelays(tbi, pred_lits, tbi.g[v].literals, vlag)) {
return false;
if (!contains(known_good_preds, &u)) {
insert(&pred_lits, tbi.g[u].literals);
}
}
vector<const rose_literal_id *> pred_rose_lits;
pred_rose_lits.reserve(pred_lits.size());
for (const auto &p : pred_lits) {
pred_rose_lits.push_back(&tbi.literals.right.at(p));
}
for (auto v : v2) {
u32 vlag = tbi.g[v].left.lag;
if (!vlag) {
continue;
}
for (const u32 vlit : tbi.g[v].literals) {
const rose_literal_id &vl = tbi.literals.right.at(vlit);
assert(!vl.delay); // this should never have got this far?
for (const auto &ul : pred_rose_lits) {
assert(!ul->delay); // this should never have got this far?
if (!checkPredDelay(*ul, vl, vlag)) {
return false;
}
}
}
}
@ -976,17 +1072,79 @@ static
bool mergeableRoseVertices(const RoseBuildImpl &tbi,
const deque<RoseVertex> &verts1,
const deque<RoseVertex> &verts2) {
for (auto v1 : verts1) {
for (auto v2 : verts2) {
if (!mergeableRoseVertices(tbi, v1, v2)) {
return false;
}
assert(!verts1.empty());
assert(!verts2.empty());
RoseVertex u_front = verts1.front();
RoseVertex v_front = verts2.front();
/* all vertices must have the same engine type: assume all verts in each
* group are already of the same type */
if (!hasSameEngineType(tbi.g[u_front], tbi.g[v_front])) {
return false;
}
bool is_prefix = tbi.isRootSuccessor(u_front);
/* We cannot merge prefixes/vertices if they are successors of different
* root vertices: similarly, assume the grouped vertices are compatible */
if (is_prefix) {
assert(tbi.isRootSuccessor(v_front));
set<RoseVertex> u_preds;
set<RoseVertex> v_preds;
insert(&u_preds, inv_adjacent_vertices(u_front, tbi.g));
insert(&v_preds, inv_adjacent_vertices(v_front, tbi.g));
if (u_preds != v_preds) {
return false;
}
}
vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */
for (auto a : verts1) {
// UE-1675: in block mode, we want to be a little more selective --
// only merge prefix roses when the literal sets are the same.
if (!tbi.cc.streaming && is_prefix) {
assert(tbi.isRootSuccessor(a));
if (tbi.g[u_front].literals != tbi.g[a].literals) {
DEBUG_PRINTF("literals aren't identical (block mode prefix)\n");
return false;
}
}
u32 ulag = tbi.g[a].left.lag;
for (u32 id : tbi.g[a].literals) {
ulits.push_back(make_pair(&tbi.literals.right.at(id), ulag));
}
}
vector<pair<const rose_literal_id *, u32>> vlits;
for (auto a : verts2) {
// UE-1675: in block mode, we want to be a little more selective --
// only merge prefix roses when the literal sets are the same.
if (!tbi.cc.streaming && is_prefix) {
assert(tbi.isRootSuccessor(a));
if (tbi.g[u_front].literals != tbi.g[a].literals) {
DEBUG_PRINTF("literals aren't identical (block mode prefix)\n");
return false;
}
}
u32 vlag = tbi.g[a].left.lag;
for (u32 id : tbi.g[a].literals) {
vlits.push_back(make_pair(&tbi.literals.right.at(id), vlag));
}
}
if (!compatibleLiteralsForMerge(ulits, vlits)) {
return false;
}
// Check preds are compatible as well.
if (!checkPredDelays(tbi, verts1, verts2) ||
!checkPredDelays(tbi, verts2, verts1)) {
if (!checkPredDelays(tbi, verts1, verts2)
|| !checkPredDelays(tbi, verts2, verts1)) {
return false;
}
@ -1741,33 +1899,31 @@ void mergeNfaLeftfixes(RoseBuildImpl &tbi, RoseBouquet &roses) {
}
static
void mergeCastleRoses(RoseBuildImpl &tbi, RoseBouquet &roses) {
void mergeCastleChunk(RoseBuildImpl &tbi, RoseBouquet &cands) {
/* caller must have already ensured that candidates have the same reach */
RoseGraph &g = tbi.g;
DEBUG_PRINTF("%zu castle rose merge candidates\n", roses.size());
DEBUG_PRINTF("%zu castle rose merge candidates\n", cands.size());
deque<left_id> merged;
for (auto it = roses.begin(); it != roses.end(); ++it) {
for (auto it = cands.begin(); it != cands.end(); ++it) {
left_id r1 = *it;
CastleProto &castle1 = *r1.castle();
const deque<RoseVertex> &verts1 = roses.vertices(r1);
const deque<RoseVertex> &verts1 = cands.vertices(r1);
merged.clear();
for (auto jt = next(it); jt != roses.end(); ++jt) {
for (auto jt = next(it); jt != cands.end(); ++jt) {
left_id r2 = *jt;
CastleProto &castle2 = *r2.castle();
const deque<RoseVertex> &verts2 = roses.vertices(r2);
const deque<RoseVertex> &verts2 = cands.vertices(r2);
if (castle1.repeats.size() == castle1.max_occupancy) {
DEBUG_PRINTF("castle1 has hit max occupancy\n");
break; // next castle1
}
if (castle1.reach() != castle2.reach()) {
DEBUG_PRINTF("different reach\n");
continue; // next castle2
}
assert(castle1.reach() == castle2.reach());
if (!mergeableRoseVertices(tbi, verts1, verts2)) {
DEBUG_PRINTF("not mergeable\n");
@ -1793,12 +1949,12 @@ void mergeCastleRoses(RoseBuildImpl &tbi, RoseBouquet &roses) {
}
}
roses.insert(r1, verts2);
cands.insert(r1, verts2);
merged.push_back(r2);
}
DEBUG_PRINTF("%zu roses merged\n", merged.size());
roses.erase_all(merged.begin(), merged.end());
cands.erase_all(merged.begin(), merged.end());
}
}
@ -1924,13 +2080,13 @@ void mergeCastleLeftfixes(RoseBuildImpl &tbi) {
for (auto &m : by_reach) {
DEBUG_PRINTF("%zu castles for reach: %s\n", m.second.size(),
describeClass(m.first).c_str());
RoseBouquet &roses = m.second;
deque<RoseBouquet> rose_groups;
chunkBouquets(roses, rose_groups, MERGE_GROUP_SIZE_MAX);
roses.clear();
RoseBouquet &candidates = m.second;
deque<RoseBouquet> cand_groups;
chunkBouquets(candidates, cand_groups, MERGE_CASTLE_GROUP_SIZE_MAX);
candidates.clear();
for (auto &group : rose_groups) {
mergeCastleRoses(tbi, group);
for (auto &group : cand_groups) {
mergeCastleChunk(tbi, group);
}
}
}