diff --git a/src/rose/rose_build_merge.cpp b/src/rose/rose_build_merge.cpp index 4c55a41f..e42e0aca 100644 --- a/src/rose/rose_build_merge.cpp +++ b/src/rose/rose_build_merge.cpp @@ -93,6 +93,7 @@ static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64; static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32; static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10; static const size_t MERGE_GROUP_SIZE_MAX = 200; +static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000; /** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */ static const size_t DFA_CHUNK_SIZE_MAX = 200; @@ -799,47 +800,69 @@ static void chunkBouquets(const Bouquet &in, } } +static +bool stringsCanFinishAtSameSpot(const ue2_literal &u, + ue2_literal::const_iterator v_b, + ue2_literal::const_iterator v_e) { + ue2_literal::const_iterator u_e = u.end(); + ue2_literal::const_iterator u_b = u.begin(); + + while (u_e != u_b && v_e != v_b) { + --u_e; + --v_e; + + if (!overlaps(*u_e, *v_e)) { + return false; + } + } + + return true; +} + /** - * Prefix analysis: For lit1 with delay1 and lit2 with delay2, let L be the - * length of the largest suffix of lit1 that is a prefix of lit2. A merge is - * bad if L - delay1 > len(lit2) - delay2. + * Check that, after u has been seen, it is impossible for the arrival of + * v to require the inspection of an engine earlier than u did. * - * OR if we would have to check 2 literals of differing lags at the same - * point. 
+ * Let delta be the earliest that v can be seen after u (may be zero) + * + * ie, we require u_loc - ulag <= v_loc - vlag (v_loc = u_loc + delta) + * ==> - ulag <= delta - vlag + * ==> vlag - ulag <= delta */ static bool checkPrefix(const rose_literal_id &ul, const u32 ulag, const rose_literal_id &vl, const u32 vlag) { - DEBUG_PRINTF("%s %s\n", escapeString(ul.s).c_str(), - escapeString(vl.s).c_str()); - if (ulag != vlag && (vl.delay || ul.delay || isSuffix(ul.s, vl.s))) { - /* rose literals should not be delayed anyway */ + DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag, + escapeString(vl.s).c_str(), vlag); + + if (vl.delay || ul.delay) { + /* engine related literals should not be delayed anyway */ return false; } - // Note that maxOverlap also picks up infixes. - size_t overlap = maxOverlap(ul, vl); - if (overlap < ulag) { - return true; /* avoiding underflow */ + if (ulag >= vlag) { + assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag); + return true; } - return overlap - ulag <= vl.elength() - vlag; + + size_t min_allowed_delta = vlag - ulag; + DEBUG_PRINTF("min allow distace %zu\n", min_allowed_delta); + + for (size_t i = 0; i < min_allowed_delta; i++) { + if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { + DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i); + return false; + } + } + + DEBUG_PRINTF("OK\n"); + return true; } -bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, - RoseVertex v) { - assert(u != v); - - const auto &ulits = tbi.g[u].literals; - const auto &vlits = tbi.g[v].literals; - - // We cannot merge roses that prefix literals in different tables. 
- if (tbi.literals.right.at(*ulits.begin()).table != - tbi.literals.right.at(*vlits.begin()).table) { - DEBUG_PRINTF("literals in different tables\n"); - return false; - } - - const left_id u_left(tbi.g[u].left), v_left(tbi.g[v].left); +static +bool hasSameEngineType(const RoseVertexProps &u_prop, + const RoseVertexProps &v_prop) { + const left_id u_left(u_prop.left), v_left(v_prop.left); if (u_left.haig() || v_left.haig()) { if (u_left.graph() != v_left.graph()) { @@ -859,11 +882,68 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, } } + return true; +} + +static +bool compatibleLiteralsForMerge( + const vector> &ulits, + const vector> &vlits) { + assert(!ulits.empty()); + assert(!vlits.empty()); + + // We cannot merge engines that prefix literals in different tables. + if (ulits[0].first->table != vlits[0].first->table) { + DEBUG_PRINTF("literals in different tables\n"); + return false; + } + + /* An engine requires that all accesses to it are ordered by offsets. (ie, + we can not check an engine's state at offset Y, if we have already + checked its status at offset X and X > Y). If we can not establish that + the literals used for triggering will satisfy this property, then it is + not safe to merge the engine. */ + for (const auto &ue : ulits) { + const rose_literal_id &ul = *ue.first; + u32 ulag = ue.second; + + if (ul.delay) { + return false; // We don't handle delayed cases yet. + } + + for (const auto &ve : vlits) { + const rose_literal_id &vl = *ve.first; + u32 vlag = ve.second; + + if (vl.delay) { + return false; // We don't handle delayed cases yet. 
+ } + + if (!checkPrefix(ul, ulag, vl, vlag) + || !checkPrefix(vl, vlag, ul, ulag)) { + DEBUG_PRINTF("prefix check failed\n"); + return false; + } + } + } + + return true; +} + +bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, + RoseVertex v) { + assert(u != v); + + if (!hasSameEngineType(tbi.g[u], tbi.g[v])) { + return false; + } + // UE-1675: in block mode, we want to be a little more selective -- only // merge prefix roses when the literal sets are the same. if (!tbi.cc.streaming && tbi.isRootSuccessor(u)) { assert(tbi.isRootSuccessor(v)); - if (ulits != vlits) { + + if (tbi.g[u].literals != tbi.g[v].literals) { DEBUG_PRINTF("literals aren't identical (block mode prefix)\n"); return false; } @@ -883,88 +963,104 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, } } - // We accept any pair of literal sets A and B where no literal in A - // contains a literal in B and no literal in B contains a literal in A. - - const u32 ulag = tbi.g[u].left.lag; - const u32 vlag = tbi.g[v].left.lag; - - for (const u32 &ulit : ulits) { - const rose_literal_id &ul = tbi.literals.right.at(ulit); - - if (ul.delay) { - return false; // We don't handle delayed cases here. - } - - for (const u32 &vlit : vlits) { - const rose_literal_id &vl = tbi.literals.right.at(vlit); - - if (vl.delay) { - return false; // We don't handle delayed cases here. 
- } - - if (!checkPrefix(ul, ulag, vl, vlag) || - !checkPrefix(vl, vlag, ul, ulag)) { - DEBUG_PRINTF("prefix check failed\n"); - return false; - } - } + u32 ulag = tbi.g[u].left.lag; + vector> ulits; + ulits.reserve(tbi.g[u].literals.size()); + for (u32 id : tbi.g[u].literals) { + ulits.push_back(make_pair(&tbi.literals.right.at(id), ulag)); } - DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", - tbi.g[u].idx, tbi.g[v].idx); + u32 vlag = tbi.g[v].left.lag; + vector> vlits; + vlits.reserve(tbi.g[v].literals.size()); + for (u32 id : tbi.g[v].literals) { + vlits.push_back(make_pair(&tbi.literals.right.at(id), vlag)); + } + + if (!compatibleLiteralsForMerge(ulits, vlits)) { + return false; + } + + DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].idx, + tbi.g[v].idx); return true; } +/* We cannot merge an engine, if a trigger literal and a post literal overlap + * in such a way that engine status needs to be checked at a point before the + * engine's current location. + * + * i.e., for a trigger literal u and a post literal v, + * where delta is the earliest v can appear after u, + * we require that v_loc - v_lag >= u_loc + * ==> u_loc + delta - v_lag >= u_loc + * ==> delta >= v_lag + * + */ static -bool mergeableDelays(const RoseBuildImpl &tbi, const flat_set &ulits, - const flat_set &vlits, u32 vlag) { - for (const u32 &ulit : ulits) { - const rose_literal_id &ul = tbi.literals.right.at(ulit); - assert(!ul.delay); // this should never have got this far? - for (const u32 vlit : vlits) { - const rose_literal_id &vl = tbi.literals.right.at(vlit); - assert(!vl.delay); // this should never have got this far? 
+bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl, + u32 vlag) { + DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(), + escapeString(vl.s).c_str(), vlag); - DEBUG_PRINTF("%s %s (lag %u, overlap %zu)\n", - escapeString(ul.s).c_str(), - escapeString(vl.s).c_str(), vlag, - maxOverlap(ul, vl)); - size_t l = vl.elength() - maxOverlap(ul, vl); - if (vlag > l) { - DEBUG_PRINTF("failed lag check!\n"); - return false; - } + for (size_t i = 0; i < vlag; i++) { + if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { + DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i); + return false; } } + + DEBUG_PRINTF("OK\n"); return true; } -static +static never_inline bool checkPredDelays(const RoseBuildImpl &tbi, const deque &v1, const deque &v2) { - set preds; + flat_set preds; for (auto v : v1) { insert(&preds, inv_adjacent_vertices(v, tbi.g)); } + flat_set pred_lits; + + /* No need to examine delays of a common pred - as it must already have + * survived the delay checks. + * + * This is important when the pred is in the anchored table as + * the literal is no longer available. */ + flat_set known_good_preds; + for (auto v : v2) { + insert(&known_good_preds, inv_adjacent_vertices(v, tbi.g)); + } + for (auto u : preds) { - const auto &pred_lits = tbi.g[u].literals; - for (auto v : v2) { - u32 vlag = tbi.g[v].left.lag; - DEBUG_PRINTF("consider (%zu, %zu) lag=%u\n", tbi.g[u].idx, - tbi.g[v].idx, vlag); - if (edge_by_target(u, v, tbi.g).second) { - /* no need to examine delays as it is a common pred - so checks - * must already have survived the delay checks. - * This is important when the pred is in the anchored table as - * the literal is no longer available. 
*/ - DEBUG_PRINTF("ok, also %zu is also a pred of %zu\n", - tbi.g[u].idx, tbi.g[v].idx); - continue; - } - if (!mergeableDelays(tbi, pred_lits, tbi.g[v].literals, vlag)) { - return false; + if (!contains(known_good_preds, &u)) { + insert(&pred_lits, tbi.g[u].literals); + } + } + + vector pred_rose_lits; + pred_rose_lits.reserve(pred_lits.size()); + for (const auto &p : pred_lits) { + pred_rose_lits.push_back(&tbi.literals.right.at(p)); + } + + for (auto v : v2) { + u32 vlag = tbi.g[v].left.lag; + if (!vlag) { + continue; + } + + for (const u32 vlit : tbi.g[v].literals) { + const rose_literal_id &vl = tbi.literals.right.at(vlit); + assert(!vl.delay); // this should never have got this far? + for (const auto &ul : pred_rose_lits) { + assert(!ul->delay); // this should never have got this far? + + if (!checkPredDelay(*ul, vl, vlag)) { + return false; + } } } } @@ -976,17 +1072,79 @@ static bool mergeableRoseVertices(const RoseBuildImpl &tbi, const deque &verts1, const deque &verts2) { - for (auto v1 : verts1) { - for (auto v2 : verts2) { - if (!mergeableRoseVertices(tbi, v1, v2)) { - return false; - } + assert(!verts1.empty()); + assert(!verts2.empty()); + + RoseVertex u_front = verts1.front(); + RoseVertex v_front = verts2.front(); + + /* all vertices must have the same engine type: assume all verts in each + * group are already of the same type */ + if (!hasSameEngineType(tbi.g[u_front], tbi.g[v_front])) { + return false; + } + + bool is_prefix = tbi.isRootSuccessor(u_front); + + /* We cannot merge prefixes/vertices if they are successors of different + * root vertices: similarly, assume the grouped vertices are compatible */ + if (is_prefix) { + assert(tbi.isRootSuccessor(v_front)); + set u_preds; + set v_preds; + insert(&u_preds, inv_adjacent_vertices(u_front, tbi.g)); + insert(&v_preds, inv_adjacent_vertices(v_front, tbi.g)); + + if (u_preds != v_preds) { + return false; } } + vector> ulits; /* lit + lag pairs */ + for (auto a : verts1) { + // UE-1675: in block 
mode, we want to be a little more selective -- + // only merge prefix roses when the literal sets are the same. + if (!tbi.cc.streaming && is_prefix) { + assert(tbi.isRootSuccessor(a)); + + if (tbi.g[u_front].literals != tbi.g[a].literals) { + DEBUG_PRINTF("literals aren't identical (block mode prefix)\n"); + return false; + } + } + + u32 ulag = tbi.g[a].left.lag; + for (u32 id : tbi.g[a].literals) { + ulits.push_back(make_pair(&tbi.literals.right.at(id), ulag)); + } + } + + vector> vlits; + for (auto a : verts2) { + // UE-1675: in block mode, we want to be a little more selective -- + // only merge prefix roses when the literal sets are the same. + if (!tbi.cc.streaming && is_prefix) { + assert(tbi.isRootSuccessor(a)); + + if (tbi.g[u_front].literals != tbi.g[a].literals) { + DEBUG_PRINTF("literals aren't identical (block mode prefix)\n"); + return false; + } + } + + u32 vlag = tbi.g[a].left.lag; + for (u32 id : tbi.g[a].literals) { + vlits.push_back(make_pair(&tbi.literals.right.at(id), vlag)); + } + } + + if (!compatibleLiteralsForMerge(ulits, vlits)) { + return false; + } + // Check preds are compatible as well. 
- if (!checkPredDelays(tbi, verts1, verts2) || - !checkPredDelays(tbi, verts2, verts1)) { + if (!checkPredDelays(tbi, verts1, verts2) + || !checkPredDelays(tbi, verts2, verts1)) { return false; } @@ -1741,33 +1899,31 @@ void mergeNfaLeftfixes(RoseBuildImpl &tbi, RoseBouquet &roses) { } static -void mergeCastleRoses(RoseBuildImpl &tbi, RoseBouquet &roses) { +void mergeCastleChunk(RoseBuildImpl &tbi, RoseBouquet &cands) { + /* caller must have already ensured that candidates have the same reach */ RoseGraph &g = tbi.g; - DEBUG_PRINTF("%zu castle rose merge candidates\n", roses.size()); + DEBUG_PRINTF("%zu castle rose merge candidates\n", cands.size()); deque merged; - for (auto it = roses.begin(); it != roses.end(); ++it) { + for (auto it = cands.begin(); it != cands.end(); ++it) { left_id r1 = *it; CastleProto &castle1 = *r1.castle(); - const deque &verts1 = roses.vertices(r1); + const deque &verts1 = cands.vertices(r1); merged.clear(); - for (auto jt = next(it); jt != roses.end(); ++jt) { + for (auto jt = next(it); jt != cands.end(); ++jt) { left_id r2 = *jt; CastleProto &castle2 = *r2.castle(); - const deque &verts2 = roses.vertices(r2); + const deque &verts2 = cands.vertices(r2); if (castle1.repeats.size() == castle1.max_occupancy) { DEBUG_PRINTF("castle1 has hit max occupancy\n"); break; // next castle1 } - if (castle1.reach() != castle2.reach()) { - DEBUG_PRINTF("different reach\n"); - continue; // next castle2 - } + assert(castle1.reach() == castle2.reach()); if (!mergeableRoseVertices(tbi, verts1, verts2)) { DEBUG_PRINTF("not mergeable\n"); @@ -1793,12 +1949,12 @@ void mergeCastleRoses(RoseBuildImpl &tbi, RoseBouquet &roses) { } } - roses.insert(r1, verts2); + cands.insert(r1, verts2); merged.push_back(r2); } DEBUG_PRINTF("%zu roses merged\n", merged.size()); - roses.erase_all(merged.begin(), merged.end()); + cands.erase_all(merged.begin(), merged.end()); } } @@ -1924,13 +2080,13 @@ void mergeCastleLeftfixes(RoseBuildImpl &tbi) { for (auto &m : by_reach) { 
DEBUG_PRINTF("%zu castles for reach: %s\n", m.second.size(), describeClass(m.first).c_str()); - RoseBouquet &roses = m.second; - deque rose_groups; - chunkBouquets(roses, rose_groups, MERGE_GROUP_SIZE_MAX); - roses.clear(); + RoseBouquet &candidates = m.second; + deque cand_groups; + chunkBouquets(candidates, cand_groups, MERGE_CASTLE_GROUP_SIZE_MAX); + candidates.clear(); - for (auto &group : rose_groups) { - mergeCastleRoses(tbi, group); + for (auto &group : cand_groups) { + mergeCastleChunk(tbi, group); } } }