mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Rework literal overlap checks for merging engines
Also increase the size of chunks we consider merging for castles.
This commit is contained in:
parent
05beadf52f
commit
b9c5d65f0e
@ -93,6 +93,7 @@ static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64;
|
||||
static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32;
|
||||
static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10;
|
||||
static const size_t MERGE_GROUP_SIZE_MAX = 200;
|
||||
static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000;
|
||||
|
||||
/** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */
|
||||
static const size_t DFA_CHUNK_SIZE_MAX = 200;
|
||||
@ -799,47 +800,69 @@ static void chunkBouquets(const Bouquet<EngineRef> &in,
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool stringsCanFinishAtSameSpot(const ue2_literal &u,
|
||||
ue2_literal::const_iterator v_b,
|
||||
ue2_literal::const_iterator v_e) {
|
||||
ue2_literal::const_iterator u_e = u.end();
|
||||
ue2_literal::const_iterator u_b = u.begin();
|
||||
|
||||
while (u_e != u_b && v_e != v_b) {
|
||||
--u_e;
|
||||
--v_e;
|
||||
|
||||
if (!overlaps(*u_e, *v_e)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prefix analysis: For lit1 with delay1 and lit2 with delay2, let L be the
|
||||
* length of the largest suffix of lit1 that is a prefix of lit2. A merge is
|
||||
* bad if L - delay1 > len(lit2) - delay2.
|
||||
* Check that if after u has been seen, that it is impossible for the arrival of
|
||||
* v to require the inspection of an engine earlier than u did.
|
||||
*
|
||||
* OR if we would have to check 2 literals of differing lags at the same
|
||||
* point.
|
||||
* Let delta be the earliest that v can be seen after u (may be zero)
|
||||
*
|
||||
* ie, we require u_loc - ulag <= v_loc - vlag (v_loc = u_loc + delta)
|
||||
* ==> - ulag <= delta - vlag
|
||||
* ==> vlag - ulag <= delta
|
||||
*/
|
||||
static
|
||||
bool checkPrefix(const rose_literal_id &ul, const u32 ulag,
|
||||
const rose_literal_id &vl, const u32 vlag) {
|
||||
DEBUG_PRINTF("%s %s\n", escapeString(ul.s).c_str(),
|
||||
escapeString(vl.s).c_str());
|
||||
if (ulag != vlag && (vl.delay || ul.delay || isSuffix(ul.s, vl.s))) {
|
||||
/* rose literals should not be delayed anyway */
|
||||
DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag,
|
||||
escapeString(vl.s).c_str(), vlag);
|
||||
|
||||
if (vl.delay || ul.delay) {
|
||||
/* engine related literals should not be delayed anyway */
|
||||
return false;
|
||||
}
|
||||
|
||||
// Note that maxOverlap also picks up infixes.
|
||||
size_t overlap = maxOverlap(ul, vl);
|
||||
if (overlap < ulag) {
|
||||
return true; /* avoiding underflow */
|
||||
if (ulag >= vlag) {
|
||||
assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag);
|
||||
return true;
|
||||
}
|
||||
return overlap - ulag <= vl.elength() - vlag;
|
||||
|
||||
size_t min_allowed_delta = vlag - ulag;
|
||||
DEBUG_PRINTF("min allow distace %zu\n", min_allowed_delta);
|
||||
|
||||
for (size_t i = 0; i < min_allowed_delta; i++) {
|
||||
if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
|
||||
DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("OK\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
|
||||
RoseVertex v) {
|
||||
assert(u != v);
|
||||
|
||||
const auto &ulits = tbi.g[u].literals;
|
||||
const auto &vlits = tbi.g[v].literals;
|
||||
|
||||
// We cannot merge roses that prefix literals in different tables.
|
||||
if (tbi.literals.right.at(*ulits.begin()).table !=
|
||||
tbi.literals.right.at(*vlits.begin()).table) {
|
||||
DEBUG_PRINTF("literals in different tables\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const left_id u_left(tbi.g[u].left), v_left(tbi.g[v].left);
|
||||
static
|
||||
bool hasSameEngineType(const RoseVertexProps &u_prop,
|
||||
const RoseVertexProps &v_prop) {
|
||||
const left_id u_left(u_prop.left), v_left(v_prop.left);
|
||||
|
||||
if (u_left.haig() || v_left.haig()) {
|
||||
if (u_left.graph() != v_left.graph()) {
|
||||
@ -859,11 +882,68 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool compatibleLiteralsForMerge(
|
||||
const vector<pair<const rose_literal_id *, u32>> &ulits,
|
||||
const vector<pair<const rose_literal_id *, u32>> &vlits) {
|
||||
assert(!ulits.empty());
|
||||
assert(!vlits.empty());
|
||||
|
||||
// We cannot merge engines that prefix literals in different tables.
|
||||
if (ulits[0].first->table != vlits[0].first->table) {
|
||||
DEBUG_PRINTF("literals in different tables\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* An engine requires that all accesses to it are ordered by offsets. (ie,
|
||||
we can not check an engine's state at offset Y, if we have already
|
||||
checked its status at offset X and X > Y). If we can not establish that
|
||||
the literals used for triggering will statisfy this property, then it is
|
||||
not safe to merge the engine. */
|
||||
for (const auto &ue : ulits) {
|
||||
const rose_literal_id &ul = *ue.first;
|
||||
u32 ulag = ue.second;
|
||||
|
||||
if (ul.delay) {
|
||||
return false; // We don't handle delayed cases yet.
|
||||
}
|
||||
|
||||
for (const auto &ve : vlits) {
|
||||
const rose_literal_id &vl = *ve.first;
|
||||
u32 vlag = ve.second;
|
||||
|
||||
if (vl.delay) {
|
||||
return false; // We don't handle delayed cases yet.
|
||||
}
|
||||
|
||||
if (!checkPrefix(ul, ulag, vl, vlag)
|
||||
|| !checkPrefix(vl, vlag, ul, ulag)) {
|
||||
DEBUG_PRINTF("prefix check failed\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
|
||||
RoseVertex v) {
|
||||
assert(u != v);
|
||||
|
||||
if (!hasSameEngineType(tbi.g[u], tbi.g[v])) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// UE-1675: in block mode, we want to be a little more selective -- only
|
||||
// merge prefix roses when the literal sets are the same.
|
||||
if (!tbi.cc.streaming && tbi.isRootSuccessor(u)) {
|
||||
assert(tbi.isRootSuccessor(v));
|
||||
if (ulits != vlits) {
|
||||
|
||||
if (tbi.g[u].literals != tbi.g[v].literals) {
|
||||
DEBUG_PRINTF("literals aren't identical (block mode prefix)\n");
|
||||
return false;
|
||||
}
|
||||
@ -883,88 +963,104 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
|
||||
}
|
||||
}
|
||||
|
||||
// We accept any pair of literal sets A and B where no literal in A
|
||||
// contains a literal in B and no literal in B contains a literal in A.
|
||||
|
||||
const u32 ulag = tbi.g[u].left.lag;
|
||||
const u32 vlag = tbi.g[v].left.lag;
|
||||
|
||||
for (const u32 &ulit : ulits) {
|
||||
const rose_literal_id &ul = tbi.literals.right.at(ulit);
|
||||
|
||||
if (ul.delay) {
|
||||
return false; // We don't handle delayed cases here.
|
||||
}
|
||||
|
||||
for (const u32 &vlit : vlits) {
|
||||
const rose_literal_id &vl = tbi.literals.right.at(vlit);
|
||||
|
||||
if (vl.delay) {
|
||||
return false; // We don't handle delayed cases here.
|
||||
}
|
||||
|
||||
if (!checkPrefix(ul, ulag, vl, vlag) ||
|
||||
!checkPrefix(vl, vlag, ul, ulag)) {
|
||||
DEBUG_PRINTF("prefix check failed\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
u32 ulag = tbi.g[u].left.lag;
|
||||
vector<pair<const rose_literal_id *, u32>> ulits;
|
||||
ulits.reserve(tbi.g[u].literals.size());
|
||||
for (u32 id : tbi.g[u].literals) {
|
||||
ulits.push_back(make_pair(&tbi.literals.right.at(id), ulag));
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("roses on %zu and %zu are mergeable\n",
|
||||
tbi.g[u].idx, tbi.g[v].idx);
|
||||
u32 vlag = tbi.g[v].left.lag;
|
||||
vector<pair<const rose_literal_id *, u32>> vlits;
|
||||
vlits.reserve(tbi.g[v].literals.size());
|
||||
for (u32 id : tbi.g[v].literals) {
|
||||
vlits.push_back(make_pair(&tbi.literals.right.at(id), vlag));
|
||||
}
|
||||
|
||||
if (!compatibleLiteralsForMerge(ulits, vlits)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].idx,
|
||||
tbi.g[v].idx);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* We cannot merge an engine, if a trigger literal and a post literal overlap
|
||||
* in such a way that engine status needs to be check at a point before the
|
||||
* engine's current location.
|
||||
*
|
||||
* i.e., for a trigger literal u and a pos literal v,
|
||||
* where delta is the earliest v can appear after t,
|
||||
* we require that v_loc - v_lag >= u_loc
|
||||
* ==> u_loc + delta - v_lag >= u_loc
|
||||
* ==> delta >= v_lag
|
||||
*
|
||||
*/
|
||||
static
|
||||
bool mergeableDelays(const RoseBuildImpl &tbi, const flat_set<u32> &ulits,
|
||||
const flat_set<u32> &vlits, u32 vlag) {
|
||||
for (const u32 &ulit : ulits) {
|
||||
const rose_literal_id &ul = tbi.literals.right.at(ulit);
|
||||
assert(!ul.delay); // this should never have got this far?
|
||||
for (const u32 vlit : vlits) {
|
||||
const rose_literal_id &vl = tbi.literals.right.at(vlit);
|
||||
assert(!vl.delay); // this should never have got this far?
|
||||
bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl,
|
||||
u32 vlag) {
|
||||
DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(),
|
||||
escapeString(vl.s).c_str(), vlag);
|
||||
|
||||
DEBUG_PRINTF("%s %s (lag %u, overlap %zu)\n",
|
||||
escapeString(ul.s).c_str(),
|
||||
escapeString(vl.s).c_str(), vlag,
|
||||
maxOverlap(ul, vl));
|
||||
size_t l = vl.elength() - maxOverlap(ul, vl);
|
||||
if (vlag > l) {
|
||||
DEBUG_PRINTF("failed lag check!\n");
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 0; i < vlag; i++) {
|
||||
if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
|
||||
DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("OK\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
static never_inline
|
||||
bool checkPredDelays(const RoseBuildImpl &tbi, const deque<RoseVertex> &v1,
|
||||
const deque<RoseVertex> &v2) {
|
||||
set<RoseVertex> preds;
|
||||
flat_set<RoseVertex> preds;
|
||||
for (auto v : v1) {
|
||||
insert(&preds, inv_adjacent_vertices(v, tbi.g));
|
||||
}
|
||||
|
||||
flat_set<u32> pred_lits;
|
||||
|
||||
/* No need to examine delays of a common pred - as it must already have
|
||||
* survived the delay checks.
|
||||
*
|
||||
* This is important when the pred is in the anchored table as
|
||||
* the literal is no longer available. */
|
||||
flat_set<RoseVertex> known_good_preds;
|
||||
for (auto v : v2) {
|
||||
insert(&known_good_preds, inv_adjacent_vertices(v, tbi.g));
|
||||
}
|
||||
|
||||
for (auto u : preds) {
|
||||
const auto &pred_lits = tbi.g[u].literals;
|
||||
for (auto v : v2) {
|
||||
u32 vlag = tbi.g[v].left.lag;
|
||||
DEBUG_PRINTF("consider (%zu, %zu) lag=%u\n", tbi.g[u].idx,
|
||||
tbi.g[v].idx, vlag);
|
||||
if (edge_by_target(u, v, tbi.g).second) {
|
||||
/* no need to examine delays as it is a common pred - so checks
|
||||
* must already have survived the delay checks.
|
||||
* This is important when the pred is in the anchored table as
|
||||
* the literal is no longer available. */
|
||||
DEBUG_PRINTF("ok, also %zu is also a pred of %zu\n",
|
||||
tbi.g[u].idx, tbi.g[v].idx);
|
||||
continue;
|
||||
}
|
||||
if (!mergeableDelays(tbi, pred_lits, tbi.g[v].literals, vlag)) {
|
||||
return false;
|
||||
if (!contains(known_good_preds, &u)) {
|
||||
insert(&pred_lits, tbi.g[u].literals);
|
||||
}
|
||||
}
|
||||
|
||||
vector<const rose_literal_id *> pred_rose_lits;
|
||||
pred_rose_lits.reserve(pred_lits.size());
|
||||
for (const auto &p : pred_lits) {
|
||||
pred_rose_lits.push_back(&tbi.literals.right.at(p));
|
||||
}
|
||||
|
||||
for (auto v : v2) {
|
||||
u32 vlag = tbi.g[v].left.lag;
|
||||
if (!vlag) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const u32 vlit : tbi.g[v].literals) {
|
||||
const rose_literal_id &vl = tbi.literals.right.at(vlit);
|
||||
assert(!vl.delay); // this should never have got this far?
|
||||
for (const auto &ul : pred_rose_lits) {
|
||||
assert(!ul->delay); // this should never have got this far?
|
||||
|
||||
if (!checkPredDelay(*ul, vl, vlag)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -976,17 +1072,79 @@ static
|
||||
bool mergeableRoseVertices(const RoseBuildImpl &tbi,
|
||||
const deque<RoseVertex> &verts1,
|
||||
const deque<RoseVertex> &verts2) {
|
||||
for (auto v1 : verts1) {
|
||||
for (auto v2 : verts2) {
|
||||
if (!mergeableRoseVertices(tbi, v1, v2)) {
|
||||
return false;
|
||||
}
|
||||
assert(!verts1.empty());
|
||||
assert(!verts2.empty());
|
||||
|
||||
RoseVertex u_front = verts1.front();
|
||||
RoseVertex v_front = verts2.front();
|
||||
|
||||
/* all vertices must have the same engine type: assume all verts in each
|
||||
* group are already of the same type */
|
||||
if (!hasSameEngineType(tbi.g[u_front], tbi.g[v_front])) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool is_prefix = tbi.isRootSuccessor(u_front);
|
||||
|
||||
/* We cannot merge prefixes/vertices if they are successors of different
|
||||
* root vertices: similarly, assume the grouped vertices are compatible */
|
||||
if (is_prefix) {
|
||||
assert(tbi.isRootSuccessor(v_front));
|
||||
set<RoseVertex> u_preds;
|
||||
set<RoseVertex> v_preds;
|
||||
insert(&u_preds, inv_adjacent_vertices(u_front, tbi.g));
|
||||
insert(&v_preds, inv_adjacent_vertices(v_front, tbi.g));
|
||||
|
||||
if (u_preds != v_preds) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */
|
||||
for (auto a : verts1) {
|
||||
// UE-1675: in block mode, we want to be a little more selective --
|
||||
// only merge prefix roses when the literal sets are the same.
|
||||
if (!tbi.cc.streaming && is_prefix) {
|
||||
assert(tbi.isRootSuccessor(a));
|
||||
|
||||
if (tbi.g[u_front].literals != tbi.g[a].literals) {
|
||||
DEBUG_PRINTF("literals aren't identical (block mode prefix)\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
u32 ulag = tbi.g[a].left.lag;
|
||||
for (u32 id : tbi.g[a].literals) {
|
||||
ulits.push_back(make_pair(&tbi.literals.right.at(id), ulag));
|
||||
}
|
||||
}
|
||||
|
||||
vector<pair<const rose_literal_id *, u32>> vlits;
|
||||
for (auto a : verts2) {
|
||||
// UE-1675: in block mode, we want to be a little more selective --
|
||||
// only merge prefix roses when the literal sets are the same.
|
||||
if (!tbi.cc.streaming && is_prefix) {
|
||||
assert(tbi.isRootSuccessor(a));
|
||||
|
||||
if (tbi.g[u_front].literals != tbi.g[a].literals) {
|
||||
DEBUG_PRINTF("literals aren't identical (block mode prefix)\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
u32 vlag = tbi.g[a].left.lag;
|
||||
for (u32 id : tbi.g[a].literals) {
|
||||
vlits.push_back(make_pair(&tbi.literals.right.at(id), vlag));
|
||||
}
|
||||
}
|
||||
|
||||
if (!compatibleLiteralsForMerge(ulits, vlits)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check preds are compatible as well.
|
||||
if (!checkPredDelays(tbi, verts1, verts2) ||
|
||||
!checkPredDelays(tbi, verts2, verts1)) {
|
||||
if (!checkPredDelays(tbi, verts1, verts2)
|
||||
|| !checkPredDelays(tbi, verts2, verts1)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1741,33 +1899,31 @@ void mergeNfaLeftfixes(RoseBuildImpl &tbi, RoseBouquet &roses) {
|
||||
}
|
||||
|
||||
static
|
||||
void mergeCastleRoses(RoseBuildImpl &tbi, RoseBouquet &roses) {
|
||||
void mergeCastleChunk(RoseBuildImpl &tbi, RoseBouquet &cands) {
|
||||
/* caller must have already ensured that candidates have the same reach */
|
||||
RoseGraph &g = tbi.g;
|
||||
DEBUG_PRINTF("%zu castle rose merge candidates\n", roses.size());
|
||||
DEBUG_PRINTF("%zu castle rose merge candidates\n", cands.size());
|
||||
|
||||
deque<left_id> merged;
|
||||
|
||||
for (auto it = roses.begin(); it != roses.end(); ++it) {
|
||||
for (auto it = cands.begin(); it != cands.end(); ++it) {
|
||||
left_id r1 = *it;
|
||||
CastleProto &castle1 = *r1.castle();
|
||||
const deque<RoseVertex> &verts1 = roses.vertices(r1);
|
||||
const deque<RoseVertex> &verts1 = cands.vertices(r1);
|
||||
|
||||
merged.clear();
|
||||
|
||||
for (auto jt = next(it); jt != roses.end(); ++jt) {
|
||||
for (auto jt = next(it); jt != cands.end(); ++jt) {
|
||||
left_id r2 = *jt;
|
||||
CastleProto &castle2 = *r2.castle();
|
||||
const deque<RoseVertex> &verts2 = roses.vertices(r2);
|
||||
const deque<RoseVertex> &verts2 = cands.vertices(r2);
|
||||
|
||||
if (castle1.repeats.size() == castle1.max_occupancy) {
|
||||
DEBUG_PRINTF("castle1 has hit max occupancy\n");
|
||||
break; // next castle1
|
||||
}
|
||||
|
||||
if (castle1.reach() != castle2.reach()) {
|
||||
DEBUG_PRINTF("different reach\n");
|
||||
continue; // next castle2
|
||||
}
|
||||
assert(castle1.reach() == castle2.reach());
|
||||
|
||||
if (!mergeableRoseVertices(tbi, verts1, verts2)) {
|
||||
DEBUG_PRINTF("not mergeable\n");
|
||||
@ -1793,12 +1949,12 @@ void mergeCastleRoses(RoseBuildImpl &tbi, RoseBouquet &roses) {
|
||||
}
|
||||
}
|
||||
|
||||
roses.insert(r1, verts2);
|
||||
cands.insert(r1, verts2);
|
||||
merged.push_back(r2);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("%zu roses merged\n", merged.size());
|
||||
roses.erase_all(merged.begin(), merged.end());
|
||||
cands.erase_all(merged.begin(), merged.end());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1924,13 +2080,13 @@ void mergeCastleLeftfixes(RoseBuildImpl &tbi) {
|
||||
for (auto &m : by_reach) {
|
||||
DEBUG_PRINTF("%zu castles for reach: %s\n", m.second.size(),
|
||||
describeClass(m.first).c_str());
|
||||
RoseBouquet &roses = m.second;
|
||||
deque<RoseBouquet> rose_groups;
|
||||
chunkBouquets(roses, rose_groups, MERGE_GROUP_SIZE_MAX);
|
||||
roses.clear();
|
||||
RoseBouquet &candidates = m.second;
|
||||
deque<RoseBouquet> cand_groups;
|
||||
chunkBouquets(candidates, cand_groups, MERGE_CASTLE_GROUP_SIZE_MAX);
|
||||
candidates.clear();
|
||||
|
||||
for (auto &group : rose_groups) {
|
||||
mergeCastleRoses(tbi, group);
|
||||
for (auto &group : cand_groups) {
|
||||
mergeCastleChunk(tbi, group);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user