mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose_build_add_mask: improve findMaskLiteral perf
This commit is contained in:
parent
3b63a95f01
commit
ba0bf0c991
@ -144,7 +144,7 @@ void findMaskLiteral(const vector<CharReach> &mask, bool streaming,
|
||||
}
|
||||
|
||||
static
|
||||
bool initFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
|
||||
bool initFmlCandidates(const CharReach &cr, vector<ue2_literal> &cand) {
|
||||
for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
|
||||
char c = (char)i;
|
||||
bool nocase = myisupper(c) && cr.test(mytolower(c));
|
||||
@ -152,24 +152,25 @@ bool initFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cand->size() >= MAX_MASK_LITS) {
|
||||
if (cand.size() >= MAX_MASK_LITS) {
|
||||
DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
|
||||
return false;
|
||||
}
|
||||
|
||||
cand->emplace_back(c, nocase);
|
||||
cand.emplace_back(c, nocase);
|
||||
}
|
||||
|
||||
assert(cand->size() <= MAX_MASK_LITS);
|
||||
return !cand->empty();
|
||||
assert(cand.size() <= MAX_MASK_LITS);
|
||||
return !cand.empty();
|
||||
}
|
||||
|
||||
static
|
||||
bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
|
||||
bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> &curr,
|
||||
vector<ue2_literal> &cand) {
|
||||
DEBUG_PRINTF("expanding string with cr of %zu\n", cr.count());
|
||||
DEBUG_PRINTF(" current cand list size %zu\n", cand->size());
|
||||
DEBUG_PRINTF(" current cand list size %zu\n", cand.size());
|
||||
|
||||
vector<ue2_literal> curr;
|
||||
curr.clear();
|
||||
|
||||
for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
|
||||
char c = (char)i;
|
||||
@ -178,14 +179,14 @@ bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const auto &lit : *cand) {
|
||||
for (const auto &lit : cand) {
|
||||
if (curr.size() >= MAX_MASK_LITS) {
|
||||
DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
|
||||
return false;
|
||||
}
|
||||
|
||||
curr.emplace_back(c, nocase);
|
||||
curr.back() += lit;
|
||||
curr.push_back(lit);
|
||||
curr.back().push_back(c, nocase);
|
||||
}
|
||||
}
|
||||
|
||||
@ -196,7 +197,7 @@ bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
|
||||
}
|
||||
|
||||
assert(curr.size() <= MAX_MASK_LITS);
|
||||
cand->swap(curr);
|
||||
cand.swap(curr);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -213,6 +214,7 @@ u32 scoreFmlCandidates(const vector<ue2_literal> &cand) {
|
||||
u32 min_period = len;
|
||||
|
||||
for (const auto &lit : cand) {
|
||||
DEBUG_PRINTF("candidate: %s\n", dumpString(lit).c_str());
|
||||
u32 period = lit.length() - maxStringSelfOverlap(lit);
|
||||
min_period = min(min_period, period);
|
||||
}
|
||||
@ -238,31 +240,37 @@ bool findMaskLiterals(const vector<CharReach> &mask, vector<ue2_literal> *lit,
|
||||
*minBound = 0;
|
||||
*length = 0;
|
||||
|
||||
vector<ue2_literal> candidates, best_candidates;
|
||||
vector<ue2_literal> candidates, best_candidates, curr_candidates;
|
||||
u32 best_score = 0;
|
||||
u32 best_minOffset = 0;
|
||||
vector<CharReach>::const_iterator it, itb, ite;
|
||||
for (it = itb = mask.begin(), ite = mask.end(); it != ite; ++it) {
|
||||
|
||||
for (auto it = mask.begin(); it != mask.end(); ++it) {
|
||||
candidates.clear();
|
||||
if (!initFmlCandidates(*it, &candidates)) {
|
||||
if (!initFmlCandidates(*it, candidates)) {
|
||||
DEBUG_PRINTF("failed to init\n");
|
||||
continue;
|
||||
}
|
||||
DEBUG_PRINTF("++\n");
|
||||
vector<CharReach>::const_iterator jt = it;
|
||||
while (jt != itb) {
|
||||
auto jt = it;
|
||||
while (jt != mask.begin()) {
|
||||
--jt;
|
||||
DEBUG_PRINTF("--\n");
|
||||
if (!expandFmlCandidates(*jt, &candidates)) {
|
||||
if (!expandFmlCandidates(*jt, curr_candidates, candidates)) {
|
||||
DEBUG_PRINTF("expansion stopped\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Candidates have been expanded in reverse order.
|
||||
for (auto &cand : candidates) {
|
||||
cand = reverse_literal(cand);
|
||||
}
|
||||
|
||||
u32 score = scoreFmlCandidates(candidates);
|
||||
DEBUG_PRINTF("scored %u for literal set of size %zu\n", score,
|
||||
candidates.size());
|
||||
if (!candidates.empty() && score >= best_score) {
|
||||
best_minOffset = it - itb - candidates.back().length() + 1;
|
||||
best_minOffset = it - mask.begin() - candidates.back().length() + 1;
|
||||
best_candidates.swap(candidates);
|
||||
best_score = score;
|
||||
}
|
||||
@ -277,11 +285,12 @@ bool findMaskLiterals(const vector<CharReach> &mask, vector<ue2_literal> *lit,
|
||||
*length = best_candidates.back().length();
|
||||
|
||||
DEBUG_PRINTF("best minbound %u length %u\n", *minBound, *length);
|
||||
for (const auto &cand : best_candidates) {
|
||||
assert(cand.length() == *length);
|
||||
lit->push_back(cand);
|
||||
}
|
||||
|
||||
assert(all_of_in(best_candidates, [&](const ue2_literal &s) {
|
||||
return s.length() == *length;
|
||||
}));
|
||||
|
||||
*lit = std::move(best_candidates);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -291,18 +291,24 @@ void ue2_literal::push_back(char c, bool nc) {
|
||||
s.push_back(c);
|
||||
}
|
||||
|
||||
// Return a copy of this literal in reverse order.
|
||||
ue2_literal reverse_literal(const ue2_literal &in) {
|
||||
ue2_literal rv;
|
||||
if (in.empty()) {
|
||||
return rv;
|
||||
void ue2_literal::reverse() {
|
||||
std::reverse(s.begin(), s.end());
|
||||
|
||||
const size_t len = nocase.size();
|
||||
for (size_t i = 0; i < len / 2; i++) {
|
||||
size_t j = len - i - 1;
|
||||
bool a = nocase.test(i);
|
||||
bool b = nocase.test(j);
|
||||
nocase.set(i, b);
|
||||
nocase.set(j, a);
|
||||
}
|
||||
}
|
||||
|
||||
for (ue2_literal::const_iterator it = in.end(); it != in.begin();) {
|
||||
--it;
|
||||
rv.push_back(it->c, it->nocase);
|
||||
}
|
||||
return rv;
|
||||
// Return a copy of this literal in reverse order.
|
||||
ue2_literal reverse_literal(const ue2_literal &in) {
|
||||
auto out = in;
|
||||
out.reverse();
|
||||
return out;
|
||||
}
|
||||
|
||||
bool ue2_literal::operator<(const ue2_literal &b) const {
|
||||
|
@ -191,6 +191,9 @@ public:
|
||||
return a;
|
||||
}
|
||||
|
||||
/// Reverse this literal in-place.
|
||||
void reverse();
|
||||
|
||||
void operator+=(const ue2_literal &b);
|
||||
bool operator==(const ue2_literal &b) const {
|
||||
return s == b.s && nocase == b.nocase;
|
||||
|
Loading…
x
Reference in New Issue
Block a user