mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose_build_add_mask: improve findMaskLiteral perf
This commit is contained in:
parent
3b63a95f01
commit
ba0bf0c991
@ -144,7 +144,7 @@ void findMaskLiteral(const vector<CharReach> &mask, bool streaming,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool initFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
|
bool initFmlCandidates(const CharReach &cr, vector<ue2_literal> &cand) {
|
||||||
for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
|
for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
|
||||||
char c = (char)i;
|
char c = (char)i;
|
||||||
bool nocase = myisupper(c) && cr.test(mytolower(c));
|
bool nocase = myisupper(c) && cr.test(mytolower(c));
|
||||||
@ -152,24 +152,25 @@ bool initFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cand->size() >= MAX_MASK_LITS) {
|
if (cand.size() >= MAX_MASK_LITS) {
|
||||||
DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
|
DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
cand->emplace_back(c, nocase);
|
cand.emplace_back(c, nocase);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(cand->size() <= MAX_MASK_LITS);
|
assert(cand.size() <= MAX_MASK_LITS);
|
||||||
return !cand->empty();
|
return !cand.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
|
bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> &curr,
|
||||||
|
vector<ue2_literal> &cand) {
|
||||||
DEBUG_PRINTF("expanding string with cr of %zu\n", cr.count());
|
DEBUG_PRINTF("expanding string with cr of %zu\n", cr.count());
|
||||||
DEBUG_PRINTF(" current cand list size %zu\n", cand->size());
|
DEBUG_PRINTF(" current cand list size %zu\n", cand.size());
|
||||||
|
|
||||||
vector<ue2_literal> curr;
|
curr.clear();
|
||||||
|
|
||||||
for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
|
for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
|
||||||
char c = (char)i;
|
char c = (char)i;
|
||||||
@ -178,14 +179,14 @@ bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto &lit : *cand) {
|
for (const auto &lit : cand) {
|
||||||
if (curr.size() >= MAX_MASK_LITS) {
|
if (curr.size() >= MAX_MASK_LITS) {
|
||||||
DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
|
DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
curr.emplace_back(c, nocase);
|
curr.push_back(lit);
|
||||||
curr.back() += lit;
|
curr.back().push_back(c, nocase);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -196,7 +197,7 @@ bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
assert(curr.size() <= MAX_MASK_LITS);
|
assert(curr.size() <= MAX_MASK_LITS);
|
||||||
cand->swap(curr);
|
cand.swap(curr);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -213,6 +214,7 @@ u32 scoreFmlCandidates(const vector<ue2_literal> &cand) {
|
|||||||
u32 min_period = len;
|
u32 min_period = len;
|
||||||
|
|
||||||
for (const auto &lit : cand) {
|
for (const auto &lit : cand) {
|
||||||
|
DEBUG_PRINTF("candidate: %s\n", dumpString(lit).c_str());
|
||||||
u32 period = lit.length() - maxStringSelfOverlap(lit);
|
u32 period = lit.length() - maxStringSelfOverlap(lit);
|
||||||
min_period = min(min_period, period);
|
min_period = min(min_period, period);
|
||||||
}
|
}
|
||||||
@ -238,31 +240,37 @@ bool findMaskLiterals(const vector<CharReach> &mask, vector<ue2_literal> *lit,
|
|||||||
*minBound = 0;
|
*minBound = 0;
|
||||||
*length = 0;
|
*length = 0;
|
||||||
|
|
||||||
vector<ue2_literal> candidates, best_candidates;
|
vector<ue2_literal> candidates, best_candidates, curr_candidates;
|
||||||
u32 best_score = 0;
|
u32 best_score = 0;
|
||||||
u32 best_minOffset = 0;
|
u32 best_minOffset = 0;
|
||||||
vector<CharReach>::const_iterator it, itb, ite;
|
|
||||||
for (it = itb = mask.begin(), ite = mask.end(); it != ite; ++it) {
|
for (auto it = mask.begin(); it != mask.end(); ++it) {
|
||||||
candidates.clear();
|
candidates.clear();
|
||||||
if (!initFmlCandidates(*it, &candidates)) {
|
if (!initFmlCandidates(*it, candidates)) {
|
||||||
DEBUG_PRINTF("failed to init\n");
|
DEBUG_PRINTF("failed to init\n");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
DEBUG_PRINTF("++\n");
|
DEBUG_PRINTF("++\n");
|
||||||
vector<CharReach>::const_iterator jt = it;
|
auto jt = it;
|
||||||
while (jt != itb) {
|
while (jt != mask.begin()) {
|
||||||
--jt;
|
--jt;
|
||||||
DEBUG_PRINTF("--\n");
|
DEBUG_PRINTF("--\n");
|
||||||
if (!expandFmlCandidates(*jt, &candidates)) {
|
if (!expandFmlCandidates(*jt, curr_candidates, candidates)) {
|
||||||
DEBUG_PRINTF("expansion stopped\n");
|
DEBUG_PRINTF("expansion stopped\n");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Candidates have been expanded in reverse order.
|
||||||
|
for (auto &cand : candidates) {
|
||||||
|
cand = reverse_literal(cand);
|
||||||
|
}
|
||||||
|
|
||||||
u32 score = scoreFmlCandidates(candidates);
|
u32 score = scoreFmlCandidates(candidates);
|
||||||
DEBUG_PRINTF("scored %u for literal set of size %zu\n", score,
|
DEBUG_PRINTF("scored %u for literal set of size %zu\n", score,
|
||||||
candidates.size());
|
candidates.size());
|
||||||
if (!candidates.empty() && score >= best_score) {
|
if (!candidates.empty() && score >= best_score) {
|
||||||
best_minOffset = it - itb - candidates.back().length() + 1;
|
best_minOffset = it - mask.begin() - candidates.back().length() + 1;
|
||||||
best_candidates.swap(candidates);
|
best_candidates.swap(candidates);
|
||||||
best_score = score;
|
best_score = score;
|
||||||
}
|
}
|
||||||
@ -277,11 +285,12 @@ bool findMaskLiterals(const vector<CharReach> &mask, vector<ue2_literal> *lit,
|
|||||||
*length = best_candidates.back().length();
|
*length = best_candidates.back().length();
|
||||||
|
|
||||||
DEBUG_PRINTF("best minbound %u length %u\n", *minBound, *length);
|
DEBUG_PRINTF("best minbound %u length %u\n", *minBound, *length);
|
||||||
for (const auto &cand : best_candidates) {
|
|
||||||
assert(cand.length() == *length);
|
|
||||||
lit->push_back(cand);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
assert(all_of_in(best_candidates, [&](const ue2_literal &s) {
|
||||||
|
return s.length() == *length;
|
||||||
|
}));
|
||||||
|
|
||||||
|
*lit = std::move(best_candidates);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -291,18 +291,24 @@ void ue2_literal::push_back(char c, bool nc) {
|
|||||||
s.push_back(c);
|
s.push_back(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return a copy of this literal in reverse order.
|
void ue2_literal::reverse() {
|
||||||
ue2_literal reverse_literal(const ue2_literal &in) {
|
std::reverse(s.begin(), s.end());
|
||||||
ue2_literal rv;
|
|
||||||
if (in.empty()) {
|
const size_t len = nocase.size();
|
||||||
return rv;
|
for (size_t i = 0; i < len / 2; i++) {
|
||||||
|
size_t j = len - i - 1;
|
||||||
|
bool a = nocase.test(i);
|
||||||
|
bool b = nocase.test(j);
|
||||||
|
nocase.set(i, b);
|
||||||
|
nocase.set(j, a);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (ue2_literal::const_iterator it = in.end(); it != in.begin();) {
|
// Return a copy of this literal in reverse order.
|
||||||
--it;
|
ue2_literal reverse_literal(const ue2_literal &in) {
|
||||||
rv.push_back(it->c, it->nocase);
|
auto out = in;
|
||||||
}
|
out.reverse();
|
||||||
return rv;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ue2_literal::operator<(const ue2_literal &b) const {
|
bool ue2_literal::operator<(const ue2_literal &b) const {
|
||||||
|
@ -191,6 +191,9 @@ public:
|
|||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reverse this literal in-place.
|
||||||
|
void reverse();
|
||||||
|
|
||||||
void operator+=(const ue2_literal &b);
|
void operator+=(const ue2_literal &b);
|
||||||
bool operator==(const ue2_literal &b) const {
|
bool operator==(const ue2_literal &b) const {
|
||||||
return s == b.s && nocase == b.nocase;
|
return s == b.s && nocase == b.nocase;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user