rose_build_add_mask: improve findMaskLiteral perf

This commit is contained in:
Justin Viiret 2017-08-15 13:57:43 +10:00 committed by Matthew Barr
parent 3b63a95f01
commit ba0bf0c991
3 changed files with 52 additions and 34 deletions

View File

@ -144,7 +144,7 @@ void findMaskLiteral(const vector<CharReach> &mask, bool streaming,
}
static
bool initFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
bool initFmlCandidates(const CharReach &cr, vector<ue2_literal> &cand) {
for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
char c = (char)i;
bool nocase = myisupper(c) && cr.test(mytolower(c));
@ -152,24 +152,25 @@ bool initFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
continue;
}
if (cand->size() >= MAX_MASK_LITS) {
if (cand.size() >= MAX_MASK_LITS) {
DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
return false;
}
cand->emplace_back(c, nocase);
cand.emplace_back(c, nocase);
}
assert(cand->size() <= MAX_MASK_LITS);
return !cand->empty();
assert(cand.size() <= MAX_MASK_LITS);
return !cand.empty();
}
static
bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> &curr,
vector<ue2_literal> &cand) {
DEBUG_PRINTF("expanding string with cr of %zu\n", cr.count());
DEBUG_PRINTF(" current cand list size %zu\n", cand->size());
DEBUG_PRINTF(" current cand list size %zu\n", cand.size());
vector<ue2_literal> curr;
curr.clear();
for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
char c = (char)i;
@ -178,14 +179,14 @@ bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
continue;
}
for (const auto &lit : *cand) {
for (const auto &lit : cand) {
if (curr.size() >= MAX_MASK_LITS) {
DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
return false;
}
curr.emplace_back(c, nocase);
curr.back() += lit;
curr.push_back(lit);
curr.back().push_back(c, nocase);
}
}
@ -196,7 +197,7 @@ bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
}
assert(curr.size() <= MAX_MASK_LITS);
cand->swap(curr);
cand.swap(curr);
return true;
}
@ -213,6 +214,7 @@ u32 scoreFmlCandidates(const vector<ue2_literal> &cand) {
u32 min_period = len;
for (const auto &lit : cand) {
DEBUG_PRINTF("candidate: %s\n", dumpString(lit).c_str());
u32 period = lit.length() - maxStringSelfOverlap(lit);
min_period = min(min_period, period);
}
@ -238,31 +240,37 @@ bool findMaskLiterals(const vector<CharReach> &mask, vector<ue2_literal> *lit,
*minBound = 0;
*length = 0;
vector<ue2_literal> candidates, best_candidates;
vector<ue2_literal> candidates, best_candidates, curr_candidates;
u32 best_score = 0;
u32 best_minOffset = 0;
vector<CharReach>::const_iterator it, itb, ite;
for (it = itb = mask.begin(), ite = mask.end(); it != ite; ++it) {
for (auto it = mask.begin(); it != mask.end(); ++it) {
candidates.clear();
if (!initFmlCandidates(*it, &candidates)) {
if (!initFmlCandidates(*it, candidates)) {
DEBUG_PRINTF("failed to init\n");
continue;
}
DEBUG_PRINTF("++\n");
vector<CharReach>::const_iterator jt = it;
while (jt != itb) {
auto jt = it;
while (jt != mask.begin()) {
--jt;
DEBUG_PRINTF("--\n");
if (!expandFmlCandidates(*jt, &candidates)) {
if (!expandFmlCandidates(*jt, curr_candidates, candidates)) {
DEBUG_PRINTF("expansion stopped\n");
break;
}
}
// Candidates have been expanded in reverse order.
for (auto &cand : candidates) {
cand = reverse_literal(cand);
}
u32 score = scoreFmlCandidates(candidates);
DEBUG_PRINTF("scored %u for literal set of size %zu\n", score,
candidates.size());
if (!candidates.empty() && score >= best_score) {
best_minOffset = it - itb - candidates.back().length() + 1;
best_minOffset = it - mask.begin() - candidates.back().length() + 1;
best_candidates.swap(candidates);
best_score = score;
}
@ -277,11 +285,12 @@ bool findMaskLiterals(const vector<CharReach> &mask, vector<ue2_literal> *lit,
*length = best_candidates.back().length();
DEBUG_PRINTF("best minbound %u length %u\n", *minBound, *length);
for (const auto &cand : best_candidates) {
assert(cand.length() == *length);
lit->push_back(cand);
}
assert(all_of_in(best_candidates, [&](const ue2_literal &s) {
return s.length() == *length;
}));
*lit = std::move(best_candidates);
return true;
}

View File

@ -291,18 +291,24 @@ void ue2_literal::push_back(char c, bool nc) {
s.push_back(c);
}
void ue2_literal::reverse() {
std::reverse(s.begin(), s.end());
const size_t len = nocase.size();
for (size_t i = 0; i < len / 2; i++) {
size_t j = len - i - 1;
bool a = nocase.test(i);
bool b = nocase.test(j);
nocase.set(i, b);
nocase.set(j, a);
}
}
// Return a copy of this literal in reverse order.
ue2_literal reverse_literal(const ue2_literal &in) {
ue2_literal rv;
if (in.empty()) {
return rv;
}
for (ue2_literal::const_iterator it = in.end(); it != in.begin();) {
--it;
rv.push_back(it->c, it->nocase);
}
return rv;
auto out = in;
out.reverse();
return out;
}
bool ue2_literal::operator<(const ue2_literal &b) const {

View File

@ -191,6 +191,9 @@ public:
return a;
}
/// Reverse this literal in-place.
void reverse();
void operator+=(const ue2_literal &b);
bool operator==(const ue2_literal &b) const {
return s == b.s && nocase == b.nocase;