diff --git a/src/rose/rose_build_add_mask.cpp b/src/rose/rose_build_add_mask.cpp index c60c053e..0a7e44c3 100644 --- a/src/rose/rose_build_add_mask.cpp +++ b/src/rose/rose_build_add_mask.cpp @@ -144,7 +144,7 @@ void findMaskLiteral(const vector &mask, bool streaming, } static -bool initFmlCandidates(const CharReach &cr, vector *cand) { +bool initFmlCandidates(const CharReach &cr, vector &cand) { for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) { char c = (char)i; bool nocase = myisupper(c) && cr.test(mytolower(c)); @@ -152,24 +152,25 @@ bool initFmlCandidates(const CharReach &cr, vector *cand) { continue; } - if (cand->size() >= MAX_MASK_LITS) { + if (cand.size() >= MAX_MASK_LITS) { DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS); return false; } - cand->emplace_back(c, nocase); + cand.emplace_back(c, nocase); } - assert(cand->size() <= MAX_MASK_LITS); - return !cand->empty(); + assert(cand.size() <= MAX_MASK_LITS); + return !cand.empty(); } static -bool expandFmlCandidates(const CharReach &cr, vector *cand) { +bool expandFmlCandidates(const CharReach &cr, vector &curr, + vector &cand) { DEBUG_PRINTF("expanding string with cr of %zu\n", cr.count()); - DEBUG_PRINTF(" current cand list size %zu\n", cand->size()); + DEBUG_PRINTF(" current cand list size %zu\n", cand.size()); - vector curr; + curr.clear(); for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) { char c = (char)i; @@ -178,14 +179,14 @@ bool expandFmlCandidates(const CharReach &cr, vector *cand) { continue; } - for (const auto &lit : *cand) { + for (const auto &lit : cand) { if (curr.size() >= MAX_MASK_LITS) { DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS); return false; } - curr.emplace_back(c, nocase); - curr.back() += lit; + curr.push_back(lit); + curr.back().push_back(c, nocase); } } @@ -196,7 +197,7 @@ bool expandFmlCandidates(const CharReach &cr, vector *cand) { } assert(curr.size() <= MAX_MASK_LITS); - cand->swap(curr); + cand.swap(curr); return true; } @@ -213,6 +214,7 @@ u32 scoreFmlCandidates(const vector &cand) { u32 min_period = len; for (const auto &lit : cand) { + DEBUG_PRINTF("candidate: %s\n", dumpString(lit).c_str()); u32 period = lit.length() - maxStringSelfOverlap(lit); min_period = min(min_period, period); } @@ -238,31 +240,37 @@ bool findMaskLiterals(const vector &mask, vector *lit, *minBound = 0; *length = 0; - vector candidates, best_candidates; + vector candidates, best_candidates, curr_candidates; u32 best_score = 0; u32 best_minOffset = 0; - vector::const_iterator it, itb, ite; - for (it = itb = mask.begin(), ite = mask.end(); it != ite; ++it) { + + for (auto it = mask.begin(); it != mask.end(); ++it) { candidates.clear(); - if (!initFmlCandidates(*it, &candidates)) { + if (!initFmlCandidates(*it, candidates)) { DEBUG_PRINTF("failed to init\n"); continue; } DEBUG_PRINTF("++\n"); - vector::const_iterator jt = it; - while (jt != itb) { + auto jt = it; + while (jt != mask.begin()) { --jt; DEBUG_PRINTF("--\n"); - if (!expandFmlCandidates(*jt, &candidates)) { + if (!expandFmlCandidates(*jt, curr_candidates, candidates)) { DEBUG_PRINTF("expansion stopped\n"); break; } } + + // Candidates have been expanded in reverse order. + for (auto &cand : candidates) { + cand = reverse_literal(cand); + } + u32 score = scoreFmlCandidates(candidates); DEBUG_PRINTF("scored %u for literal set of size %zu\n", score, candidates.size()); if (!candidates.empty() && score >= best_score) { - best_minOffset = it - itb - candidates.back().length() + 1; + best_minOffset = it - mask.begin() - candidates.back().length() + 1; best_candidates.swap(candidates); best_score = score; } @@ -277,11 +285,12 @@ bool findMaskLiterals(const vector &mask, vector *lit, *length = best_candidates.back().length(); DEBUG_PRINTF("best minbound %u length %u\n", *minBound, *length); - for (const auto &cand : best_candidates) { - assert(cand.length() == *length); - lit->push_back(cand); - } + assert(all_of_in(best_candidates, [&](const ue2_literal &s) { + return s.length() == *length; + })); + + *lit = std::move(best_candidates); return true; } diff --git a/src/util/ue2string.cpp b/src/util/ue2string.cpp index 39e1edbd..98b007d4 100644 --- a/src/util/ue2string.cpp +++ b/src/util/ue2string.cpp @@ -291,18 +291,24 @@ void ue2_literal::push_back(char c, bool nc) { s.push_back(c); } +void ue2_literal::reverse() { + std::reverse(s.begin(), s.end()); + + const size_t len = nocase.size(); + for (size_t i = 0; i < len / 2; i++) { + size_t j = len - i - 1; + bool a = nocase.test(i); + bool b = nocase.test(j); + nocase.set(i, b); + nocase.set(j, a); + } +} + // Return a copy of this literal in reverse order. ue2_literal reverse_literal(const ue2_literal &in) { - ue2_literal rv; - if (in.empty()) { - return rv; - } - - for (ue2_literal::const_iterator it = in.end(); it != in.begin();) { - --it; - rv.push_back(it->c, it->nocase); - } - return rv; + auto out = in; + out.reverse(); + return out; } bool ue2_literal::operator<(const ue2_literal &b) const { diff --git a/src/util/ue2string.h b/src/util/ue2string.h index 44f1f53f..0fa76c3a 100644 --- a/src/util/ue2string.h +++ b/src/util/ue2string.h @@ -191,6 +191,9 @@ public: return a; } + /// Reverse this literal in-place. + void reverse(); + void operator+=(const ue2_literal &b); bool operator==(const ue2_literal &b) const { return s == b.s && nocase == b.nocase;