rose: always use mandatory masks for lit fragments

This commit is contained in:
Justin Viiret 2017-06-20 17:11:18 +10:00 committed by Matthew Barr
parent bdae3d5b80
commit 7560e189eb

View File

@ -55,6 +55,7 @@
#include <sstream> #include <sstream>
#include <boost/range/adaptor/map.hpp> #include <boost/range/adaptor/map.hpp>
#include <boost/range/adaptor/reversed.hpp>
using namespace std; using namespace std;
using boost::adaptors::map_values; using boost::adaptors::map_values;
@ -63,7 +64,7 @@ namespace ue2 {
static const size_t MAX_ACCEL_STRING_LEN = 16; static const size_t MAX_ACCEL_STRING_LEN = 16;
#ifdef DEBUG #if defined(DEBUG) || defined(DUMP_SUPPORT)
static UNUSED static UNUSED
string dumpMask(const vector<u8> &v) { string dumpMask(const vector<u8> &v) {
ostringstream oss; ostringstream oss;
@ -231,28 +232,12 @@ bool maskFromPreds(const RoseBuildImpl &build, const rose_literal_id &id,
} }
static static
bool findHamsterMask(const RoseBuildImpl &build, const rose_literal_id &id, bool addSurroundingMask(const RoseBuildImpl &build, const rose_literal_id &id,
const rose_literal_info &info, const RoseVertex v, const RoseVertex v, vector<u8> &msk, vector<u8> &cmp) {
vector<u8> &msk, vector<u8> &cmp) {
// Start with zero masks. // Start with zero masks.
msk.assign(HWLM_MASKLEN, 0); msk.assign(HWLM_MASKLEN, 0);
cmp.assign(HWLM_MASKLEN, 0); cmp.assign(HWLM_MASKLEN, 0);
// Masks can come from literal benefits (for mixed-case literals).
if (info.requires_benefits) {
assert(mixed_sensitivity(id.s));
size_t j = 0;
for (ue2_literal::const_reverse_iterator it = id.s.rbegin(),
ite = id.s.rend();
it != ite && j < HWLM_MASKLEN; ++it, ++j) {
size_t offset = HWLM_MASKLEN - j - 1;
const CharReach &cr = *it;
make_and_cmp_mask(cr, &msk[offset], &cmp[offset]);
}
return true;
}
const LeftEngInfo &left = build.g[v].left; const LeftEngInfo &left = build.g[v].left;
if (left && left.lag < HWLM_MASKLEN) { if (left && left.lag < HWLM_MASKLEN) {
if (maskFromLeft(left, msk, cmp)) { if (maskFromLeft(left, msk, cmp)) {
@ -293,9 +278,9 @@ bool hamsterMaskCombine(vector<u8> &msk, vector<u8> &cmp,
} }
static static
bool findHamsterMask(const RoseBuildImpl &build, const rose_literal_id &id, bool addSurroundingMask(const RoseBuildImpl &build, const rose_literal_id &id,
const rose_literal_info &info, const rose_literal_info &info, vector<u8> &msk,
vector<u8> &msk, vector<u8> &cmp) { vector<u8> &cmp) {
if (!build.cc.grey.roseHamsterMasks) { if (!build.cc.grey.roseHamsterMasks) {
return false; return false;
} }
@ -305,11 +290,14 @@ bool findHamsterMask(const RoseBuildImpl &build, const rose_literal_id &id,
return false; return false;
} }
msk.assign(HWLM_MASKLEN, 0);
cmp.assign(HWLM_MASKLEN, 0);
size_t num = 0; size_t num = 0;
vector<u8> v_msk, v_cmp; vector<u8> v_msk, v_cmp;
for (RoseVertex v : info.vertices) { for (RoseVertex v : info.vertices) {
if (!findHamsterMask(build, id, info, v, v_msk, v_cmp)) { if (!addSurroundingMask(build, id, v, v_msk, v_cmp)) {
DEBUG_PRINTF("no mask\n"); DEBUG_PRINTF("no mask\n");
return false; return false;
} }
@ -364,14 +352,6 @@ void findMoreLiteralMasks(RoseBuildImpl &build) {
continue; continue;
} }
if (!lit.msk.empty()) {
continue;
}
const auto &lit_info = build.literal_info.at(id);
if (lit_info.requires_benefits) {
continue;
}
candidates.push_back(id); candidates.push_back(id);
} }
@ -380,14 +360,15 @@ void findMoreLiteralMasks(RoseBuildImpl &build) {
auto &lit_info = build.literal_info.at(id); auto &lit_info = build.literal_info.at(id);
vector<u8> msk, cmp; vector<u8> msk, cmp;
if (!findHamsterMask(build, lit, lit_info, msk, cmp)) { if (!addSurroundingMask(build, lit, lit_info, msk, cmp)) {
continue; continue;
} }
assert(!msk.empty()); DEBUG_PRINTF("found surrounding mask for lit_id=%u (%s)\n", id,
DEBUG_PRINTF("found advisory mask for lit_id=%u (%s)\n", id,
dumpString(lit.s).c_str()); dumpString(lit.s).c_str());
u32 new_id = build.getLiteralId(lit.s, msk, cmp, lit.delay, lit.table); u32 new_id = build.getLiteralId(lit.s, msk, cmp, lit.delay, lit.table);
assert(new_id != id); if (new_id == id) {
continue;
}
DEBUG_PRINTF("replacing with new lit_id=%u\n", new_id); DEBUG_PRINTF("replacing with new lit_id=%u\n", new_id);
// Note that our new literal may already exist and have vertices, etc. // Note that our new literal may already exist and have vertices, etc.
@ -409,6 +390,48 @@ void findMoreLiteralMasks(RoseBuildImpl &build) {
} }
} }
// The mask already associated with the literal and any mask due to
// mixed-case is mandatory.
static
void addLiteralMask(const rose_literal_id &id, vector<u8> &msk,
vector<u8> &cmp) {
if (id.msk.empty() && !mixed_sensitivity(id.s)) {
return;
}
while (msk.size() < HWLM_MASKLEN) {
msk.insert(msk.begin(), 0);
cmp.insert(cmp.begin(), 0);
}
if (!id.msk.empty()) {
assert(id.msk.size() <= HWLM_MASKLEN);
assert(id.msk.size() == id.cmp.size());
for (size_t i = 0; i < id.msk.size(); i++) {
size_t mand_offset = msk.size() - i - 1;
size_t lit_offset = id.msk.size() - i - 1;
msk[mand_offset] = id.msk[lit_offset];
cmp[mand_offset] = id.cmp[lit_offset];
}
}
if (mixed_sensitivity(id.s)) {
auto it = id.s.rbegin();
for (size_t i = 0, i_end = min(id.s.length(), size_t{HWLM_MASKLEN});
i < i_end; ++i, ++it) {
const auto &c = *it;
if (!c.nocase) {
size_t offset = HWLM_MASKLEN - i - 1;
DEBUG_PRINTF("offset %zu must match 0x%02x exactly\n", offset,
c.c);
make_and_cmp_mask(c, &msk[offset], &cmp[offset]);
}
}
}
normaliseLiteralMask(id.s, msk, cmp);
}
static static
bool isDirectHighlander(const RoseBuildImpl &build, const u32 id, bool isDirectHighlander(const RoseBuildImpl &build, const u32 id,
const rose_literal_info &info) { const rose_literal_info &info) {
@ -716,8 +739,8 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build,
} }
} }
const vector<u8> &msk = lit.msk; vector<u8> msk = lit.msk; // copy
const vector<u8> &cmp = lit.cmp; vector<u8> cmp = lit.cmp; // copy
bool noruns = isNoRunsLiteral(build, id, info, max_len); bool noruns = isNoRunsLiteral(build, id, info, max_len);
size_t lit_hist_len = 0; size_t lit_hist_len = 0;
@ -740,6 +763,8 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build,
assert(!noruns); assert(!noruns);
} }
addLiteralMask(lit, msk, cmp);
const auto &s_final = lit_final.get_string(); const auto &s_final = lit_final.get_string();
bool nocase = lit_final.any_nocase(); bool nocase = lit_final.any_nocase();