rose: explode mixed-case literals early

This commit is contained in:
Justin Viiret 2016-12-14 16:16:59 +11:00 committed by Matthew Barr
parent df7bc22ae0
commit f964801923

View File

@ -40,6 +40,7 @@
#include "rose_build_role_aliasing.h" #include "rose_build_role_aliasing.h"
#include "rose_build_util.h" #include "rose_build_util.h"
#include "ue2common.h" #include "ue2common.h"
#include "hwlm/hwlm_literal.h"
#include "nfa/nfa_internal.h" #include "nfa/nfa_internal.h"
#include "nfa/rdfa.h" #include "nfa/rdfa.h"
#include "nfagraph/ng_holder.h" #include "nfagraph/ng_holder.h"
@ -102,7 +103,73 @@ bool limited_explosion(const ue2_literal &s) {
return nc_count <= MAX_EXPLOSION_NC; return nc_count <= MAX_EXPLOSION_NC;
} }
static
void removeLiteralFromGraph(RoseBuildImpl &build, u32 id) {
assert(id < build.literal_info.size());
auto &info = build.literal_info.at(id);
for (const auto &v : info.vertices) {
build.g[v].literals.erase(id);
}
info.vertices.clear();
}
/**
* \brief Replace the given mixed-case literal with the set of its caseless
* variants.
*/
static
void explodeLiteral(RoseBuildImpl &build, u32 id) {
const auto &lit = build.literals.right.at(id);
auto &info = build.literal_info[id];
assert(!info.group_mask); // not set yet
assert(info.undelayed_id == id); // we do not explode delayed literals
for (auto it = caseIterateBegin(lit.s); it != caseIterateEnd(); ++it) {
ue2_literal new_str(*it, false);
if (!maskIsConsistent(new_str.get_string(), false, lit.msk, lit.cmp)) {
DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n");
continue;
}
u32 new_id =
build.getLiteralId(new_str, lit.msk, lit.cmp, lit.delay, lit.table);
DEBUG_PRINTF("adding exploded lit %u: '%s'\n", new_id,
dumpString(new_str).c_str());
const auto &new_lit = build.literals.right.at(new_id);
auto &new_info = build.literal_info.at(new_id);
insert(&new_info.vertices, info.vertices);
for (const auto &v : info.vertices) {
build.g[v].literals.insert(new_id);
}
build.literal_info[new_id].undelayed_id = new_id;
if (!info.delayed_ids.empty()) {
flat_set<u32> &del_ids = new_info.delayed_ids;
for (u32 delay_id : info.delayed_ids) {
const auto &dlit = build.literals.right.at(delay_id);
u32 new_delay_id =
build.getLiteralId(new_lit.s, new_lit.msk, new_lit.cmp,
dlit.delay, dlit.table);
del_ids.insert(new_delay_id);
build.literal_info[new_delay_id].undelayed_id = new_id;
}
}
}
// Remove the old literal and any old delay variants.
removeLiteralFromGraph(build, id);
for (u32 delay_id : info.delayed_ids) {
removeLiteralFromGraph(build, delay_id);
}
info.delayed_ids.clear();
}
void RoseBuildImpl::handleMixedSensitivity(void) { void RoseBuildImpl::handleMixedSensitivity(void) {
vector<u32> explode;
for (const auto &e : literals.right) { for (const auto &e : literals.right) {
u32 id = e.first; u32 id = e.first;
const rose_literal_id &lit = e.second; const rose_literal_id &lit = e.second;
@ -123,15 +190,20 @@ void RoseBuildImpl::handleMixedSensitivity(void) {
// with a CHECK_LONG_LIT instruction and need unique final_ids. // with a CHECK_LONG_LIT instruction and need unique final_ids.
// TODO: we could allow explosion for literals where the prefixes // TODO: we could allow explosion for literals where the prefixes
// covered by CHECK_LONG_LIT are identical. // covered by CHECK_LONG_LIT are identical.
if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX &&
limited_explosion(lit.s)) { if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN &&
limited_explosion(lit.s) && literal_info[id].delayed_ids.empty()) {
DEBUG_PRINTF("need to explode existing string '%s'\n", DEBUG_PRINTF("need to explode existing string '%s'\n",
dumpString(lit.s).c_str()); dumpString(lit.s).c_str());
literal_info[id].requires_explode = true; explode.push_back(id);
} else { } else {
literal_info[id].requires_benefits = true; literal_info[id].requires_benefits = true;
} }
} }
for (u32 id : explode) {
explodeLiteral(*this, id);
}
} }
// Returns the length of the longest prefix of s that is (a) also a suffix of s // Returns the length of the longest prefix of s that is (a) also a suffix of s