rose: use fragment ids earlier for anchored dfas

This commit is contained in:
Justin Viiret 2017-02-13 16:41:08 +11:00 committed by Matthew Barr
parent 8b25d83415
commit 79512bd5c3
5 changed files with 45 additions and 36 deletions

View File

@ -183,7 +183,7 @@ void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) {
flat_set<ReportID> new_reports; flat_set<ReportID> new_reports;
for (auto id : ds.reports) { for (auto id : ds.reports) {
assert(id < build.literal_info.size()); assert(id < build.literal_info.size());
new_reports.insert(build.literal_info.at(id).final_id); new_reports.insert(build.literal_info.at(id).fragment_id);
} }
ds.reports = move(new_reports); ds.reports = move(new_reports);
} }
@ -191,7 +191,7 @@ void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) {
/** /**
* \brief Replaces the report ids currently in the dfas (rose graph literal * \brief Replaces the report ids currently in the dfas (rose graph literal
* ids) with the final id for each literal. * ids) with the fragment id for each literal.
*/ */
static static
void remapAnchoredReports(RoseBuildImpl &build) { void remapAnchoredReports(RoseBuildImpl &build) {
@ -208,8 +208,7 @@ void remapAnchoredReports(RoseBuildImpl &build) {
* raw_dfa with program offsets. * raw_dfa with program offsets.
*/ */
static static
void remapIdsToPrograms(raw_dfa &rdfa, void remapIdsToPrograms(const RoseBuildImpl &build, raw_dfa &rdfa) {
const map<u32, LitFragment> &final_to_frag_map) {
for (dstate &ds : rdfa.states) { for (dstate &ds : rdfa.states) {
assert(ds.reports_eod.empty()); // Not used in anchored matcher. assert(ds.reports_eod.empty()); // Not used in anchored matcher.
if (ds.reports.empty()) { if (ds.reports.empty()) {
@ -217,9 +216,8 @@ void remapIdsToPrograms(raw_dfa &rdfa,
} }
flat_set<ReportID> new_reports; flat_set<ReportID> new_reports;
for (auto final_id : ds.reports) { for (auto fragment_id : ds.reports) {
assert(contains(final_to_frag_map, final_id)); auto &frag = build.fragments.at(fragment_id);
auto &frag = final_to_frag_map.at(final_id);
new_reports.insert(frag.lit_program_offset); new_reports.insert(frag.lit_program_offset);
} }
ds.reports = move(new_reports); ds.reports = move(new_reports);
@ -227,16 +225,18 @@ void remapIdsToPrograms(raw_dfa &rdfa,
} }
static static
void populate_holder(const simple_anchored_info &sai, const set<u32> &exit_ids, unique_ptr<NGHolder> populate_holder(const simple_anchored_info &sai,
NGHolder *h_in) { const flat_set<u32> &exit_ids) {
DEBUG_PRINTF("populating holder for ^.{%u,%u}%s\n", sai.min_bound, DEBUG_PRINTF("populating holder for ^.{%u,%u}%s\n", sai.min_bound,
sai.max_bound, dumpString(sai.literal).c_str()); sai.max_bound, dumpString(sai.literal).c_str());
NGHolder &h = *h_in; auto h_ptr = make_unique<NGHolder>();
set<NFAVertex> ends = addDotsToGraph(h, h.start, sai.min_bound, NGHolder &h = *h_ptr;
sai.max_bound, CharReach::dot()); auto ends = addDotsToGraph(h, h.start, sai.min_bound, sai.max_bound,
CharReach::dot());
NFAVertex v = addToGraph(h, ends, sai.literal); NFAVertex v = addToGraph(h, ends, sai.literal);
add_edge(v, h.accept, h); add_edge(v, h.accept, h);
h[v].reports.insert(exit_ids.begin(), exit_ids.end()); h[v].reports.insert(exit_ids.begin(), exit_ids.end());
return h_ptr;
} }
u32 anchoredStateSize(const anchored_matcher_info &atable) { u32 anchoredStateSize(const anchored_matcher_info &atable) {
@ -735,15 +735,15 @@ void buildSimpleDfas(const RoseBuildImpl &build,
vector<unique_ptr<raw_dfa>> *anchored_dfas) { vector<unique_ptr<raw_dfa>> *anchored_dfas) {
/* we should have determinised all of these before so there should be no /* we should have determinised all of these before so there should be no
* chance of failure. */ * chance of failure. */
for (const auto &simple : build.anchored_simple) { flat_set<u32> exit_ids;
set<u32> exit_ids; for (const auto &simple : build.anchored_simple) {
exit_ids.clear();
for (auto lit_id : simple.second) { for (auto lit_id : simple.second) {
exit_ids.insert(build.literal_info[lit_id].final_id); exit_ids.insert(build.literal_info[lit_id].fragment_id);
} }
NGHolder h; auto h = populate_holder(simple.first, exit_ids);
populate_holder(simple.first, exit_ids, &h); Automaton_Holder autom(*h);
Automaton_Holder autom(h); auto rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
unique_ptr<raw_dfa> rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES); UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES);
assert(!rv); assert(!rv);
rdfa->start_anchored = INIT_STATE; rdfa->start_anchored = INIT_STATE;
@ -858,7 +858,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas,
} }
for (auto &rdfa : dfas) { for (auto &rdfa : dfas) {
remapIdsToPrograms(rdfa, build.final_to_frag_map); remapIdsToPrograms(build, rdfa);
} }
vector<aligned_unique_ptr<NFA>> nfas; vector<aligned_unique_ptr<NFA>> nfas;

View File

@ -4646,10 +4646,8 @@ rose_group getGroups(const RoseBuildImpl &build, const flat_set<u32> &lit_ids) {
} }
static static
map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build, void groupByFragment(RoseBuildImpl &build, const build_context &bc) {
const build_context &bc) {
u32 frag_id = 0; u32 frag_id = 0;
map<u32, LitFragment> final_to_frag;
struct FragmentInfo { struct FragmentInfo {
vector<u32> final_ids; vector<u32> final_ids;
@ -4658,6 +4656,9 @@ map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build,
map<rose_literal_id, FragmentInfo> frag_info; map<rose_literal_id, FragmentInfo> frag_info;
auto &final_to_frag = build.final_to_frag_map;
auto &fragments = build.fragments;
for (const auto &m : bc.final_id_to_literal) { for (const auto &m : bc.final_id_to_literal) {
u32 final_id = m.first; u32 final_id = m.first;
const auto &lit_ids = m.second; const auto &lit_ids = m.second;
@ -4666,21 +4667,27 @@ map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build,
auto groups = getGroups(build, lit_ids); auto groups = getGroups(build, lit_ids);
if (lit_ids.size() > 1) { if (lit_ids.size() > 1) {
final_to_frag.emplace(final_id, LitFragment(frag_id++, groups)); final_to_frag.emplace(final_id, frag_id);
fragments.emplace_back(frag_id, groups);
frag_id++;
continue; continue;
} }
const auto lit_id = *lit_ids.begin(); const auto lit_id = *lit_ids.begin();
const auto &lit = build.literals.right.at(lit_id); const auto &lit = build.literals.right.at(lit_id);
if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) { if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) {
final_to_frag.emplace(final_id, LitFragment(frag_id++, groups)); final_to_frag.emplace(final_id, frag_id);
fragments.emplace_back(frag_id, groups);
frag_id++;
continue; continue;
} }
// Combining fragments that squash their groups is unsafe. // Combining fragments that squash their groups is unsafe.
const auto &info = build.literal_info[lit_id]; const auto &info = build.literal_info[lit_id];
if (info.squash_group) { if (info.squash_group) {
final_to_frag.emplace(final_id, LitFragment(frag_id++, groups)); final_to_frag.emplace(final_id, frag_id);
fragments.emplace_back(frag_id, groups);
frag_id++;
continue; continue;
} }
@ -4695,14 +4702,13 @@ map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build,
const auto &fi = m.second; const auto &fi = m.second;
DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(), DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(),
as_string_list(fi.final_ids).c_str()); as_string_list(fi.final_ids).c_str());
fragments.emplace_back(frag_id, fi.groups);
for (const auto final_id : fi.final_ids) { for (const auto final_id : fi.final_ids) {
assert(!contains(final_to_frag, final_id)); assert(!contains(final_to_frag, final_id));
final_to_frag.emplace(final_id, LitFragment(frag_id, fi.groups)); final_to_frag.emplace(final_id, frag_id);
} }
frag_id++; frag_id++;
} }
return final_to_frag;
} }
/** /**
@ -4713,7 +4719,7 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
// Build a reverse mapping from fragment -> final_id. // Build a reverse mapping from fragment -> final_id.
map<u32, flat_set<u32>> frag_to_final_map; map<u32, flat_set<u32>> frag_to_final_map;
for (const auto &m : build.final_to_frag_map) { for (const auto &m : build.final_to_frag_map) {
frag_to_final_map[m.second.fragment_id].insert(m.first); frag_to_final_map[m.second].insert(m.first);
} }
const u32 num_fragments = verify_u32(frag_to_final_map.size()); const u32 num_fragments = verify_u32(frag_to_final_map.size());
@ -4736,7 +4742,8 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
} }
// Update LitFragment entries. // Update LitFragment entries.
for (auto &frag : build.final_to_frag_map | map_values) { for (const auto &fragment_id : build.final_to_frag_map | map_values) {
auto &frag = build.fragments.at(fragment_id);
frag.lit_program_offset = litPrograms[frag.fragment_id]; frag.lit_program_offset = litPrograms[frag.fragment_id];
frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id]; frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id];
} }
@ -5407,7 +5414,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
build_context bc; build_context bc;
allocateFinalLiteralId(*this, bc); allocateFinalLiteralId(*this, bc);
final_to_frag_map = groupByFragment(*this, bc); groupByFragment(*this, bc);
// Write the fragment IDs into the literal_info structures. // Write the fragment IDs into the literal_info structures.
for (auto &info : literal_info) { for (auto &info : literal_info) {
@ -5415,7 +5422,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
continue; continue;
} }
assert(contains(final_to_frag_map, info.final_id)); assert(contains(final_to_frag_map, info.final_id));
info.fragment_id = final_to_frag_map.at(info.final_id).fragment_id; info.fragment_id = final_to_frag_map.at(info.final_id);
} }
auto anchored_dfas = buildAnchoredDfas(*this); auto anchored_dfas = buildAnchoredDfas(*this);

View File

@ -1153,7 +1153,7 @@ void dumpRoseLitPrograms(const RoseBuildImpl &build, const RoseEngine *t,
programs.reserve(build.final_to_frag_map.size()); programs.reserve(build.final_to_frag_map.size());
for (const auto &m : build.final_to_frag_map) { for (const auto &m : build.final_to_frag_map) {
const auto &frag = m.second; const auto &frag = build.fragments.at(m.second);
if (frag.lit_program_offset) { if (frag.lit_program_offset) {
programs.push_back(frag.lit_program_offset); programs.push_back(frag.lit_program_offset);
} }

View File

@ -593,7 +593,8 @@ public:
* overlap calculation in history assignment. */ * overlap calculation in history assignment. */
std::map<u32, rose_literal_id> anchoredLitSuffix; std::map<u32, rose_literal_id> anchoredLitSuffix;
std::map<u32, LitFragment> final_to_frag_map; std::map<u32, u32> final_to_frag_map;
std::vector<LitFragment> fragments;
unordered_set<left_id> transient; unordered_set<left_id> transient;
unordered_map<left_id, rose_group> rose_squash_masks; unordered_map<left_id, rose_group> rose_squash_masks;

View File

@ -741,7 +741,8 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build,
for (auto &lit : mp.lits) { for (auto &lit : mp.lits) {
u32 final_id = lit.id; u32 final_id = lit.id;
assert(contains(build.final_to_frag_map, final_id)); assert(contains(build.final_to_frag_map, final_id));
const auto &frag = build.final_to_frag_map.at(final_id); const auto &frag =
build.fragments.at(build.final_to_frag_map.at(final_id));
lit.id = delay_rebuild ? frag.delay_program_offset lit.id = delay_rebuild ? frag.delay_program_offset
: frag.lit_program_offset; : frag.lit_program_offset;
lit.groups = frag.groups; lit.groups = frag.groups;