diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 258eee9c..7c8c9023 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -183,7 +183,7 @@ void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) { flat_set new_reports; for (auto id : ds.reports) { assert(id < build.literal_info.size()); - new_reports.insert(build.literal_info.at(id).final_id); + new_reports.insert(build.literal_info.at(id).fragment_id); } ds.reports = move(new_reports); } @@ -191,7 +191,7 @@ void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) { /** * \brief Replaces the report ids currently in the dfas (rose graph literal - * ids) with the final id for each literal. + * ids) with the fragment id for each literal. */ static void remapAnchoredReports(RoseBuildImpl &build) { @@ -208,8 +208,7 @@ void remapAnchoredReports(RoseBuildImpl &build) { * raw_dfa with program offsets. */ static -void remapIdsToPrograms(raw_dfa &rdfa, - const map &final_to_frag_map) { +void remapIdsToPrograms(const RoseBuildImpl &build, raw_dfa &rdfa) { for (dstate &ds : rdfa.states) { assert(ds.reports_eod.empty()); // Not used in anchored matcher. if (ds.reports.empty()) { @@ -217,9 +216,8 @@ void remapIdsToPrograms(raw_dfa &rdfa, } flat_set new_reports; - for (auto final_id : ds.reports) { - assert(contains(final_to_frag_map, final_id)); - auto &frag = final_to_frag_map.at(final_id); + for (auto fragment_id : ds.reports) { + auto &frag = build.fragments.at(fragment_id); new_reports.insert(frag.lit_program_offset); } ds.reports = move(new_reports); @@ -227,16 +225,18 @@ void remapIdsToPrograms(raw_dfa &rdfa, } static -void populate_holder(const simple_anchored_info &sai, const set &exit_ids, - NGHolder *h_in) { +unique_ptr populate_holder(const simple_anchored_info &sai, + const flat_set &exit_ids) { DEBUG_PRINTF("populating holder for ^.{%u,%u}%s\n", sai.min_bound, sai.max_bound, dumpString(sai.literal).c_str()); - NGHolder &h = *h_in; - set ends = addDotsToGraph(h, h.start, sai.min_bound, - sai.max_bound, CharReach::dot()); + auto h_ptr = make_unique(); + NGHolder &h = *h_ptr; + auto ends = addDotsToGraph(h, h.start, sai.min_bound, sai.max_bound, + CharReach::dot()); NFAVertex v = addToGraph(h, ends, sai.literal); add_edge(v, h.accept, h); h[v].reports.insert(exit_ids.begin(), exit_ids.end()); + return h_ptr; } u32 anchoredStateSize(const anchored_matcher_info &atable) { @@ -735,15 +735,15 @@ void buildSimpleDfas(const RoseBuildImpl &build, vector> *anchored_dfas) { /* we should have determinised all of these before so there should be no * chance of failure. */ - for (const auto &simple : build.anchored_simple) { - set exit_ids; + flat_set exit_ids; + for (const auto &simple : build.anchored_simple) { + exit_ids.clear(); for (auto lit_id : simple.second) { - exit_ids.insert(build.literal_info[lit_id].final_id); + exit_ids.insert(build.literal_info[lit_id].fragment_id); } - NGHolder h; - populate_holder(simple.first, exit_ids, &h); - Automaton_Holder autom(h); - unique_ptr rdfa = ue2::make_unique(NFA_OUTFIX_RAW); + auto h = populate_holder(simple.first, exit_ids); + Automaton_Holder autom(*h); + auto rdfa = ue2::make_unique(NFA_OUTFIX_RAW); UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES); assert(!rv); rdfa->start_anchored = INIT_STATE; @@ -858,7 +858,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, } for (auto &rdfa : dfas) { - remapIdsToPrograms(rdfa, build.final_to_frag_map); + remapIdsToPrograms(build, rdfa); } vector> nfas; diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index a50ebb8e..03bba972 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -4646,10 +4646,8 @@ rose_group getGroups(const RoseBuildImpl &build, const flat_set &lit_ids) { } static -map groupByFragment(const RoseBuildImpl &build, - const build_context &bc) { +void groupByFragment(RoseBuildImpl &build, const build_context &bc) { u32 frag_id = 0; - map final_to_frag; struct FragmentInfo { vector final_ids; @@ -4658,6 +4656,9 @@ map groupByFragment(const RoseBuildImpl &build, map frag_info; + auto &final_to_frag = build.final_to_frag_map; + auto &fragments = build.fragments; + for (const auto &m : bc.final_id_to_literal) { u32 final_id = m.first; const auto &lit_ids = m.second; @@ -4666,21 +4667,27 @@ map groupByFragment(const RoseBuildImpl &build, auto groups = getGroups(build, lit_ids); if (lit_ids.size() > 1) { - final_to_frag.emplace(final_id, LitFragment(frag_id++, groups)); + final_to_frag.emplace(final_id, frag_id); + fragments.emplace_back(frag_id, groups); + frag_id++; continue; } const auto lit_id = *lit_ids.begin(); const auto &lit = build.literals.right.at(lit_id); if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) { - final_to_frag.emplace(final_id, LitFragment(frag_id++, groups)); + final_to_frag.emplace(final_id, frag_id); + fragments.emplace_back(frag_id, groups); + frag_id++; continue; } // Combining fragments that squash their groups is unsafe. const auto &info = build.literal_info[lit_id]; if (info.squash_group) { - final_to_frag.emplace(final_id, LitFragment(frag_id++, groups)); + final_to_frag.emplace(final_id, frag_id); + fragments.emplace_back(frag_id, groups); + frag_id++; continue; } @@ -4695,14 +4702,13 @@ map groupByFragment(const RoseBuildImpl &build, const auto &fi = m.second; DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(), as_string_list(fi.final_ids).c_str()); + fragments.emplace_back(frag_id, fi.groups); for (const auto final_id : fi.final_ids) { assert(!contains(final_to_frag, final_id)); - final_to_frag.emplace(final_id, LitFragment(frag_id, fi.groups)); + final_to_frag.emplace(final_id, frag_id); } frag_id++; } - - return final_to_frag; } /** @@ -4713,7 +4719,7 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { // Build a reverse mapping from fragment -> final_id. map> frag_to_final_map; for (const auto &m : build.final_to_frag_map) { - frag_to_final_map[m.second.fragment_id].insert(m.first); + frag_to_final_map[m.second].insert(m.first); } const u32 num_fragments = verify_u32(frag_to_final_map.size()); @@ -4736,7 +4742,8 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { } // Update LitFragment entries. - for (auto &frag : build.final_to_frag_map | map_values) { + for (const auto &fragment_id : build.final_to_frag_map | map_values) { + auto &frag = build.fragments.at(fragment_id); frag.lit_program_offset = litPrograms[frag.fragment_id]; frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id]; } @@ -5407,7 +5414,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { build_context bc; allocateFinalLiteralId(*this, bc); - final_to_frag_map = groupByFragment(*this, bc); + groupByFragment(*this, bc); // Write the fragment IDs into the literal_info structures. for (auto &info : literal_info) { @@ -5415,7 +5422,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { continue; } assert(contains(final_to_frag_map, info.final_id)); - info.fragment_id = final_to_frag_map.at(info.final_id).fragment_id; + info.fragment_id = final_to_frag_map.at(info.final_id); } auto anchored_dfas = buildAnchoredDfas(*this); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 9cab4087..92a3935b 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1153,7 +1153,7 @@ void dumpRoseLitPrograms(const RoseBuildImpl &build, const RoseEngine *t, programs.reserve(build.final_to_frag_map.size()); for (const auto &m : build.final_to_frag_map) { - const auto &frag = m.second; + const auto &frag = build.fragments.at(m.second); if (frag.lit_program_offset) { programs.push_back(frag.lit_program_offset); } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index bf588553..321f54d8 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -593,7 +593,8 @@ public: * overlap calculation in history assignment. */ std::map anchoredLitSuffix; - std::map final_to_frag_map; + std::map final_to_frag_map; + std::vector fragments; unordered_set transient; unordered_map rose_squash_masks; diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index cd88c980..3ecec9d8 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -741,7 +741,8 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, for (auto &lit : mp.lits) { u32 final_id = lit.id; assert(contains(build.final_to_frag_map, final_id)); - const auto &frag = build.final_to_frag_map.at(final_id); + const auto &frag = + build.fragments.at(build.final_to_frag_map.at(final_id)); lit.id = delay_rebuild ? frag.delay_program_offset : frag.lit_program_offset; lit.groups = frag.groups;