diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index b1aead36..7604e27a 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -244,9 +244,6 @@ struct build_context : boost::noncopyable { /** \brief Global bitmap of groups that can be squashed. */ rose_group squashable_groups = 0; - /** \brief Mapping from final ID to the set of literals it is used for. */ - map> final_id_to_literal; - /** \brief Mapping from Rose literal ID to anchored program index. */ map anchored_programs; @@ -2566,12 +2563,12 @@ void recordResources(RoseResources &resources, if (!build.outfixes.empty()) { resources.has_outfixes = true; } - for (u32 i = 0; i < build.literal_info.size(); i++) { - if (build.hasFinalId(i)) { - resources.has_literals = true; - break; - } - } + + resources.has_literals = + any_of(begin(build.literal_info), end(build.literal_info), + [](const rose_literal_info &info) { + return info.fragment_id != MO_INVALID_IDX; + }); const auto &g = build.g; for (const auto &v : vertices_range(g)) { @@ -4658,6 +4655,26 @@ map> findEdgesByLiteral(const RoseBuildImpl &build) { return lit_edge_map; } +static +bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { + assert(lit_id < build.literal_info.size()); + const auto &info = build.literal_info[lit_id]; + if (!info.vertices.empty()) { + return true; + } + + for (const u32 &delayed_id : info.delayed_ids) { + assert(delayed_id < build.literal_info.size()); + const rose_literal_info &delayed_info = build.literal_info[delayed_id]; + if (!delayed_info.vertices.empty()) { + return true; + } + } + + DEBUG_PRINTF("literal %u has no refs\n", lit_id); + return false; +} + static rose_literal_id getFragment(const rose_literal_id &lit) { if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) { @@ -4673,87 +4690,68 @@ rose_literal_id getFragment(const rose_literal_id &lit) { } static -rose_group getGroups(const RoseBuildImpl &build, const flat_set &lit_ids) { - rose_group groups = 0; - for (auto lit_id : lit_ids) { - auto &info = build.literal_info.at(lit_id); - groups |= info.group_mask; - } - return groups; -} - -static -void groupByFragment(RoseBuildImpl &build, const build_context &bc) { +void groupByFragment(RoseBuildImpl &build) { u32 frag_id = 0; struct FragmentInfo { - vector final_ids; + vector lit_ids; rose_group groups = 0; }; map frag_info; - map final_to_frag; + map lit_to_frag; auto &fragments = build.fragments; - for (const auto &m : bc.final_id_to_literal) { - u32 final_id = m.first; - const auto &lit_ids = m.second; - assert(!lit_ids.empty()); + for (const auto &m : build.literals.right) { + u32 lit_id = m.first; - auto groups = getGroups(build, lit_ids); - - if (lit_ids.size() > 1) { - final_to_frag.emplace(final_id, frag_id); - fragments.emplace_back(frag_id, groups); - frag_id++; + if (!isUsedLiteral(build, lit_id)) { continue; } - const auto lit_id = *lit_ids.begin(); - const auto &lit = build.literals.right.at(lit_id); + const auto &lit = m.second; + const auto &info = build.literal_info.at(lit_id); + + auto groups = info.group_mask; + if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) { - final_to_frag.emplace(final_id, frag_id); + lit_to_frag.emplace(lit_id, frag_id); fragments.emplace_back(frag_id, groups); frag_id++; continue; } // Combining fragments that squash their groups is unsafe. - const auto &info = build.literal_info[lit_id]; if (info.squash_group) { - final_to_frag.emplace(final_id, frag_id); + lit_to_frag.emplace(lit_id, frag_id); fragments.emplace_back(frag_id, groups); frag_id++; continue; } - DEBUG_PRINTF("fragment candidate: final_id=%u %s\n", final_id, + DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id, dumpString(lit.s).c_str()); auto &fi = frag_info[getFragment(lit)]; - fi.final_ids.push_back(final_id); + fi.lit_ids.push_back(lit_id); fi.groups |= groups; } for (const auto &m : frag_info) { const auto &fi = m.second; DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(), - as_string_list(fi.final_ids).c_str()); + as_string_list(fi.lit_ids).c_str()); fragments.emplace_back(frag_id, fi.groups); - for (const auto final_id : fi.final_ids) { - assert(!contains(final_to_frag, final_id)); - final_to_frag.emplace(final_id, frag_id); + for (const auto lit_id : fi.lit_ids) { + assert(!contains(lit_to_frag, lit_id)); + lit_to_frag.emplace(lit_id, frag_id); } frag_id++; } // Write the fragment IDs into the literal_info structures. - for (auto &info : build.literal_info) { - if (info.final_id == MO_INVALID_IDX) { - continue; - } - assert(contains(final_to_frag, info.final_id)); - info.fragment_id = final_to_frag.at(info.final_id); + for (const auto &m : lit_to_frag) { + build.literal_info[m.first].fragment_id = m.second; } } @@ -4871,8 +4869,7 @@ pair writeAnchoredPrograms(RoseBuildImpl &build, build_context &bc) { u32 offset = writeLiteralProgram(build, bc, {lit_id}, lit_edge_map, true); - DEBUG_PRINTF("lit_id=%u, final_id %u -> anch prog at %u\n", lit_id, - final_id, offset); + DEBUG_PRINTF("lit_id=%u -> anch prog at %u\n", lit_id, offset); u32 anch_id; auto it = cache.find(offset); @@ -5248,169 +5245,6 @@ u32 buildEagerQueueIter(const set &eager, u32 leftfixBeginQueue, return bc.engine_blob.add_iterator(iter); } -static -void allocateFinalIdToSet(RoseBuildImpl &build, build_context &bc, - const set &lits, u32 *next_final_id) { - const auto &g = build.g; - auto &literal_info = build.literal_info; - auto &final_id_to_literal = bc.final_id_to_literal; - - /* We can allocate the same final id to multiple literals of the same type - * if they share the same vertex set and trigger the same delayed literal - * ids and squash the same roles and have the same group squashing - * behaviour. Benefits literals cannot be merged. */ - - for (u32 int_id : lits) { - rose_literal_info &curr_info = literal_info[int_id]; - const rose_literal_id &lit = build.literals.right.at(int_id); - const auto &verts = curr_info.vertices; - - // Literals with benefits cannot be merged. - if (curr_info.requires_benefits) { - DEBUG_PRINTF("id %u has benefits\n", int_id); - goto assign_new_id; - } - - // Literals that need confirmation with CHECK_LONG_LIT or CHECK_MED_LIT - // cannot be merged. - if (lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) { - DEBUG_PRINTF("id %u needs lit confirm\n", int_id); - goto assign_new_id; - } - - if (!verts.empty() && curr_info.delayed_ids.empty()) { - vector cand; - insert(&cand, cand.end(), g[*verts.begin()].literals); - for (auto v : verts) { - vector temp; - set_intersection(cand.begin(), cand.end(), - g[v].literals.begin(), - g[v].literals.end(), - inserter(temp, temp.end())); - cand.swap(temp); - } - - for (u32 cand_id : cand) { - if (cand_id >= int_id) { - break; - } - - const auto &cand_info = literal_info[cand_id]; - const auto &cand_lit = build.literals.right.at(cand_id); - - if (cand_lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) { - continue; - } - - if (cand_info.requires_benefits) { - continue; - } - - if (!cand_info.delayed_ids.empty()) { - /* TODO: allow cases where delayed ids are equivalent. - * This is awkward currently as the have not had their - * final ids allocated yet */ - continue; - } - - if (lits.find(cand_id) == lits.end() - || cand_info.vertices.size() != verts.size() - || cand_info.squash_group != curr_info.squash_group) { - continue; - } - - /* if we are squashing groups we need to check if they are the - * same group */ - if (cand_info.squash_group - && cand_info.group_mask != curr_info.group_mask) { - continue; - } - - u32 final_id = cand_info.final_id; - assert(final_id != MO_INVALID_IDX); - assert(curr_info.final_id == MO_INVALID_IDX); - curr_info.final_id = final_id; - final_id_to_literal[final_id].insert(int_id); - goto next_lit; - } - } - - assign_new_id: - /* oh well, have to give it a fresh one, hang the expense */ - DEBUG_PRINTF("allocating final id %u to %u\n", *next_final_id, int_id); - assert(curr_info.final_id == MO_INVALID_IDX); - curr_info.final_id = *next_final_id; - final_id_to_literal[*next_final_id].insert(int_id); - (*next_final_id)++; - next_lit:; - } -} - -static -bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { - assert(lit_id < build.literal_info.size()); - const auto &info = build.literal_info[lit_id]; - if (!info.vertices.empty()) { - return true; - } - - for (const u32 &delayed_id : info.delayed_ids) { - assert(delayed_id < build.literal_info.size()); - const rose_literal_info &delayed_info = build.literal_info[delayed_id]; - if (!delayed_info.vertices.empty()) { - return true; - } - } - - DEBUG_PRINTF("literal %u has no refs\n", lit_id); - return false; -} - -/** \brief Allocate final literal IDs for all literals. */ -static -void allocateFinalLiteralId(RoseBuildImpl &build, build_context &bc) { - set anch; - set norm; - set delay; - - /* undelayed ids come first */ - assert(bc.final_id_to_literal.empty()); - u32 next_final_id = 0; - for (u32 i = 0; i < build.literal_info.size(); i++) { - assert(!build.hasFinalId(i)); - - if (!isUsedLiteral(build, i)) { - /* what is this literal good for? absolutely nothing */ - continue; - } - - // The special EOD event literal has its own program and does not need - // a real literal ID. - if (i == build.eod_event_literal_id) { - assert(build.eod_event_literal_id != MO_INVALID_IDX); - continue; - } - - if (build.isDelayed(i)) { - assert(!build.literal_info[i].requires_benefits); - delay.insert(i); - } else if (build.literals.right.at(i).table == ROSE_ANCHORED) { - anch.insert(i); - } else { - norm.insert(i); - } - } - - /* normal lits */ - allocateFinalIdToSet(build, bc, norm, &next_final_id); - - /* next anchored stuff */ - allocateFinalIdToSet(build, bc, anch, &next_final_id); - - /* delayed ids come last */ - allocateFinalIdToSet(build, bc, delay, &next_final_id); -} - static aligned_unique_ptr addSmallWriteEngine(RoseBuildImpl &build, aligned_unique_ptr rose) { @@ -5523,8 +5357,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold); build_context bc; - allocateFinalLiteralId(*this, bc); - groupByFragment(*this, bc); + groupByFragment(*this); auto anchored_dfas = buildAnchoredDfas(*this); diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 63b5bd0f..7dd55d5f 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -759,10 +759,6 @@ bool RoseBuildImpl::isDelayed(u32 id) const { return literal_info.at(id).undelayed_id != id; } -bool RoseBuildImpl::hasFinalId(u32 id) const { - return literal_info.at(id).final_id != MO_INVALID_IDX; -} - bool RoseBuildImpl::hasDelayedLiteral(RoseVertex v) const { for (u32 lit_id : g[v].literals) { if (literals.right.at(lit_id).delay) { diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index b3966169..2f882e68 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -249,9 +249,9 @@ private: void writeLiteral(ostream &os, u32 id) const { os << "lit=" << id; if (id < build.literal_info.size()) { - os << "/" << build.literal_info[id].final_id << " "; + os << "/" << build.literal_info[id].fragment_id << " "; } else { - os << "/nofinal "; + os << "/nofrag "; } if (contains(build.literals.right, id)) { @@ -355,7 +355,7 @@ void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) { break; } - os << " ID " << id << "/" << lit_info.final_id << ": \"" + os << " ID " << id << "/" << lit_info.fragment_id << ": \"" << escapeString(s.get_string()) << "\"" << " (len " << s.length() << ","; if (s.any_nocase()) { diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 860404b4..cafd0505 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -264,7 +264,6 @@ struct rose_literal_info { ue2::flat_set vertices; rose_group group_mask = 0; u32 undelayed_id = MO_INVALID_IDX; - u32 final_id = MO_INVALID_IDX; // TODO: remove u32 fragment_id = MO_INVALID_IDX; //!< ID corresponding to literal prog. bool squash_group = false; bool requires_benefits = false; @@ -530,8 +529,6 @@ public: bool isDirectReport(u32 id) const; bool isDelayed(u32 id) const; - bool hasFinalId(u32 id) const; - bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored * table */ bool isFloating(RoseVertex v) const; /* true iff has literal in floating diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index 1643a06c..50e48a5b 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -349,8 +349,8 @@ void findMoreLiteralMasks(RoseBuildImpl &build) { const u32 id = e.first; const auto &lit = e.second; - // This pass takes place before final IDs are assigned to literals. - assert(!build.hasFinalId(id)); + // This pass takes place before fragment IDs are assigned to literals. + assert(build.literal_info.at(id).fragment_id == MO_INVALID_IDX); if (lit.delay || build.isDelayed(id)) { continue; @@ -657,7 +657,7 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, for (const auto &e : build.literals.right) { const u32 id = e.first; - if (!build.hasFinalId(id)) { + if (build.literal_info.at(id).fragment_id == MO_INVALID_IDX) { continue; }