/* * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Intel Corporation nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/

#include "rose_build_anchored.h"

#include "grey.h"
#include "rose_build_impl.h"
#include "rose_build_matchers.h"
#include "rose_internal.h"
#include "ue2common.h"
#include "nfa/dfa_min.h"
#include "nfa/mcclellancompile.h"
#include "nfa/mcclellancompile_util.h"
#include "nfa/nfa_build_util.h"
#include "nfa/rdfa_merge.h"
#include "nfagraph/ng_holder.h"
#include "nfagraph/ng_repeat.h"
#include "nfagraph/ng_util.h"
#include "nfagraph/ng_mcclellan_internal.h"
#include "util/alloc.h"
#include "util/bitfield.h"
#include "util/charreach.h"
#include "util/compile_context.h"
#include "util/compile_error.h"
#include "util/container.h"
#include "util/determinise.h"
#include "util/flat_containers.h"
#include "util/graph_range.h"
#include "util/order_check.h"
#include "util/ue2string.h"
#include "util/unordered.h"
#include "util/verify_types.h"

// NOTE(review): the targets of these four standard includes were lost when
// the angle-bracketed text was stripped from this file; the names below are a
// reconstruction -- confirm against the project's history.
#include <map>
#include <queue>
#include <set>
#include <vector>

using namespace std;

namespace ue2 {

#define ANCHORED_NFA_STATE_LIMIT 512
#define MAX_DFA_STATES 16000
#define DFA_PAIR_MERGE_THRESHOLD 5000
#define MAX_SMALL_START_REACH 4

#define INIT_STATE (DEAD_STATE + 1)

#define NO_FRAG_ID (~0U)

// Adds a vertex with the given reach.
static NFAVertex add_vertex(NGHolder &h, const CharReach &cr) { NFAVertex v = add_vertex(h); h[v].char_reach = cr; return v; } static void add_edges(const set &parents, NFAVertex v, NGHolder &h) { for (auto p : parents) { add_edge(p, v, h); } } static set addDotsToGraph(NGHolder &h, NFAVertex start, u32 min, u32 max, const CharReach &cr) { DEBUG_PRINTF("adding [%u, %u] to graph\n", min, max); u32 i = 0; set curr; curr.insert(start); for (; i < min; i++) { NFAVertex next = add_vertex(h, cr); add_edges(curr, next, h); curr.clear(); curr.insert(next); } assert(max != ROSE_BOUND_INF); set orig = curr; for (; i < max; i++) { NFAVertex next = add_vertex(h, cr); add_edges(curr, next, h); curr.clear(); curr.insert(next); curr.insert(orig.begin(), orig.end()); } return curr; } static NFAVertex addToGraph(NGHolder &h, const set &curr, const ue2_literal &s) { DEBUG_PRINTF("adding %s to graph\n", dumpString(s).c_str()); assert(!s.empty()); ue2_literal::const_iterator it = s.begin(); NFAVertex u = add_vertex(h, *it); add_edges(curr, u, h); for (++it; it != s.end(); ++it) { NFAVertex next = add_vertex(h, *it); add_edge(u, next, h); u = next; } return u; } static void mergeAnchoredDfas(vector> &dfas, const RoseBuildImpl &build) { // First, group our DFAs into "small start" and "big start" sets. vector> small_starts, big_starts; for (auto &rdfa : dfas) { u32 start_size = mcclellanStartReachSize(rdfa.get()); if (start_size <= MAX_SMALL_START_REACH) { small_starts.emplace_back(std::move(rdfa)); } else { big_starts.emplace_back(std::move(rdfa)); } } dfas.clear(); DEBUG_PRINTF("%zu dfas with small starts, %zu dfas with big starts\n", small_starts.size(), big_starts.size()); mergeDfas(small_starts, MAX_DFA_STATES, nullptr, build.cc.grey); mergeDfas(big_starts, MAX_DFA_STATES, nullptr, build.cc.grey); // Rehome our groups into one vector. 
for (auto &rdfa : small_starts) { // cppcheck-suppress useStlAlgorithm dfas.emplace_back(std::move(rdfa)); } for (auto &rdfa : big_starts) { // cppcheck-suppress useStlAlgorithm dfas.emplace_back(std::move(rdfa)); } // Final test: if we've built two DFAs here that are small enough, we can // try to merge them. if (dfas.size() == 2) { size_t total_states = dfas[0]->states.size() + dfas[1]->states.size(); if (total_states < DFA_PAIR_MERGE_THRESHOLD) { DEBUG_PRINTF("doing small pair merge\n"); mergeDfas(dfas, MAX_DFA_STATES, nullptr, build.cc.grey); } } } static void remapAnchoredReports(raw_dfa &rdfa, const vector &frag_map) { for (dstate &ds : rdfa.states) { assert(ds.reports_eod.empty()); // Not used in anchored matcher. if (ds.reports.empty()) { continue; } flat_set new_reports; for (auto id : ds.reports) { assert(id < frag_map.size()); new_reports.insert(frag_map[id]); } ds.reports = std::move(new_reports); } } /** * \brief Replaces the report ids currently in the dfas (rose graph literal * ids) with the fragment id for each literal. */ static void remapAnchoredReports(RoseBuildImpl &build, const vector &frag_map) { for (auto &m : build.anchored_nfas) { for (auto &rdfa : m.second) { assert(rdfa); remapAnchoredReports(*rdfa, frag_map); } } } /** * Returns mapping from literal ids to fragment ids. */ static vector reverseFragMap(const RoseBuildImpl &build, const vector &fragments) { vector rev(build.literal_info.size(), NO_FRAG_ID); for (const auto &f : fragments) { for (u32 lit_id : f.lit_ids) { assert(lit_id < rev.size()); rev[lit_id] = f.fragment_id; } } return rev; } /** * \brief Replace the reports (which are literal final_ids) in the given * raw_dfa with program offsets. */ static void remapIdsToPrograms(const vector &fragments, raw_dfa &rdfa) { for (dstate &ds : rdfa.states) { assert(ds.reports_eod.empty()); // Not used in anchored matcher. 
if (ds.reports.empty()) { continue; } flat_set new_reports; for (auto fragment_id : ds.reports) { const auto &frag = fragments.at(fragment_id); new_reports.insert(frag.lit_program_offset); } ds.reports = std::move(new_reports); } } static unique_ptr populate_holder(const simple_anchored_info &sai, const flat_set &exit_ids) { DEBUG_PRINTF("populating holder for ^.{%u,%u}%s\n", sai.min_bound, sai.max_bound, dumpString(sai.literal).c_str()); auto h_ptr = make_unique(); NGHolder &h = *h_ptr; auto ends = addDotsToGraph(h, h.start, sai.min_bound, sai.max_bound, CharReach::dot()); NFAVertex v = addToGraph(h, ends, sai.literal); add_edge(v, h.accept, h); h[v].reports.insert(exit_ids.begin(), exit_ids.end()); return h_ptr; } u32 anchoredStateSize(const anchored_matcher_info &atable) { const struct anchored_matcher_info *curr = &atable; // Walk the list until we find the last element; total state size will be // that engine's state offset plus its state requirement. while (curr->next_offset) { curr = reinterpret_cast (reinterpret_cast(curr) + curr->next_offset); } const NFA *nfa = reinterpret_cast(reinterpret_cast(curr) + sizeof(*curr)); return curr->state_offset + nfa->streamStateSize; } namespace { using nfa_state_set = bitfield; struct Holder_StateSet { Holder_StateSet() : wdelay(0) {} nfa_state_set wrap_state; u32 wdelay; bool operator==(const Holder_StateSet &b) const { return wdelay == b.wdelay && wrap_state == b.wrap_state; } size_t hash() const { return hash_all(wrap_state, wdelay); } }; class Automaton_Holder { public: using StateSet = Holder_StateSet; using StateMap = ue2_unordered_map; explicit Automaton_Holder(const NGHolder &g_in) : g(g_in) { for (auto v : vertices_range(g)) { vertexToIndex[v] = indexToVertex.size(); indexToVertex.emplace_back(v); } assert(indexToVertex.size() <= ANCHORED_NFA_STATE_LIMIT); DEBUG_PRINTF("%zu states\n", indexToVertex.size()); init.wdelay = 0; init.wrap_state.set(vertexToIndex[g.start]); DEBUG_PRINTF("init wdelay %u\n", 
init.wdelay); calculateAlphabet(); cr_by_index = populateCR(g, indexToVertex, alpha); } private: void calculateAlphabet() { vector esets(1, CharReach::dot()); for (auto v : indexToVertex) { const CharReach &cr = g[v].char_reach; for (size_t i = 0; i < esets.size(); i++) { if (esets[i].count() == 1) { continue; } CharReach t = cr & esets[i]; if (t.any() && t != esets[i]) { esets[i] &= ~t; esets.emplace_back(t); } } } alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha); } public: void transition(const StateSet &in, StateSet *next) { /* track the dfa state, reset nfa states */ u32 wdelay = in.wdelay ? in.wdelay - 1 : 0; for (symbol_t s = 0; s < alphasize; s++) { next[s].wrap_state.reset(); next[s].wdelay = wdelay; } nfa_state_set gsucc; if (wdelay != in.wdelay) { DEBUG_PRINTF("enabling start\n"); gsucc.set(vertexToIndex[g.startDs]); } for (size_t i = in.wrap_state.find_first(); i != nfa_state_set::npos; i = in.wrap_state.find_next(i)) { NFAVertex v = indexToVertex[i]; for (auto w : adjacent_vertices_range(v, g)) { if (!contains(vertexToIndex, w) || w == g.accept || w == g.acceptEod) { continue; } if (w == g.startDs) { continue; } gsucc.set(vertexToIndex[w]); } } for (size_t j = gsucc.find_first(); j != nfa_state_set::npos; j = gsucc.find_next(j)) { const CharReach &cr = cr_by_index[j]; for (size_t s = cr.find_first(); s != CharReach::npos; s = cr.find_next(s)) { next[s].wrap_state.set(j); /* pre alpha'ed */ } } next[alpha[TOP]] = in; } const vector initial() { return {init}; } void reports(const StateSet &in, flat_set &rv) { rv.clear(); for (size_t i = in.wrap_state.find_first(); i != nfa_state_set::npos; i = in.wrap_state.find_next(i)) { NFAVertex v = indexToVertex[i]; if (edge(v, g.accept, g).second) { assert(!g[v].reports.empty()); insert(&rv, g[v].reports); } else { assert(g[v].reports.empty()); } } } void reportsEod(const StateSet &, flat_set &r) { r.clear(); } static bool canPrune(const flat_set &) { /* used by ng_ to prune states after highlander 
accepts */ return false; } private: const NGHolder &g; unordered_map vertexToIndex; vector indexToVertex; vector cr_by_index; StateSet init; public: StateSet dead; array alpha; array unalpha; u16 alphasize; }; } // namespace static bool check_dupe(const raw_dfa &rdfa, const vector> &existing, ReportID *remap) { if (!remap) { DEBUG_PRINTF("no remap\n"); return false; } set rdfa_reports; for (const auto &ds : rdfa.states) { rdfa_reports.insert(ds.reports.begin(), ds.reports.end()); } if (rdfa_reports.size() != 1) { return false; /* too complicated for now would need mapping TODO */ } for (const auto &e_rdfa : existing) { assert(e_rdfa); const raw_dfa &b = *e_rdfa; if (rdfa.start_anchored != b.start_anchored || rdfa.alpha_size != b.alpha_size || rdfa.states.size() != b.states.size() || rdfa.alpha_remap != b.alpha_remap) { continue; } set b_reports; for (u32 i = 0; i < b.states.size(); i++) { assert(b.states[i].reports_eod.empty()); assert(rdfa.states[i].reports_eod.empty()); if (rdfa.states[i].reports.size() != b.states[i].reports.size()) { goto next_dfa; } b_reports.insert(b.states[i].reports.begin(), b.states[i].reports.end()); assert(rdfa.states[i].next.size() == b.states[i].next.size()); if (!equal(rdfa.states[i].next.begin(), rdfa.states[i].next.end(), b.states[i].next.begin())) { goto next_dfa; } } if (b_reports.size() != 1) { continue; } *remap = *b_reports.begin(); DEBUG_PRINTF("dupe found remapping to %u\n", *remap); return true; next_dfa:; } return false; } static bool check_dupe_simple(const RoseBuildImpl &build, u32 min_bound, u32 max_bound, const ue2_literal &lit, ReportID *remap) { if (!remap) { DEBUG_PRINTF("no remap\n"); return false; } simple_anchored_info sai(min_bound, max_bound, lit); if (contains(build.anchored_simple, sai)) { *remap = *build.anchored_simple.at(sai).begin(); return true; } return false; } static NFAVertex extractLiteral(const NGHolder &h, ue2_literal *lit) { vector lit_verts; NFAVertex v = h.accept; while ((v = 
getSoleSourceVertex(h, v))) { const CharReach &cr = h[v].char_reach; if (cr.count() > 1 && !cr.isCaselessChar()) { break; } lit_verts.emplace_back(v); } if (lit_verts.empty()) { return NGHolder::null_vertex(); } bool nocase = false; bool case_set = false; for (auto it = lit_verts.rbegin(), ite = lit_verts.rend(); it != ite; ++it) { const CharReach &cr = h[*it].char_reach; if (cr.isAlpha()) { bool cr_nocase = cr.count() != 1; if (case_set && cr_nocase != nocase) { return NGHolder::null_vertex(); } case_set = true; nocase = cr_nocase; lit->push_back(cr.find_first(), nocase); } else { lit->push_back(cr.find_first(), false); } } return lit_verts.back(); } static bool isSimple(const NGHolder &h, u32 *min_bound, u32 *max_bound, ue2_literal *lit, u32 *report) { assert(!proper_out_degree(h.startDs, h)); assert(in_degree(h.acceptEod, h) == 1); DEBUG_PRINTF("looking for simple case\n"); NFAVertex lit_head = extractLiteral(h, lit); if (lit_head == NGHolder::null_vertex()) { DEBUG_PRINTF("no literal found\n"); return false; } const auto &reps = h[*inv_adjacent_vertices(h.accept, h).first].reports; if (reps.size() != 1) { return false; } *report = *reps.begin(); assert(!lit->empty()); set rep_exits; /* lit should only be connected to dot vertices */ for (auto u : inv_adjacent_vertices_range(lit_head, h)) { DEBUG_PRINTF("checking %zu\n", h[u].index); if (!h[u].char_reach.all()) { return false; } if (u != h.start) { rep_exits.insert(u); } } if (rep_exits.empty()) { DEBUG_PRINTF("direct anchored\n"); assert(edge(h.start, lit_head, h).second); *min_bound = 0; *max_bound = 0; return true; } NFAVertex key = *rep_exits.begin(); // Special-case the check for '^.foo' or '^.?foo'. 
if (rep_exits.size() == 1 && edge(h.start, key, h).second && out_degree(key, h) == 1) { DEBUG_PRINTF("one exit\n"); assert(edge(h.start, h.startDs, h).second); size_t num_enters = out_degree(h.start, h); if (num_enters == 2) { DEBUG_PRINTF("^.{1,1} prefix\n"); *min_bound = 1; *max_bound = 1; return true; } if (num_enters == 3 && edge(h.start, lit_head, h).second) { DEBUG_PRINTF("^.{0,1} prefix\n"); *min_bound = 0; *max_bound = 1; return true; } } vector repeats; findRepeats(h, 2, &repeats); vector::const_iterator it; for (it = repeats.begin(); it != repeats.end(); ++it) { DEBUG_PRINTF("checking.. %zu verts\n", it->vertices.size()); if (find(it->vertices.begin(), it->vertices.end(), key) != it->vertices.end()) { break; } } if (it == repeats.end()) { DEBUG_PRINTF("no repeat found\n"); return false; } set rep_verts; insert(&rep_verts, it->vertices); if (!is_subset_of(rep_exits, rep_verts)) { DEBUG_PRINTF("bad exit check\n"); return false; } set rep_enters; insert(&rep_enters, adjacent_vertices(h.start, h)); rep_enters.erase(lit_head); rep_enters.erase(h.startDs); if (!is_subset_of(rep_enters, rep_verts)) { DEBUG_PRINTF("bad entry check\n"); return false; } u32 min_b = it->repeatMin; if (edge(h.start, lit_head, h).second) { /* jump edge */ if (min_b != 1) { DEBUG_PRINTF("jump edge around repeat with min bound\n"); return false; } min_b = 0; } *min_bound = min_b; *max_bound = it->repeatMax; DEBUG_PRINTF("repeat %u %u before %s\n", *min_bound, *max_bound, dumpString(*lit).c_str()); return true; } static int finalise_out(RoseBuildImpl &build, const NGHolder &h, const Automaton_Holder &autom, unique_ptr out_dfa, ReportID *remap) { u32 min_bound = ~0U; u32 max_bound = ~0U; ue2_literal lit; u32 simple_report = MO_INVALID_IDX; if (isSimple(h, &min_bound, &max_bound, &lit, &simple_report)) { assert(simple_report != MO_INVALID_IDX); if (check_dupe_simple(build, min_bound, max_bound, lit, remap)) { DEBUG_PRINTF("found duplicate remapping to %u\n", *remap); return ANCHORED_REMAP; 
} DEBUG_PRINTF("add with report %u\n", simple_report); build.anchored_simple[simple_anchored_info(min_bound, max_bound, lit)] .insert(simple_report); return ANCHORED_SUCCESS; } out_dfa->start_anchored = INIT_STATE; out_dfa->start_floating = DEAD_STATE; out_dfa->alpha_size = autom.alphasize; out_dfa->alpha_remap = autom.alpha; auto hash = hash_dfa_no_reports(*out_dfa); if (check_dupe(*out_dfa, build.anchored_nfas[hash], remap)) { return ANCHORED_REMAP; } build.anchored_nfas[hash].emplace_back(std::move(out_dfa)); return ANCHORED_SUCCESS; } static int addAutomaton(RoseBuildImpl &build, const NGHolder &h, ReportID *remap) { if (num_vertices(h) > ANCHORED_NFA_STATE_LIMIT) { DEBUG_PRINTF("autom bad!\n"); return ANCHORED_FAIL; } Automaton_Holder autom(h); auto out_dfa = std::make_unique(NFA_OUTFIX_RAW); if (determinise(autom, out_dfa->states, MAX_DFA_STATES)) { return finalise_out(build, h, autom, std::move(out_dfa), remap); } DEBUG_PRINTF("determinise failed\n"); return ANCHORED_FAIL; } static void setReports(NGHolder &h, const map> &reportMap, const unordered_map &orig_to_copy) { for (const auto &m : reportMap) { NFAVertex t = orig_to_copy.at(m.first); assert(!m.second.empty()); add_edge(t, h.accept, h); insert(&h[t].reports, m.second); } } int addAnchoredNFA(RoseBuildImpl &build, const NGHolder &wrapper, const map> &reportMap) { NGHolder h; unordered_map orig_to_copy; cloneHolder(h, wrapper, &orig_to_copy); clear_in_edges(h.accept, h); clear_in_edges(h.acceptEod, h); add_edge(h.accept, h.acceptEod, h); clearReports(h); setReports(h, reportMap, orig_to_copy); return addAutomaton(build, h, nullptr); } int addToAnchoredMatcher(RoseBuildImpl &build, const NGHolder &anchored, u32 exit_id, ReportID *remap) { NGHolder h; cloneHolder(h, anchored); clearReports(h); assert(in_degree(h.acceptEod, h) == 1); for (auto v : inv_adjacent_vertices_range(h.accept, h)) { h[v].reports.clear(); h[v].reports.insert(exit_id); } return addAutomaton(build, h, remap); } static void 
buildSimpleDfas(const RoseBuildImpl &build, const vector &frag_map, vector> *anchored_dfas) { /* we should have determinised all of these before so there should be no * chance of failure. */ flat_set exit_ids; for (const auto &simple : build.anchored_simple) { exit_ids.clear(); for (auto lit_id : simple.second) { assert(lit_id < frag_map.size()); exit_ids.insert(frag_map[lit_id]); } auto h = populate_holder(simple.first, exit_ids); Automaton_Holder autom(*h); auto rdfa = std::make_unique(NFA_OUTFIX_RAW); UNUSED bool rv = determinise(autom, rdfa->states, MAX_DFA_STATES); assert(rv); rdfa->start_anchored = INIT_STATE; rdfa->start_floating = DEAD_STATE; rdfa->alpha_size = autom.alphasize; rdfa->alpha_remap = autom.alpha; anchored_dfas->emplace_back(std::move(rdfa)); } } /** * Fill the given vector with all of the raw_dfas we need to compile into the * anchored matcher. Takes ownership of the input structures, clearing them * from RoseBuildImpl. */ static vector> getAnchoredDfas(RoseBuildImpl &build, const vector &frag_map) { vector> dfas; // DFAs that already exist as raw_dfas. for (auto &anch_dfas : build.anchored_nfas) { for (auto &rdfa : anch_dfas.second) { // cppcheck-suppress useStlAlgorithm dfas.emplace_back(std::move(rdfa)); } } build.anchored_nfas.clear(); // DFAs we currently have as simple literals. if (!build.anchored_simple.empty()) { buildSimpleDfas(build, frag_map, &dfas); build.anchored_simple.clear(); } return dfas; } /** * \brief Builds our anchored DFAs into runtime NFAs. * * Constructs a vector of NFA structures and a vector of their start offsets * (number of dots removed from the prefix) from the raw_dfa structures given. * * Note: frees the raw_dfa structures on completion. * * \return Total bytes required for the complete anchored matcher. 
*/ static size_t buildNfas(vector &anchored_dfas, vector> *nfas, vector *start_offset, const CompileContext &cc, const ReportManager &rm) { const size_t num_dfas = anchored_dfas.size(); nfas->reserve(num_dfas); start_offset->reserve(num_dfas); size_t total_size = 0; for (auto &rdfa : anchored_dfas) { u32 removed_dots = remove_leading_dots(rdfa); start_offset->emplace_back(removed_dots); minimize_hopcroft(rdfa, cc.grey); auto nfa = mcclellanCompile(rdfa, cc, rm, false); if (!nfa) { assert(0); throw std::bad_alloc(); } assert(nfa->length); total_size += ROUNDUP_CL(sizeof(anchored_matcher_info) + nfa->length); nfas->emplace_back(std::move(nfa)); } // We no longer need to keep the raw_dfa structures around. anchored_dfas.clear(); return total_size; } vector buildAnchoredDfas(RoseBuildImpl &build, const vector &fragments) { vector dfas; if (build.anchored_nfas.empty() && build.anchored_simple.empty()) { DEBUG_PRINTF("empty\n"); return dfas; } const auto frag_map = reverseFragMap(build, fragments); remapAnchoredReports(build, frag_map); auto anch_dfas = getAnchoredDfas(build, frag_map); mergeAnchoredDfas(anch_dfas, build); dfas.reserve(anch_dfas.size()); for (auto &rdfa : anch_dfas) { assert(rdfa); dfas.emplace_back(std::move(*rdfa)); } return dfas; } bytecode_ptr buildAnchoredMatcher(const RoseBuildImpl &build, const vector &fragments, vector &dfas) { const CompileContext &cc = build.cc; if (dfas.empty()) { DEBUG_PRINTF("empty\n"); return bytecode_ptr(nullptr); } for (auto &rdfa : dfas) { remapIdsToPrograms(fragments, rdfa); } vector> nfas; vector start_offset; // start offset for each dfa (dots removed) size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc, build.rm); if (total_size > cc.grey.limitRoseAnchoredSize) { throw ResourceLimitError(); } auto atable = make_zeroed_bytecode_ptr(total_size, 64); char *curr = reinterpret_cast(atable.get()); u32 state_offset = 0; for (size_t i = 0; i < nfas.size(); i++) { const NFA *nfa = nfas[i].get(); anchored_matcher_info 
*ami = reinterpret_cast(curr); const char *prev_curr = curr; curr += sizeof(anchored_matcher_info); memcpy(curr, nfa, nfa->length); curr += nfa->length; curr = ROUNDUP_PTR(curr, 64); if (i + 1 == nfas.size()) { ami->next_offset = 0U; } else { ami->next_offset = verify_u32(curr - prev_curr); } ami->state_offset = state_offset; state_offset += nfa->streamStateSize; ami->anchoredMinDistance = start_offset[i]; } DEBUG_PRINTF("success %zu\n", atable.size()); return atable; } } // namespace ue2