From bb29aeb2986797688687f05e11e16d623ccfa00b Mon Sep 17 00:00:00 2001
From: Alex Coyte
Date: Wed, 26 Apr 2017 13:45:31 +1000
Subject: [PATCH] rose: shift program construction functions to
 rose_build_program

---
 CMakeLists.txt                   |    1 +
 src/rose/rose_build_bytecode.cpp | 2549 ++----------------------------
 src/rose/rose_build_impl.h       |    5 +
 src/rose/rose_build_misc.cpp     |   53 +
 src/rose/rose_build_program.cpp  | 2150 +++++++++++++++++++++++++
 src/rose/rose_build_program.h    |  160 +-
 src/rose/rose_build_resources.h  |   57 +
 7 files changed, 2567 insertions(+), 2408 deletions(-)
 create mode 100644 src/rose/rose_build_resources.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 650bcf20..bc42c659 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -971,6 +971,7 @@ SET (hs_SRCS
     src/rose/rose_build_misc.cpp
     src/rose/rose_build_program.cpp
     src/rose/rose_build_program.h
+    src/rose/rose_build_resources.h
     src/rose/rose_build_role_aliasing.cpp
     src/rose/rose_build_scatter.cpp
     src/rose/rose_build_scatter.h
diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp
index 94927558..636af0a6 100644
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -38,11 +38,11 @@
 #include "rose_build_exclusive.h"
 #include "rose_build_groups.h"
 #include "rose_build_infix.h"
-#include "rose_build_instructions.h"
 #include "rose_build_long_lit.h"
 #include "rose_build_lookaround.h"
 #include "rose_build_matchers.h"
 #include "rose_build_program.h"
+#include "rose_build_resources.h"
 #include "rose_build_scatter.h"
 #include "rose_build_util.h"
 #include "rose_build_width.h"
@@ -82,7 +82,6 @@
 #include "util/compile_context.h"
 #include "util/compile_error.h"
 #include "util/container.h"
-#include "util/dump_charclass.h"
 #include "util/fatbit_build.h"
 #include "util/graph_range.h"
 #include "util/make_unique.h"
@@ -133,56 +132,6 @@ namespace ue2 {
 
 namespace /* anon */ {
 
-static constexpr u32 INVALID_QUEUE = ~0U;
-
-struct left_build_info {
-    // Constructor for an engine implementation.
-    left_build_info(u32 q, u32 l, u32 t, rose_group sm,
-                    const std::vector<u8> &stops, u32 max_ql, u8 cm_count,
-                    const CharReach &cm_cr)
-        : queue(q), lag(l), transient(t), squash_mask(sm), stopAlphabet(stops),
-          max_queuelen(max_ql), countingMiracleCount(cm_count),
-          countingMiracleReach(cm_cr) {}
-
-    // Constructor for a lookaround implementation.
-    explicit left_build_info(const vector<vector<LookEntry>> &looks)
-        : has_lookaround(true), lookaround(looks) {}
-
-    u32 queue = INVALID_QUEUE; /* uniquely idents the left_build_info */
-    u32 lag = 0;
-    u32 transient = 0;
-    rose_group squash_mask = ~rose_group{0};
-    vector<u8> stopAlphabet;
-    u32 max_queuelen = 0;
-    u8 countingMiracleCount = 0;
-    CharReach countingMiracleReach;
-    u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */
-
-    /* leftfix can be completely implemented with lookaround */
-    bool has_lookaround = false;
-    vector<vector<LookEntry>> lookaround; // alternative implementation to the NFA
-};
-
-/**
- * \brief Structure tracking which resources are used by this Rose instance at
- * runtime.
- *
- * We use this to control how much initialisation we need to do at the
- * beginning of a stream/block at runtime.
- */ -struct RoseResources { - bool has_outfixes = false; - bool has_suffixes = false; - bool has_leftfixes = false; - bool has_literals = false; - bool has_states = false; - bool checks_groups = false; - bool has_lit_delay = false; - bool has_lit_check = false; // long literal support - bool has_anchored = false; - bool has_floating = false; - bool has_eod = false; -}; - struct build_context : noncopyable { /** \brief information about engines to the left of a vertex */ map leftfix_info; @@ -190,27 +139,15 @@ struct build_context : noncopyable { /** \brief mapping from suffix to queue index. */ map suffixes; + /** \brief engine info by queue. */ + map engine_info_by_queue; + /** \brief Simple cache of programs written to engine blob, used for * deduplication. */ ue2::unordered_map program_cache; - /** \brief LookEntry list cache, so that we can reuse the look index and - * reach index for the same lookaround. */ - ue2::unordered_map>, - pair> lookaround_cache; - - /** \brief Lookaround table for Rose roles. */ - vector>> lookaround; - - /** \brief Lookaround look table size. */ - size_t lookTableSize = 0; - - /** \brief Lookaround reach table size. - * since single path lookaround and multi-path lookaround have different - * bitvectors range (32 and 256), we need to maintain both look table size - * and reach table size. */ - size_t reachTableSize = 0; + lookaround_info lookarounds; /** \brief State indices, for those roles that have them. * Each vertex present has a unique state index in the range @@ -225,17 +162,10 @@ struct build_context : noncopyable { * that need hash table support. */ vector longLiterals; - /** \brief Long literal length threshold, used in streaming mode. */ - size_t longLitLengthThreshold = 0; - /** \brief Contents of the Rose bytecode immediately following the * RoseEngine. */ RoseEngineBlob engine_blob; - /** \brief True if reports need CATCH_UP instructions to catch up suffixes, - * outfixes etc. */ - bool needs_catchup; - /** \brief True if this Rose engine has an MPV engine. */ bool needs_mpv_catchup = false; @@ -243,34 +173,6 @@ struct build_context : noncopyable { RoseResources resources; }; -/** \brief Data only used during construction of various programs (literal, - * anchored, delay, etc). */ -struct ProgramBuild : noncopyable { - explicit ProgramBuild(u32 fMinLitOffset) - : floatingMinLiteralMatchOffset(fMinLitOffset) { - } - - /** \brief Minimum offset of a match from the floating table. */ - const u32 floatingMinLiteralMatchOffset; - - /** \brief Mapping from vertex to key, for vertices with a - * CHECK_NOT_HANDLED instruction. */ - ue2::unordered_map handledKeys; - - /** \brief Mapping from Rose literal ID to anchored program index. */ - map anchored_programs; - - /** \brief Mapping from Rose literal ID to delayed program index. */ - map delay_programs; - - /** \brief Mapping from every vertex to the groups that must be on for that - * vertex to be reached. */ - ue2::unordered_map vertex_group_map; - - /** \brief Global bitmap of groups that can be squashed. 
-    rose_group squashable_groups = 0;
-};
-
 /** \brief subengine info including built engine and
  * corresponding triggering rose vertices */
 struct ExclusiveSubengine {
     bytecode_ptr<NFA> nfa;
     vector<RoseVertex> vertices;
 };
 
 /** \brief exclusive info to build tamarama */
 struct ExclusiveInfo : noncopyable {
     // subengine info
     vector<ExclusiveSubengine> subengines;
     // all the report in tamarama
     set<ReportID> reports;
     // assigned queue id
     u32 queue;
 };
 
 }
 
 static
-const NFA *get_nfa_from_blob(const build_context &bc, u32 qi) {
-    assert(contains(bc.engineOffsets, qi));
-    u32 nfa_offset = bc.engineOffsets.at(qi);
-    assert(nfa_offset >= bc.engine_blob.base_offset);
-    const NFA *n = (const NFA *)(bc.engine_blob.data() + nfa_offset -
-                                 bc.engine_blob.base_offset);
-    assert(n->queueIndex == qi);
-    return n;
-}
-
-static
-const NFA *add_nfa_to_blob(build_context &bc, NFA &nfa) {
+void add_nfa_to_blob(build_context &bc, NFA &nfa) {
     u32 qi = nfa.queueIndex;
     u32 nfa_offset = bc.engine_blob.add(nfa, nfa.length);
     DEBUG_PRINTF("added nfa qi=%u, type=%u, length=%u at offset=%u\n", qi,
@@ -310,10 +201,6 @@ const NFA *add_nfa_to_blob(build_context &bc, NFA &nfa) {
 
     assert(!contains(bc.engineOffsets, qi));
     bc.engineOffsets.emplace(qi, nfa_offset);
-
-    const NFA *n = get_nfa_from_blob(bc, qi);
-    assert(memcmp(&nfa, n, nfa.length) == 0);
-    return n;
 }
 
 static
@@ -401,8 +288,8 @@ bool isPureFloating(const RoseResources &resources, const CompileContext &cc) {
     }
 
     if (cc.streaming && resources.has_lit_check) {
-        DEBUG_PRINTF("has long literals in streaming mode, which needs "
-                     "long literal table support\n");
+        DEBUG_PRINTF("has long literals in streaming mode, which needs long "
+                     "literal table support\n");
         return false;
     }
 
@@ -719,8 +606,7 @@ buildRepeatEngine(const CastleProto &proto,
 
 static
 bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, bool is_transient,
-                         const CompileContext &cc,
-                         const ReportManager &rm) {
+                         const CompileContext &cc, const ReportManager &rm) {
     // Unleash the Sheng!!
     auto dfa = shengCompile(rdfa, cc, rm, false);
     if (!dfa && !is_transient) {
@@ -1155,6 +1041,31 @@ left_id updateLeftfixWithEager(RoseGraph &g, const eager_info &ei,
     return leftfix;
 }
 
+static
+void enforceEngineSizeLimit(const NFA *n, const Grey &grey) {
+    const size_t nfa_size = n->length;
+    // Global limit.
+    if (nfa_size > grey.limitEngineSize) {
+        throw ResourceLimitError();
+    }
+
+    // Type-specific limit checks follow.
+
+    if (isDfaType(n->type)) {
+        if (nfa_size > grey.limitDFASize) {
+            throw ResourceLimitError();
+        }
+    } else if (isNfaType(n->type)) {
+        if (nfa_size > grey.limitNFASize) {
+            throw ResourceLimitError();
+        }
+    } else if (isLbrType(n->type)) {
+        if (nfa_size > grey.limitLBRSize) {
+            throw ResourceLimitError();
+        }
+    }
+}
+
 static
 bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi,
                   const map<left_id, set<PredTopPair>> &infixTriggers,
@@ -1193,6 +1104,9 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi,
     setLeftNfaProperties(*nfa, leftfix);
 
     nfa->queueIndex = qi;
+    enforceEngineSizeLimit(nfa.get(), cc.grey);
+    bc.engine_info_by_queue.emplace(nfa->queueIndex,
+                                    engine_info(nfa.get(), is_transient));
 
     if (!prefix && !leftfix.haig() && leftfix.graph() &&
         nfaStuckOn(*leftfix.graph())) {
@@ -1290,12 +1204,10 @@ void updateTops(const RoseGraph &g, const TamaInfo &tamaInfo,
     for (const auto &n : tamaInfo.subengines) {
         for (const auto &v : subengines[i].vertices) {
             if (is_suffix) {
-                tamaProto.add(n, g[v].index, g[v].suffix.top,
-                              out_top_remap);
+                tamaProto.add(n, g[v].index, g[v].suffix.top, out_top_remap);
             } else {
                 for (const auto &e : in_edges_range(v, g)) {
-                    tamaProto.add(n, g[v].index, g[e].rose_top,
-                                  out_top_remap);
+                    tamaProto.add(n, g[v].index, g[e].rose_top, out_top_remap);
                 }
             }
         }
@@ -1308,32 +1220,34 @@ shared_ptr<TamaProto> constructContainerEngine(const RoseGraph &g,
                                                build_context &bc,
                                                const ExclusiveInfo &info,
                                                const u32 queue,
-                                               const bool is_suffix) {
+                                               const bool is_suffix,
+                                               const Grey &grey) {
     const auto &subengines = info.subengines;
-    auto tamaInfo =
-        constructTamaInfo(g, subengines, is_suffix);
+    auto tamaInfo = constructTamaInfo(g, subengines, is_suffix);
 
     map<pair<u32, u32>, u32> out_top_remap;
     auto n = buildTamarama(*tamaInfo, queue, out_top_remap);
+    enforceEngineSizeLimit(n.get(), grey);
+    bc.engine_info_by_queue.emplace(n->queueIndex, engine_info(n.get(), false));
     add_nfa_to_blob(bc, *n);
 
     DEBUG_PRINTF("queue id:%u\n", queue);
     shared_ptr<TamaProto> tamaProto = make_shared<TamaProto>();
     tamaProto->reports = info.reports;
-    updateTops(g, *tamaInfo, *tamaProto, subengines,
-               out_top_remap, is_suffix);
+    updateTops(g, *tamaInfo, *tamaProto, subengines, out_top_remap, is_suffix);
     return tamaProto;
 }
 
 static
 void buildInfixContainer(RoseGraph &g, build_context &bc,
-                         const vector<ExclusiveInfo> &exclusive_info) {
+                         const vector<ExclusiveInfo> &exclusive_info,
+                         const Grey &grey) {
     // Build tamarama engine
     for (const auto &info : exclusive_info) {
         const u32 queue = info.queue;
         const auto &subengines = info.subengines;
         auto tamaProto =
-            constructContainerEngine(g, bc, info, queue, false);
+            constructContainerEngine(g, bc, info, queue, false, grey);
 
         for (const auto &sub : subengines) {
             const auto &verts = sub.vertices;
@@ -1347,13 +1261,14 @@ void buildInfixContainer(RoseGraph &g, build_context &bc,
 
 static
 void buildSuffixContainer(RoseGraph &g, build_context &bc,
-                          const vector<ExclusiveInfo> &exclusive_info) {
+                          const vector<ExclusiveInfo> &exclusive_info,
+                          const Grey &grey) {
     // Build tamarama engine
     for (const auto &info : exclusive_info) {
         const u32 queue = info.queue;
         const auto &subengines = info.subengines;
-        auto tamaProto =
-            constructContainerEngine(g, bc, info, queue, true);
+        auto tamaProto = constructContainerEngine(g, bc, info, queue, true,
+                                                  grey);
         for (const auto &sub : subengines) {
             const auto &verts = sub.vertices;
             for (const auto &v : verts) {
@@ -1488,7 +1403,7 @@ void buildExclusiveInfixes(RoseBuildImpl &build, build_context &bc,
     }
     updateExclusiveInfixProperties(build, exclusive_info, bc.leftfix_info,
                                    no_retrigger_queues);
-    buildInfixContainer(g, bc, exclusive_info);
+    buildInfixContainer(g, bc, exclusive_info, build.cc.grey);
 }
 
 static
@@ -1560,8 +1475,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
     findInfixTriggers(tbi, &infixTriggers);
 
     if (cc.grey.allowTamarama && cc.streaming && !do_prefix) {
-        findExclusiveInfixes(tbi, bc, qif, infixTriggers,
-                             no_retrigger_queues);
+        findExclusiveInfixes(tbi, bc, qif, infixTriggers, no_retrigger_queues);
     }
 
     for (auto v : vertices_range(g)) {
@@ -1769,6 +1683,9 @@ void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired,
 
     u32 qi = mpv_outfix->get_queue(tbi.qif);
     nfa->queueIndex = qi;
+    enforceEngineSizeLimit(nfa.get(), tbi.cc.grey);
+    bc.engine_info_by_queue.emplace(nfa->queueIndex,
+                                    engine_info(nfa.get(), false));
 
     DEBUG_PRINTF("built mpv\n");
 
@@ -1827,6 +1744,9 @@ bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc,
         setOutfixProperties(*n, out);
 
         n->queueIndex = out.get_queue(tbi.qif);
+        enforceEngineSizeLimit(n.get(), tbi.cc.grey);
+        bc.engine_info_by_queue.emplace(n->queueIndex,
+                                        engine_info(n.get(), false));
 
         if (!*historyRequired && requires_decompress_key(*n)) {
             *historyRequired = 1;
@@ -1924,14 +1844,14 @@ void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc,
     }
     updateExclusiveSuffixProperties(build, exclusive_info,
                                     no_retrigger_queues);
-    buildSuffixContainer(g, bc, exclusive_info);
+    buildSuffixContainer(g, bc, exclusive_info, build.cc.grey);
 }
 
 static
 void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc,
-                          QueueIndexFactory &qif,
-                          map<suffix_id, set<RoseVertex>> &suffixTriggers,
-                          set<u32> *no_retrigger_queues) {
+                           QueueIndexFactory &qif,
+                           map<suffix_id, set<RoseVertex>> &suffixTriggers,
+                           set<u32> *no_retrigger_queues) {
     const RoseGraph &g = tbi.g;
 
     map<suffix_id, u32> suffixes;
@@ -2021,6 +1941,10 @@ bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc,
         setSuffixProperties(*n, s, tbi.rm);
 
         n->queueIndex = queue;
+        enforceEngineSizeLimit(n.get(), tbi.cc.grey);
+        bc.engine_info_by_queue.emplace(n->queueIndex,
+                                        engine_info(n.get(), false));
+
         if (s.graph() && nfaStuckOn(*s.graph())) { /* todo: have corresponding
                                                     * haig analysis */
             assert(!s.haig());
@@ -2114,44 +2038,28 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif,
 }
 
 static
-void allocateStateSpace(const NFA *nfa, NfaInfo &nfa_info, bool is_transient,
+void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info,
                         RoseStateOffsets *so, u32 *scratchStateSize,
                         u32 *streamStateSize, u32 *transientStateSize) {
     u32 state_offset;
-    if (is_transient) {
+    if (eng_info.transient) {
         // Transient engines do not use stream state, but must have room in
         // transient state (stored in scratch).
         state_offset = *transientStateSize;
-        *transientStateSize += nfa->streamStateSize;
+        *transientStateSize += eng_info.stream_size;
    } else {
         // Pack NFA stream state on to the end of the Rose stream state.
         state_offset = so->end;
-        so->end += nfa->streamStateSize;
-        *streamStateSize += nfa->streamStateSize;
+        so->end += eng_info.stream_size;
+        *streamStateSize += eng_info.stream_size;
     }
 
     nfa_info.stateOffset = state_offset;
 
     // Uncompressed state in scratch must be aligned.
- u32 alignReq = state_alignment(*nfa); - assert(alignReq); - *scratchStateSize = ROUNDUP_N(*scratchStateSize, alignReq); + *scratchStateSize = ROUNDUP_N(*scratchStateSize, eng_info.scratch_align); nfa_info.fullStateOffset = *scratchStateSize; - *scratchStateSize += nfa->scratchStateSize; -} - -static -set -findTransientQueues(const map &leftfix_info) { - DEBUG_PRINTF("curating transient queues\n"); - set out; - for (const auto &left : leftfix_info | map_values) { - if (left.transient) { - DEBUG_PRINTF("q %u is transient\n", left.queue); - out.insert(left.queue); - } - } - return out; + *scratchStateSize += eng_info.scratch_size; } static @@ -2159,7 +2067,6 @@ void updateNfaState(const build_context &bc, vector &nfa_infos, RoseStateOffsets *so, u32 *scratchStateSize, u32 *streamStateSize, u32 *transientStateSize) { if (nfa_infos.empty()) { - assert(bc.engineOffsets.empty()); return; } @@ -2167,14 +2074,10 @@ void updateNfaState(const build_context &bc, vector &nfa_infos, *transientStateSize = 0; *scratchStateSize = 0; - auto transient_queues = findTransientQueues(bc.leftfix_info); - - for (const auto &m : bc.engineOffsets) { - const NFA *nfa = get_nfa_from_blob(bc, m.first); - u32 qi = nfa->queueIndex; - bool is_transient = contains(transient_queues, qi); + for (u32 qi = 0; qi < nfa_infos.size(); qi++) { NfaInfo &nfa_info = nfa_infos[qi]; - allocateStateSpace(nfa, nfa_info, is_transient, so, scratchStateSize, + const auto &eng_info = bc.engine_info_by_queue.at(qi); + allocateStateSpace(eng_info, nfa_info, so, scratchStateSize, streamStateSize, transientStateSize); } } @@ -2267,30 +2170,6 @@ u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { return bc.engine_blob.add_iterator(iter); } -static -void enforceEngineSizeLimit(const NFA *n, const size_t nfa_size, const Grey &grey) { - // Global limit. - if (nfa_size > grey.limitEngineSize) { - throw ResourceLimitError(); - } - - // Type-specific limit checks follow. - - if (isDfaType(n->type)) { - if (nfa_size > grey.limitDFASize) { - throw ResourceLimitError(); - } - } else if (isNfaType(n->type)) { - if (nfa_size > grey.limitNFASize) { - throw ResourceLimitError(); - } - } else if (isLbrType(n->type)) { - if (nfa_size > grey.limitLBRSize) { - throw ResourceLimitError(); - } - } -} - static u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build, const vector &anchored_dfas) { @@ -2363,8 +2242,8 @@ static u32 buildEodNfaIterator(build_context &bc, const u32 activeQueueCount) { vector keys; for (u32 qi = 0; qi < activeQueueCount; ++qi) { - const NFA *n = get_nfa_from_blob(bc, qi); - if (nfaAcceptsEod(n)) { + const auto &eng_info = bc.engine_info_by_queue.at(qi); + if (eng_info.accepts_eod) { DEBUG_PRINTF("nfa qi=%u accepts eod\n", qi); keys.push_back(qi); } @@ -2451,61 +2330,7 @@ void addSomRevNfas(build_context &bc, RoseEngine &proto, } static -void applyFinalSpecialisation(RoseProgram &program) { - assert(!program.empty()); - assert(program.back().code() == ROSE_INSTR_END); - if (program.size() < 2) { - return; - } - - /* Replace the second-to-last instruction (before END) with a one-shot - * specialisation if available. 
*/ - auto it = next(program.rbegin()); - if (auto *ri = dynamic_cast(it->get())) { - DEBUG_PRINTF("replacing REPORT with FINAL_REPORT\n"); - program.replace(it, make_unique( - ri->onmatch, ri->offset_adjust)); - } -} - -static -void recordResources(RoseResources &resources, const RoseProgram &program) { - for (const auto &ri : program) { - switch (ri->code()) { - case ROSE_INSTR_TRIGGER_SUFFIX: - resources.has_suffixes = true; - break; - case ROSE_INSTR_TRIGGER_INFIX: - case ROSE_INSTR_CHECK_INFIX: - case ROSE_INSTR_CHECK_PREFIX: - case ROSE_INSTR_SOM_LEFTFIX: - resources.has_leftfixes = true; - break; - case ROSE_INSTR_SET_STATE: - case ROSE_INSTR_CHECK_STATE: - case ROSE_INSTR_SPARSE_ITER_BEGIN: - case ROSE_INSTR_SPARSE_ITER_NEXT: - resources.has_states = true; - break; - case ROSE_INSTR_CHECK_GROUPS: - resources.checks_groups = true; - break; - case ROSE_INSTR_PUSH_DELAYED: - resources.has_lit_delay = true; - break; - case ROSE_INSTR_CHECK_LONG_LIT: - case ROSE_INSTR_CHECK_LONG_LIT_NOCASE: - resources.has_lit_check = true; - break; - default: - break; - } - } -} - -static -void recordResources(RoseResources &resources, - const RoseBuildImpl &build, +void recordResources(RoseResources &resources, const RoseBuildImpl &build, const vector &fragments) { if (!build.outfixes.empty()) { resources.has_outfixes = true; @@ -2526,26 +2351,6 @@ void recordResources(RoseResources &resources, } } -static -void recordLongLiterals(vector &longLiterals, - const RoseProgram &program) { - for (const auto &ri : program) { - if (const auto *ri_check = - dynamic_cast(ri.get())) { - DEBUG_PRINTF("found CHECK_LONG_LIT for string '%s'\n", - escapeString(ri_check->literal).c_str()); - longLiterals.emplace_back(ri_check->literal, false); - continue; - } - if (const auto *ri_check = - dynamic_cast(ri.get())) { - DEBUG_PRINTF("found CHECK_LONG_LIT_NOCASE for string '%s'\n", - escapeString(ri_check->literal).c_str()); - longLiterals.emplace_back(ri_check->literal, true); - } - } -} - static u32 writeProgram(build_context &bc, RoseProgram &&program) { if (program.empty()) { @@ -2593,41 +2398,12 @@ u32 writeActiveLeftIter(RoseEngineBlob &engine_blob, return engine_blob.add_iterator(iter); } -static -bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) { - const auto &g = build.g; - const auto v = target(e, g); - - if (!build.g[v].eod_accept) { - return false; - } - - // If there's a graph between us and EOD, we shouldn't be eager. - if (build.g[v].left) { - return false; - } - - // Must be exactly at EOD. - if (g[e].minBound != 0 || g[e].maxBound != 0) { - return false; - } - - // In streaming mode, we can only eagerly report EOD for literals in the - // EOD-anchored table, as that's the only time we actually know where EOD - // is. In block mode, we always have this information. 
- const auto u = source(e, g); - if (build.cc.streaming && !build.isInETable(u)) { - return false; - } - - return true; -} - static bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc, u32 outfixEndQueue) { for (u32 i = 0; i < outfixEndQueue; i++) { - if (nfaAcceptsEod(get_nfa_from_blob(bc, i))) { + const auto &eng_info = bc.engine_info_by_queue.at(i); + if (eng_info.accepts_eod) { DEBUG_PRINTF("outfix has eod\n"); return true; } @@ -2699,21 +2475,22 @@ void writeMultipathLookaround(const vector> &multi_look, } static -void writeLookaroundTables(build_context &bc, RoseEngine &proto) { - vector look_table(bc.lookTableSize, 0); - vector reach_table(bc.reachTableSize, 0); +void writeLookaroundTables(const lookaround_info &lookarounds, + RoseEngineBlob &engine_blob, RoseEngine &proto) { + vector look_table(lookarounds.lookTableSize, 0); + vector reach_table(lookarounds.reachTableSize, 0); s8 *look = look_table.data(); u8 *reach = reach_table.data(); - for (const auto &l : bc.lookaround) { - if (l.size() == 1) { - writeLookaround(l.front(), look, reach); + for (const auto &la : lookarounds.table) { + if (la.size() == 1) { + writeLookaround(la.front(), look, reach); } else { - writeMultipathLookaround(l, look, reach); + writeMultipathLookaround(la, look, reach); } } - proto.lookaroundTableOffset = bc.engine_blob.add_range(look_table); - proto.lookaroundReachOffset = bc.engine_blob.add_range(reach_table); + proto.lookaroundTableOffset = engine_blob.add_range(look_table); + proto.lookaroundReachOffset = engine_blob.add_range(reach_table); } static @@ -2750,9 +2527,6 @@ void writeNfaInfo(const RoseBuildImpl &build, build_context &bc, memset(infos.data(), 0, sizeof(NfaInfo) * queue_count); for (u32 qi = 0; qi < queue_count; qi++) { - const NFA *n = get_nfa_from_blob(bc, qi); - enforceEngineSizeLimit(n, n->length, build.cc.grey); - NfaInfo &info = infos[qi]; info.nfaOffset = bc.engineOffsets.at(qi); assert(qi < ekey_lists.size()); @@ -2806,1475 +2580,6 @@ bool hasBoundaryReports(const BoundaryReports &boundary) { return false; } -/** - * \brief True if the given vertex is a role that can only be switched on at - * EOD. - */ -static -bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) { - const RoseGraph &g = tbi.g; - - // All such roles have only (0,0) edges to vertices with the eod_accept - // property, and no other effects (suffixes, ordinary reports, etc, etc). - - if (isLeafNode(v, g) || !g[v].reports.empty() || g[v].suffix) { - return false; - } - - for (const auto &e : out_edges_range(v, g)) { - RoseVertex w = target(e, g); - if (!g[w].eod_accept) { - return false; - } - assert(!g[w].reports.empty()); - assert(g[w].literals.empty()); - - if (g[e].minBound || g[e].maxBound) { - return false; - } - } - - /* There is no pointing enforcing this check at runtime if - * this role is only fired by the eod event literal */ - if (tbi.eod_event_literal_id != MO_INVALID_IDX && - g[v].literals.size() == 1 && - *g[v].literals.begin() == tbi.eod_event_literal_id) { - return false; - } - - return true; -} - -static -void addLookaround(build_context &bc, - const vector> &look, - u32 &look_index, u32 &reach_index) { - // Check the cache. 
-    auto it = bc.lookaround_cache.find(look);
-    if (it != bc.lookaround_cache.end()) {
-        look_index = verify_u32(it->second.first);
-        reach_index = verify_u32(it->second.second);
-        DEBUG_PRINTF("reusing look at idx %u\n", look_index);
-        DEBUG_PRINTF("reusing reach at idx %u\n", reach_index);
-        return;
-    }
-
-    size_t look_idx = bc.lookTableSize;
-    size_t reach_idx = bc.reachTableSize;
-
-    if (look.size() == 1) {
-        bc.lookTableSize += look.front().size();
-        bc.reachTableSize += look.front().size() * REACH_BITVECTOR_LEN;
-    } else {
-        bc.lookTableSize += look.size();
-        bc.reachTableSize += look.size() * MULTI_REACH_BITVECTOR_LEN;
-    }
-
-    bc.lookaround_cache.emplace(look, make_pair(look_idx, reach_idx));
-    bc.lookaround.emplace_back(look);
-
-    DEBUG_PRINTF("adding look at idx %zu\n", look_idx);
-    DEBUG_PRINTF("adding reach at idx %zu\n", reach_idx);
-    look_index = verify_u32(look_idx);
-    reach_index = verify_u32(reach_idx);
-}
-
-static
-bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) {
-    size_t reach_size = cr.count();
-    assert(reach_size > 0);
-    // check whether entry_size is some power of 2.
-    if ((reach_size - 1) & reach_size) {
-        return false;
-    }
-    make_and_cmp_mask(cr, &andmask, &cmpmask);
-    if ((1 << popcount32((u8)(~andmask))) ^ reach_size) {
-        return false;
-    }
-    return true;
-}
-
-static
-bool checkReachWithFlip(const CharReach &cr, u8 &andmask,
-                        u8 &cmpmask, u8 &flip) {
-    if (checkReachMask(cr, andmask, cmpmask)) {
-        flip = 0;
-        return true;
-    }
-    if (checkReachMask(~cr, andmask, cmpmask)) {
-        flip = 1;
-        return true;
-    }
-    return false;
-}
-
-static
-bool makeRoleByte(const vector<LookEntry> &look, RoseProgram &program) {
-    if (look.size() == 1) {
-        const auto &entry = look[0];
-        u8 andmask_u8, cmpmask_u8;
-        u8 flip;
-        if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) {
-            return false;
-        }
-        s32 checkbyte_offset = verify_s32(entry.offset);
-        DEBUG_PRINTF("CHECK BYTE offset=%d\n", checkbyte_offset);
-        const auto *end_inst = program.end_instruction();
-        auto ri = make_unique<RoseInstrCheckByte>(andmask_u8, cmpmask_u8, flip,
-                                                  checkbyte_offset, end_inst);
-        program.add_before_end(move(ri));
-        return true;
-    }
-    return false;
-}
-
-static
-bool makeRoleMask(const vector<LookEntry> &look, RoseProgram &program) {
-    if (look.back().offset < look.front().offset + 8) {
-        s32 base_offset = verify_s32(look.front().offset);
-        u64a and_mask = 0;
-        u64a cmp_mask = 0;
-        u64a neg_mask = 0;
-        for (const auto &entry : look) {
-            u8 andmask_u8, cmpmask_u8, flip;
-            if (!checkReachWithFlip(entry.reach, andmask_u8,
-                                    cmpmask_u8, flip)) {
-                return false;
-            }
-            DEBUG_PRINTF("entry offset %d\n", entry.offset);
-            u32 shift = (entry.offset - base_offset) << 3;
-            and_mask |= (u64a)andmask_u8 << shift;
-            cmp_mask |= (u64a)cmpmask_u8 << shift;
-            if (flip) {
-                neg_mask |= 0xffLLU << shift;
-            }
-        }
-        DEBUG_PRINTF("CHECK MASK and_mask=%llx cmp_mask=%llx\n",
-                     and_mask, cmp_mask);
-        const auto *end_inst = program.end_instruction();
-        auto ri = make_unique<RoseInstrCheckMask>(and_mask, cmp_mask, neg_mask,
-                                                  base_offset, end_inst);
-        program.add_before_end(move(ri));
-        return true;
-    }
-    return false;
-}
-
-static UNUSED
-string convertMaskstoString(u8 *p, int byte_len) {
-    string s;
-    for (int i = 0; i < byte_len; i++) {
-        u8 hi = *p >> 4;
-        u8 lo = *p & 0xf;
-        s += (char)(hi + (hi < 10 ? 48 : 87));
-        s += (char)(lo + (lo < 10 ? 48 : 87));
-        p++;
-    }
-    return s;
-}
-
-static
-bool makeRoleMask32(const vector<LookEntry> &look,
-                    RoseProgram &program) {
-    if (look.back().offset >= look.front().offset + 32) {
-        return false;
-    }
-    s32 base_offset = verify_s32(look.front().offset);
-    array<u8, 32> and_mask, cmp_mask;
-    and_mask.fill(0);
-    cmp_mask.fill(0);
-    u32 neg_mask = 0;
-    for (const auto &entry : look) {
-        u8 andmask_u8, cmpmask_u8, flip;
-        if (!checkReachWithFlip(entry.reach, andmask_u8,
-                                cmpmask_u8, flip)) {
-            return false;
-        }
-        u32 shift = entry.offset - base_offset;
-        assert(shift < 32);
-        and_mask[shift] = andmask_u8;
-        cmp_mask[shift] = cmpmask_u8;
-        if (flip) {
-            neg_mask |= 1 << shift;
-        }
-    }
-
-    DEBUG_PRINTF("and_mask %s\n",
-                 convertMaskstoString(and_mask.data(), 32).c_str());
-    DEBUG_PRINTF("cmp_mask %s\n",
-                 convertMaskstoString(cmp_mask.data(), 32).c_str());
-    DEBUG_PRINTF("neg_mask %08x\n", neg_mask);
-    DEBUG_PRINTF("base_offset %d\n", base_offset);
-
-    const auto *end_inst = program.end_instruction();
-    auto ri = make_unique<RoseInstrCheckMask32>(and_mask, cmp_mask, neg_mask,
-                                                base_offset, end_inst);
-    program.add_before_end(move(ri));
-    return true;
-}
-
-// Sorting by the size of every bucket.
-// Used in map<u32, vector<s8>, cmpNibble>.
-struct cmpNibble {
-    bool operator()(const u32 data1, const u32 data2) const{
-        u32 size1 = popcount32(data1 >> 16) * popcount32(data1 << 16);
-        u32 size2 = popcount32(data2 >> 16) * popcount32(data2 << 16);
-        return std::tie(size1, data1) < std::tie(size2, data2);
-    }
-};
-
-// Insert all pairs of bucket and offset into buckets.
-static really_inline
-void getAllBuckets(const vector<LookEntry> &look,
-                   map<u32, vector<s8>, cmpNibble> &buckets, u64a &neg_mask) {
-    s32 base_offset = verify_s32(look.front().offset);
-    for (const auto &entry : look) {
-        CharReach cr = entry.reach;
-        // Flip heavy character classes to save buckets.
-        if (cr.count() > 128 ) {
-            cr.flip();
-        } else {
-            neg_mask ^= 1ULL << (entry.offset - base_offset);
-        }
-        map<u16, u16> lo2hi;
-        // We treat Ascii Table as a 16x16 grid.
-        // Push every row in cr into lo2hi and mark the row number.
-        for (size_t i = cr.find_first(); i != CharReach::npos;) {
-            u8 it_hi = i >> 4;
-            u16 low_encode = 0;
-            while (i != CharReach::npos && (i >> 4) == it_hi) {
-                low_encode |= 1 << (i & 0xf);
-                i = cr.find_next(i);
-            }
-            lo2hi[low_encode] |= 1 << it_hi;
-        }
-        for (const auto &it : lo2hi) {
-            u32 hi_lo = (it.second << 16) | it.first;
-            buckets[hi_lo].push_back(entry.offset);
-        }
-    }
-}
-
-// Once we have a new bucket, we'll try to combine it with all old buckets.
-static really_inline
-void nibUpdate(map<u32, u16> &nib, u32 hi_lo) {
-    u16 hi = hi_lo >> 16;
-    u16 lo = hi_lo & 0xffff;
-    for (const auto pairs : nib) {
-        u32 old = pairs.first;
-        if ((old >> 16) == hi || (old & 0xffff) == lo) {
-            if (!nib[old | hi_lo]) {
-                nib[old | hi_lo] = nib[old] | nib[hi_lo];
-            }
-        }
-    }
-}
-
-static really_inline
-void nibMaskUpdate(array<u8, 32> &mask, u32 data, u8 bit_index) {
-    for (u8 index = 0; data > 0; data >>= 1, index++) {
-        if (data & 1) {
-            // 0 ~ 7 bucket in first 16 bytes,
-            // 8 ~ 15 bucket in second 16 bytes.
-            if (bit_index >= 8) {
-                mask[index + 16] |= 1 << (bit_index - 8);
-            } else {
-                mask[index] |= 1 << bit_index;
-            }
-        }
-    }
-}
-
-static
-bool getShuftiMasks(const vector<LookEntry> &look, array<u8, 32> &hi_mask,
-                    array<u8, 32> &lo_mask, u8 *bucket_select_hi,
-                    u8 *bucket_select_lo, u64a &neg_mask,
-                    u8 &bit_idx, size_t len) {
-    map<u32, u16> nib; // map every bucket to its bucket number.
- map, cmpNibble> bucket2offsets; - s32 base_offset = look.front().offset; - - bit_idx = 0; - neg_mask = ~0ULL; - - getAllBuckets(look, bucket2offsets, neg_mask); - - for (const auto &it : bucket2offsets) { - u32 hi_lo = it.first; - // New bucket. - if (!nib[hi_lo]) { - if ((bit_idx >= 8 && len == 64) || bit_idx >= 16) { - return false; - } - nib[hi_lo] = 1 << bit_idx; - - nibUpdate(nib, hi_lo); - nibMaskUpdate(hi_mask, hi_lo >> 16, bit_idx); - nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_idx); - bit_idx++; - } - - DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]); - - // Update bucket_select_mask. - u8 nib_hi = nib[hi_lo] >> 8; - u8 nib_lo = nib[hi_lo] & 0xff; - for (const auto offset : it.second) { - bucket_select_hi[offset - base_offset] |= nib_hi; - bucket_select_lo[offset - base_offset] |= nib_lo; - } - } - return true; -} - -static -unique_ptr -makeCheckShufti16x8(u32 offset_range, u8 bucket_idx, - const array &hi_mask, const array &lo_mask, - const array &bucket_select_mask, - u32 neg_mask, s32 base_offset, - const RoseInstruction *end_inst) { - if (offset_range > 16 || bucket_idx > 8) { - return nullptr; - } - array nib_mask; - array bucket_select_mask_16; - copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin()); - copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16); - copy(bucket_select_mask.begin(), bucket_select_mask.begin() + 16, - bucket_select_mask_16.begin()); - return make_unique - (nib_mask, bucket_select_mask_16, - neg_mask & 0xffff, base_offset, end_inst); -} - -static -unique_ptr -makeCheckShufti32x8(u32 offset_range, u8 bucket_idx, - const array &hi_mask, const array &lo_mask, - const array &bucket_select_mask, - u32 neg_mask, s32 base_offset, - const RoseInstruction *end_inst) { - if (offset_range > 32 || bucket_idx > 8) { - return nullptr; - } - - array hi_mask_16; - array lo_mask_16; - copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin()); - copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin()); - return make_unique - (hi_mask_16, lo_mask_16, bucket_select_mask, - neg_mask, base_offset, end_inst); -} - -static -unique_ptr -makeCheckShufti16x16(u32 offset_range, u8 bucket_idx, - const array &hi_mask, const array &lo_mask, - const array &bucket_select_mask_lo, - const array &bucket_select_mask_hi, - u32 neg_mask, s32 base_offset, - const RoseInstruction *end_inst) { - if (offset_range > 16 || bucket_idx > 16) { - return nullptr; - } - - array bucket_select_mask_32; - copy(bucket_select_mask_lo.begin(), bucket_select_mask_lo.begin() + 16, - bucket_select_mask_32.begin()); - copy(bucket_select_mask_hi.begin(), bucket_select_mask_hi.begin() + 16, - bucket_select_mask_32.begin() + 16); - return make_unique - (hi_mask, lo_mask, bucket_select_mask_32, - neg_mask & 0xffff, base_offset, end_inst); -} -static -unique_ptr -makeCheckShufti32x16(u32 offset_range, u8 bucket_idx, - const array &hi_mask, const array &lo_mask, - const array &bucket_select_mask_lo, - const array &bucket_select_mask_hi, - u32 neg_mask, s32 base_offset, - const RoseInstruction *end_inst) { - if (offset_range > 32 || bucket_idx > 16) { - return nullptr; - } - - return make_unique - (hi_mask, lo_mask, bucket_select_mask_hi, - bucket_select_mask_lo, neg_mask, base_offset, end_inst); -} - -static -bool makeRoleShufti(const vector &look, - RoseProgram &program) { - - s32 base_offset = verify_s32(look.front().offset); - if (look.back().offset >= base_offset + 32) { - return false; - } - - u8 bucket_idx = 0; // number of buckets - u64a neg_mask_64; - array hi_mask; 
- array lo_mask; - array bucket_select_hi; - array bucket_select_lo; - hi_mask.fill(0); - lo_mask.fill(0); - bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8. - bucket_select_lo.fill(0); - - if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(), - bucket_select_lo.data(), neg_mask_64, bucket_idx, 32)) { - return false; - } - u32 neg_mask = (u32)neg_mask_64; - - DEBUG_PRINTF("hi_mask %s\n", - convertMaskstoString(hi_mask.data(), 32).c_str()); - DEBUG_PRINTF("lo_mask %s\n", - convertMaskstoString(lo_mask.data(), 32).c_str()); - DEBUG_PRINTF("bucket_select_hi %s\n", - convertMaskstoString(bucket_select_hi.data(), 32).c_str()); - DEBUG_PRINTF("bucket_select_lo %s\n", - convertMaskstoString(bucket_select_lo.data(), 32).c_str()); - - const auto *end_inst = program.end_instruction(); - s32 offset_range = look.back().offset - base_offset + 1; - - auto ri = makeCheckShufti16x8(offset_range, bucket_idx, hi_mask, lo_mask, - bucket_select_lo, neg_mask, base_offset, - end_inst); - if (!ri) { - ri = makeCheckShufti32x8(offset_range, bucket_idx, hi_mask, lo_mask, - bucket_select_lo, neg_mask, base_offset, - end_inst); - } - if (!ri) { - ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask, - bucket_select_lo, bucket_select_hi, - neg_mask, base_offset, end_inst); - } - if (!ri) { - ri = makeCheckShufti32x16(offset_range, bucket_idx, hi_mask, lo_mask, - bucket_select_lo, bucket_select_hi, - neg_mask, base_offset, end_inst); - } - assert(ri); - program.add_before_end(move(ri)); - - return true; -} - -/** - * Builds a lookaround instruction, or an appropriate specialization if one is - * available. - */ -static -void makeLookaroundInstruction(build_context &bc, const vector &look, - RoseProgram &program) { - assert(!look.empty()); - - if (makeRoleByte(look, program)) { - return; - } - - if (look.size() == 1) { - s8 offset = look.begin()->offset; - u32 look_idx, reach_idx; - vector> lookaround; - lookaround.emplace_back(look); - addLookaround(bc, lookaround, look_idx, reach_idx); - // We don't need look_idx here. 
- auto ri = make_unique(offset, reach_idx, - program.end_instruction()); - program.add_before_end(move(ri)); - return; - } - - if (makeRoleMask(look, program)) { - return; - } - - if (makeRoleMask32(look, program)) { - return; - } - - if (makeRoleShufti(look, program)) { - return; - } - - u32 look_idx, reach_idx; - vector> lookaround; - lookaround.emplace_back(look); - addLookaround(bc, lookaround, look_idx, reach_idx); - u32 look_count = verify_u32(look.size()); - - auto ri = make_unique(look_idx, reach_idx, - look_count, - program.end_instruction()); - program.add_before_end(move(ri)); -} - -#if defined(DEBUG) || defined(DUMP_SUPPORT) -static UNUSED -string dumpMultiLook(const vector &looks) { - ostringstream oss; - for (auto it = looks.begin(); it != looks.end(); ++it) { - if (it != looks.begin()) { - oss << ", "; - } - oss << "{" << int(it->offset) << ": " << describeClass(it->reach) << "}"; - } - return oss.str(); -} -#endif - -static -bool makeRoleMultipathShufti(const vector> &multi_look, - RoseProgram &program) { - if (multi_look.empty()) { - return false; - } - - // find the base offset - assert(!multi_look[0].empty()); - s32 base_offset = multi_look[0].front().offset; - s32 last_start = base_offset; - s32 end_offset = multi_look[0].back().offset; - size_t multi_len = 0; - - for (const auto &look : multi_look) { - assert(look.size() > 0); - multi_len += look.size(); - - LIMIT_TO_AT_MOST(&base_offset, look.front().offset); - ENSURE_AT_LEAST(&last_start, look.front().offset); - ENSURE_AT_LEAST(&end_offset, look.back().offset); - } - - assert(last_start < 0); - - if (end_offset - base_offset >= MULTIPATH_MAX_LEN) { - return false; - } - - if (multi_len <= 16) { - multi_len = 16; - } else if (multi_len <= 32) { - multi_len = 32; - } else if (multi_len <= 64) { - multi_len = 64; - } else { - DEBUG_PRINTF("too long for multi-path\n"); - return false; - } - - vector linear_look; - array data_select_mask; - data_select_mask.fill(0); - u64a hi_bits_mask = 0; - u64a lo_bits_mask = 0; - - for (const auto &look : multi_look) { - assert(linear_look.size() < 64); - lo_bits_mask |= 1LLU << linear_look.size(); - for (const auto &entry : look) { - assert(entry.offset - base_offset < MULTIPATH_MAX_LEN); - data_select_mask[linear_look.size()] = - verify_u8(entry.offset - base_offset); - linear_look.emplace_back(verify_s8(linear_look.size()), entry.reach); - } - hi_bits_mask |= 1LLU << (linear_look.size() - 1); - } - - u8 bit_index = 0; // number of buckets - u64a neg_mask; - array hi_mask; - array lo_mask; - array bucket_select_hi; - array bucket_select_lo; - hi_mask.fill(0); - lo_mask.fill(0); - bucket_select_hi.fill(0); - bucket_select_lo.fill(0); - - if (!getShuftiMasks(linear_look, hi_mask, lo_mask, bucket_select_hi.data(), - bucket_select_lo.data(), neg_mask, bit_index, - multi_len)) { - return false; - } - - DEBUG_PRINTF("hi_mask %s\n", - convertMaskstoString(hi_mask.data(), 16).c_str()); - DEBUG_PRINTF("lo_mask %s\n", - convertMaskstoString(lo_mask.data(), 16).c_str()); - DEBUG_PRINTF("bucket_select_hi %s\n", - convertMaskstoString(bucket_select_hi.data(), 64).c_str()); - DEBUG_PRINTF("bucket_select_lo %s\n", - convertMaskstoString(bucket_select_lo.data(), 64).c_str()); - DEBUG_PRINTF("data_select_mask %s\n", - convertMaskstoString(data_select_mask.data(), 64).c_str()); - DEBUG_PRINTF("hi_bits_mask %llx\n", hi_bits_mask); - DEBUG_PRINTF("lo_bits_mask %llx\n", lo_bits_mask); - DEBUG_PRINTF("neg_mask %llx\n", neg_mask); - DEBUG_PRINTF("base_offset %d\n", base_offset); - DEBUG_PRINTF("last_start 
%d\n", last_start); - - // Since we don't have 16x16 now, just call 32x16 instead. - if (bit_index > 8) { - assert(multi_len <= 32); - multi_len = 32; - } - - const auto *end_inst = program.end_instruction(); - assert(multi_len == 16 || multi_len == 32 || multi_len == 64); - if (multi_len == 16) { - neg_mask &= 0xffff; - assert(!(hi_bits_mask & ~0xffffULL)); - assert(!(lo_bits_mask & ~0xffffULL)); - assert(bit_index <=8); - array nib_mask; - copy(begin(lo_mask), begin(lo_mask) + 16, nib_mask.begin()); - copy(begin(hi_mask), begin(hi_mask) + 16, nib_mask.begin() + 16); - - auto ri = make_unique - (nib_mask, bucket_select_lo, data_select_mask, hi_bits_mask, - lo_bits_mask, neg_mask, base_offset, last_start, end_inst); - program.add_before_end(move(ri)); - } else if (multi_len == 32) { - neg_mask &= 0xffffffff; - assert(!(hi_bits_mask & ~0xffffffffULL)); - assert(!(lo_bits_mask & ~0xffffffffULL)); - if (bit_index <= 8) { - auto ri = make_unique - (hi_mask, lo_mask, bucket_select_lo, data_select_mask, - hi_bits_mask, lo_bits_mask, neg_mask, base_offset, - last_start, end_inst); - program.add_before_end(move(ri)); - } else { - auto ri = make_unique - (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo, - data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, - base_offset, last_start, end_inst); - program.add_before_end(move(ri)); - } - } else { - auto ri = make_unique - (hi_mask, lo_mask, bucket_select_lo, data_select_mask, - hi_bits_mask, lo_bits_mask, neg_mask, base_offset, - last_start, end_inst); - program.add_before_end(move(ri)); - } - return true; -} - -static -void makeRoleMultipathLookaround(build_context &bc, - const vector> &multi_look, - RoseProgram &program) { - assert(!multi_look.empty()); - assert(multi_look.size() <= MAX_LOOKAROUND_PATHS); - vector> ordered_look; - set look_offset; - - assert(!multi_look[0].empty()); - s32 last_start = multi_look[0][0].offset; - - // build offset table. 
- for (const auto &look : multi_look) { - assert(look.size() > 0); - last_start = max(last_start, (s32)look.begin()->offset); - - for (const auto &t : look) { - look_offset.insert(t.offset); - } - } - - array start_mask; - if (multi_look.size() < MAX_LOOKAROUND_PATHS) { - start_mask.fill((1 << multi_look.size()) - 1); - } else { - start_mask.fill(0xff); - } - - u32 path_idx = 0; - for (const auto &look : multi_look) { - for (const auto &t : look) { - assert(t.offset >= (int)*look_offset.begin()); - size_t update_offset = t.offset - *look_offset.begin() + 1; - if (update_offset < start_mask.size()) { - start_mask[update_offset] &= ~(1 << path_idx); - } - } - path_idx++; - } - - for (u32 i = 1; i < MULTIPATH_MAX_LEN; i++) { - start_mask[i] &= start_mask[i - 1]; - DEBUG_PRINTF("start_mask[%u] = %x\n", i, start_mask[i]); - } - - assert(look_offset.size() <= MULTIPATH_MAX_LEN); - - assert(last_start < 0); - - for (const auto &offset : look_offset) { - vector multi_entry; - multi_entry.resize(MAX_LOOKAROUND_PATHS); - - for (size_t i = 0; i < multi_look.size(); i++) { - for (const auto &t : multi_look[i]) { - if (t.offset == offset) { - multi_entry[i] = t; - } - } - } - ordered_look.emplace_back(multi_entry); - } - - u32 look_idx, reach_idx; - addLookaround(bc, ordered_look, look_idx, reach_idx); - u32 look_count = verify_u32(ordered_look.size()); - - auto ri = make_unique(look_idx, reach_idx, - look_count, last_start, - start_mask, - program.end_instruction()); - program.add_before_end(move(ri)); -} - -static -void makeRoleLookaround(const RoseBuildImpl &build, build_context &bc, - RoseVertex v, RoseProgram &program) { - if (!build.cc.grey.roseLookaroundMasks) { - return; - } - - vector> looks; - - // Lookaround from leftfix (mandatory). - if (contains(bc.leftfix_info, v) && bc.leftfix_info.at(v).has_lookaround) { - DEBUG_PRINTF("using leftfix lookaround\n"); - looks = bc.leftfix_info.at(v).lookaround; - } - - // We may be able to find more lookaround info (advisory) and merge it - // in. - if (looks.size() <= 1) { - vector look; - vector look_more; - if (!looks.empty()) { - look = move(looks.front()); - } - findLookaroundMasks(build, v, look_more); - mergeLookaround(look, look_more); - if (!look.empty()) { - makeLookaroundInstruction(bc, look, program); - } - return; - } - - if (!makeRoleMultipathShufti(looks, program)) { - assert(looks.size() <= 8); - makeRoleMultipathLookaround(bc, looks, program); - } -} - -static -void makeRoleCheckLeftfix(const RoseBuildImpl &build, - const map &leftfix_info, - RoseVertex v, RoseProgram &program) { - auto it = leftfix_info.find(v); - if (it == end(leftfix_info)) { - return; - } - const left_build_info &lni = it->second; - if (lni.has_lookaround) { - return; // Leftfix completely implemented by lookaround. - } - - assert(!build.cc.streaming || - build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); - - bool is_prefix = build.isRootSuccessor(v); - const auto *end_inst = program.end_instruction(); - - unique_ptr ri; - if (is_prefix) { - ri = make_unique(lni.queue, build.g[v].left.lag, - build.g[v].left.leftfix_report, - end_inst); - } else { - ri = make_unique(lni.queue, build.g[v].left.lag, - build.g[v].left.leftfix_report, - end_inst); - } - program.add_before_end(move(ri)); -} - -static -void makeAnchoredLiteralDelay(const RoseBuildImpl &build, - const ProgramBuild &prog_build, u32 lit_id, - RoseProgram &program) { - // Only relevant for literals in the anchored table. 
- const rose_literal_id &lit = build.literals.right.at(lit_id); - if (lit.table != ROSE_ANCHORED) { - return; - } - - // If this literal match cannot occur after floatingMinLiteralMatchOffset, - // we do not need this check. - bool all_too_early = true; - rose_group groups = 0; - - const auto &lit_vertices = build.literal_info.at(lit_id).vertices; - for (RoseVertex v : lit_vertices) { - if (build.g[v].max_offset > prog_build.floatingMinLiteralMatchOffset) { - all_too_early = false; - } - groups |= build.g[v].groups; - } - - if (all_too_early) { - return; - } - - assert(contains(prog_build.anchored_programs, lit_id)); - u32 anch_id = prog_build.anchored_programs.at(lit_id); - - const auto *end_inst = program.end_instruction(); - auto ri = make_unique(groups, anch_id, end_inst); - program.add_before_end(move(ri)); -} - -static -void makeDedupe(const RoseBuildImpl &build, const Report &report, - RoseProgram &program) { - const auto *end_inst = program.end_instruction(); - auto ri = - make_unique(report.quashSom, build.rm.getDkey(report), - report.offsetAdjust, end_inst); - program.add_before_end(move(ri)); -} - -static -void makeDedupeSom(const RoseBuildImpl &build, const Report &report, - RoseProgram &program) { - const auto *end_inst = program.end_instruction(); - auto ri = make_unique(report.quashSom, - build.rm.getDkey(report), - report.offsetAdjust, end_inst); - program.add_before_end(move(ri)); -} - -static -void makeCatchup(const RoseBuildImpl &build, bool needs_catchup, - const flat_set &reports, RoseProgram &program) { - if (!needs_catchup) { - return; - } - - // Everything except the INTERNAL_ROSE_CHAIN report needs catchup to run - // before reports are triggered. - - auto report_needs_catchup = [&](const ReportID &id) { - const Report &report = build.rm.getReport(id); - return report.type != INTERNAL_ROSE_CHAIN; - }; - - if (!any_of(begin(reports), end(reports), report_needs_catchup)) { - DEBUG_PRINTF("none of the given reports needs catchup\n"); - return; - } - - program.add_before_end(make_unique()); -} - -static -void makeCatchupMpv(const RoseBuildImpl &build, bool needs_mpv_catchup, - ReportID id, RoseProgram &program) { - if (!needs_mpv_catchup) { - return; - } - - const Report &report = build.rm.getReport(id); - if (report.type == INTERNAL_ROSE_CHAIN) { - return; - } - - program.add_before_end(make_unique()); -} - -static -void writeSomOperation(const Report &report, som_operation *op) { - assert(op); - - memset(op, 0, sizeof(*op)); - - switch (report.type) { - case EXTERNAL_CALLBACK_SOM_REL: - op->type = SOM_EXTERNAL_CALLBACK_REL; - break; - case INTERNAL_SOM_LOC_SET: - op->type = SOM_INTERNAL_LOC_SET; - break; - case INTERNAL_SOM_LOC_SET_IF_UNSET: - op->type = SOM_INTERNAL_LOC_SET_IF_UNSET; - break; - case INTERNAL_SOM_LOC_SET_IF_WRITABLE: - op->type = SOM_INTERNAL_LOC_SET_IF_WRITABLE; - break; - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - op->type = SOM_INTERNAL_LOC_SET_REV_NFA; - break; - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET; - break; - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: - op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE; - break; - case INTERNAL_SOM_LOC_COPY: - op->type = SOM_INTERNAL_LOC_COPY; - break; - case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: - op->type = SOM_INTERNAL_LOC_COPY_IF_WRITABLE; - break; - case INTERNAL_SOM_LOC_MAKE_WRITABLE: - op->type = SOM_INTERNAL_LOC_MAKE_WRITABLE; - break; - case EXTERNAL_CALLBACK_SOM_STORED: - op->type = SOM_EXTERNAL_CALLBACK_STORED; - break; - case 
EXTERNAL_CALLBACK_SOM_ABS: - op->type = SOM_EXTERNAL_CALLBACK_ABS; - break; - case EXTERNAL_CALLBACK_SOM_REV_NFA: - op->type = SOM_EXTERNAL_CALLBACK_REV_NFA; - break; - case INTERNAL_SOM_LOC_SET_FROM: - op->type = SOM_INTERNAL_LOC_SET_FROM; - break; - case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: - op->type = SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE; - break; - default: - // This report doesn't correspond to a SOM operation. - assert(0); - throw CompileError("Unable to generate bytecode."); - } - - op->onmatch = report.onmatch; - - switch (report.type) { - case EXTERNAL_CALLBACK_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: - op->aux.revNfaIndex = report.revNfaIndex; - break; - default: - op->aux.somDistance = report.somDistance; - break; - } -} - -static -void makeReport(const RoseBuildImpl &build, const ReportID id, - const bool has_som, RoseProgram &program) { - assert(id < build.rm.numReports()); - const Report &report = build.rm.getReport(id); - - RoseProgram report_block; - const RoseInstruction *end_inst = report_block.end_instruction(); - - // Handle min/max offset checks. - if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) { - auto ri = make_unique(report.minOffset, - report.maxOffset, end_inst); - report_block.add_before_end(move(ri)); - } - - // If this report has an exhaustion key, we can check it in the program - // rather than waiting until we're in the callback adaptor. - if (report.ekey != INVALID_EKEY) { - auto ri = make_unique(report.ekey, end_inst); - report_block.add_before_end(move(ri)); - } - - // External SOM reports that aren't passthrough need their SOM value - // calculated. - if (isExternalSomReport(report) && - report.type != EXTERNAL_CALLBACK_SOM_PASS) { - auto ri = make_unique(); - writeSomOperation(report, &ri->som); - report_block.add_before_end(move(ri)); - } - - // Min length constraint. - if (report.minLength > 0) { - assert(build.hasSom); - auto ri = make_unique( - report.offsetAdjust, report.minLength, end_inst); - report_block.add_before_end(move(ri)); - } - - if (report.quashSom) { - report_block.add_before_end(make_unique()); - } - - switch (report.type) { - case EXTERNAL_CALLBACK: - if (!has_som) { - // Dedupe is only necessary if this report has a dkey, or if there - // are SOM reports to catch up. 
- bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom; - if (report.ekey == INVALID_EKEY) { - if (needs_dedupe) { - report_block.add_before_end( - make_unique( - report.quashSom, build.rm.getDkey(report), - report.onmatch, report.offsetAdjust, end_inst)); - } else { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust)); - } - } else { - if (needs_dedupe) { - makeDedupe(build, report, report_block); - } - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust, report.ekey)); - } - } else { // has_som - makeDedupeSom(build, report, report_block); - if (report.ekey == INVALID_EKEY) { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust)); - } else { - report_block.add_before_end( - make_unique( - report.onmatch, report.offsetAdjust, report.ekey)); - } - } - break; - case INTERNAL_SOM_LOC_SET: - case INTERNAL_SOM_LOC_SET_IF_UNSET: - case INTERNAL_SOM_LOC_SET_IF_WRITABLE: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: - case INTERNAL_SOM_LOC_COPY: - case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: - case INTERNAL_SOM_LOC_MAKE_WRITABLE: - case INTERNAL_SOM_LOC_SET_FROM: - case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: - if (has_som) { - auto ri = make_unique(); - writeSomOperation(report, &ri->som); - report_block.add_before_end(move(ri)); - } else { - auto ri = make_unique(); - writeSomOperation(report, &ri->som); - report_block.add_before_end(move(ri)); - } - break; - case INTERNAL_ROSE_CHAIN: { - report_block.add_before_end(make_unique( - report.onmatch, report.topSquashDistance)); - break; - } - case EXTERNAL_CALLBACK_SOM_REL: - case EXTERNAL_CALLBACK_SOM_STORED: - case EXTERNAL_CALLBACK_SOM_ABS: - case EXTERNAL_CALLBACK_SOM_REV_NFA: - makeDedupeSom(build, report, report_block); - if (report.ekey == INVALID_EKEY) { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust)); - } else { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust, report.ekey)); - } - break; - case EXTERNAL_CALLBACK_SOM_PASS: - makeDedupeSom(build, report, report_block); - if (report.ekey == INVALID_EKEY) { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust)); - } else { - report_block.add_before_end(make_unique( - report.onmatch, report.offsetAdjust, report.ekey)); - } - break; - - default: - assert(0); - throw CompileError("Unable to generate bytecode."); - } - - assert(!report_block.empty()); - program.add_block(move(report_block)); -} - -static -void makeRoleReports(const RoseBuildImpl &build, const build_context &bc, - RoseVertex v, RoseProgram &program) { - const auto &g = build.g; - - /* we are a suffaig - need to update role to provide som to the - * suffix. 
*/ - bool has_som = false; - if (g[v].left.tracksSom()) { - assert(contains(bc.leftfix_info, v)); - const left_build_info &lni = bc.leftfix_info.at(v); - program.add_before_end( - make_unique(lni.queue, g[v].left.lag)); - has_som = true; - } else if (g[v].som_adjust) { - program.add_before_end( - make_unique(g[v].som_adjust)); - has_som = true; - } - - const auto &reports = g[v].reports; - makeCatchup(build, bc.needs_catchup, reports, program); - - RoseProgram report_block; - for (ReportID id : reports) { - makeReport(build, id, has_som, report_block); - } - program.add_before_end(move(report_block)); -} - -static -void makeRoleSuffix(const RoseBuildImpl &build, const build_context &bc, - RoseVertex v, RoseProgram &program) { - const auto &g = build.g; - if (!g[v].suffix) { - return; - } - assert(contains(bc.suffixes, g[v].suffix)); - u32 qi = bc.suffixes.at(g[v].suffix); - assert(contains(bc.engineOffsets, qi)); - const NFA *nfa = get_nfa_from_blob(bc, qi); - u32 suffixEvent; - if (isContainerType(nfa->type)) { - auto tamaProto = g[v].suffix.tamarama.get(); - assert(tamaProto); - u32 top = (u32)MQE_TOP_FIRST + - tamaProto->top_remap.at(make_pair(g[v].index, - g[v].suffix.top)); - assert(top < MQE_INVALID); - suffixEvent = top; - } else if (isMultiTopType(nfa->type)) { - assert(!g[v].suffix.haig); - u32 top = (u32)MQE_TOP_FIRST + g[v].suffix.top; - assert(top < MQE_INVALID); - suffixEvent = top; - } else { - // DFAs/Puffs have no MQE_TOP_N support, so they get a classic TOP - // event. - assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph)); - suffixEvent = MQE_TOP; - } - program.add_before_end( - make_unique(qi, suffixEvent)); -} - -static -void makeRoleGroups(const RoseBuildImpl &build, ProgramBuild &prog_build, - RoseVertex v, RoseProgram &program) { - const auto &g = build.g; - rose_group groups = g[v].groups; - if (!groups) { - return; - } - - // The set of "already on" groups as we process this vertex is the - // intersection of the groups set by our predecessors. - assert(in_degree(v, g) > 0); - rose_group already_on = ~rose_group{0}; - for (const auto &u : inv_adjacent_vertices_range(v, g)) { - already_on &= prog_build.vertex_group_map.at(u); - } - - DEBUG_PRINTF("already_on=0x%llx\n", already_on); - DEBUG_PRINTF("squashable=0x%llx\n", prog_build.squashable_groups); - DEBUG_PRINTF("groups=0x%llx\n", groups); - - already_on &= ~prog_build.squashable_groups; - DEBUG_PRINTF("squashed already_on=0x%llx\n", already_on); - - // We don't *have* to mask off the groups that we know are already on, but - // this will make bugs more apparent. - groups &= ~already_on; - - if (!groups) { - DEBUG_PRINTF("no new groups to set, skipping\n"); - return; - } - - program.add_before_end(make_unique(groups)); -} - -static -void makeRoleInfixTriggers(const RoseBuildImpl &build, const build_context &bc, - RoseVertex u, RoseProgram &program) { - const auto &g = build.g; - - vector infix_program; - - for (const auto &e : out_edges_range(u, g)) { - RoseVertex v = target(e, g); - if (!g[v].left) { - continue; - } - - assert(contains(bc.leftfix_info, v)); - const left_build_info &lbi = bc.leftfix_info.at(v); - if (lbi.has_lookaround) { - continue; - } - - const NFA *nfa = get_nfa_from_blob(bc, lbi.queue); - - // DFAs have no TOP_N support, so they get a classic MQE_TOP event. 
- u32 top; - if (isContainerType(nfa->type)) { - auto tamaProto = g[v].left.tamarama.get(); - assert(tamaProto); - top = MQE_TOP_FIRST + tamaProto->top_remap.at( - make_pair(g[v].index, g[e].rose_top)); - assert(top < MQE_INVALID); - } else if (!isMultiTopType(nfa->type)) { - assert(num_tops(g[v].left) == 1); - top = MQE_TOP; - } else { - top = MQE_TOP_FIRST + g[e].rose_top; - assert(top < MQE_INVALID); - } - - infix_program.emplace_back(g[e].rose_cancel_prev_top, lbi.queue, top); - } - - if (infix_program.empty()) { - return; - } - - // Order, de-dupe and add instructions to the end of program. - sort_and_unique(infix_program, [](const RoseInstrTriggerInfix &a, - const RoseInstrTriggerInfix &b) { - return tie(a.cancel, a.queue, a.event) < - tie(b.cancel, b.queue, b.event); - }); - for (const auto &ri : infix_program) { - program.add_before_end(make_unique(ri)); - } -} - -static -void makeRoleSetState(const unordered_map &roleStateIndices, - RoseVertex v, RoseProgram &program) { - // We only need this instruction if a state index has been assigned to this - // vertex. - auto it = roleStateIndices.find(v); - if (it == end(roleStateIndices)) { - return; - } - program.add_before_end(make_unique(it->second)); -} - -static -void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, - const RoseEdge &e, RoseProgram &program) { - const RoseGraph &g = build.g; - const RoseVertex u = source(e, g); - - // We know that we can trust the anchored table (DFA) to always deliver us - // literals at the correct offset. - if (build.isAnchored(v)) { - DEBUG_PRINTF("literal in anchored table, skipping bounds check\n"); - return; - } - - // Use the minimum literal length. - u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v)); - - u64a min_bound = g[e].minBound + lit_length; - u64a max_bound = g[e].maxBound == ROSE_BOUND_INF - ? ROSE_BOUND_INF - : g[e].maxBound + lit_length; - - if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { - assert(g[u].fixedOffset()); - // Make offsets absolute. - min_bound += g[u].max_offset; - if (max_bound != ROSE_BOUND_INF) { - max_bound += g[u].max_offset; - } - } - - assert(max_bound <= ROSE_BOUND_INF); - assert(min_bound <= max_bound); - - // CHECK_BOUNDS instruction uses 64-bit bounds, so we can use MAX_OFFSET - // (max value of a u64a) to represent ROSE_BOUND_INF. - if (max_bound == ROSE_BOUND_INF) { - max_bound = MAX_OFFSET; - } - - // This instruction should be doing _something_ -- bounds should be tighter - // than just {length, inf}. 
- assert(min_bound > lit_length || max_bound < MAX_OFFSET); - - const auto *end_inst = program.end_instruction(); - program.add_before_end( - make_unique(min_bound, max_bound, end_inst)); -} - -static -void makeRoleCheckNotHandled(ProgramBuild &prog_build, RoseVertex v, - RoseProgram &program) { - u32 handled_key; - if (contains(prog_build.handledKeys, v)) { - handled_key = prog_build.handledKeys.at(v); - } else { - handled_key = verify_u32(prog_build.handledKeys.size()); - prog_build.handledKeys.emplace(v, handled_key); - } - - const auto *end_inst = program.end_instruction(); - auto ri = make_unique(handled_key, end_inst); - program.add_before_end(move(ri)); -} - -static -void makeRoleEagerEodReports(const RoseBuildImpl &build, build_context &bc, - RoseVertex v, RoseProgram &program) { - RoseProgram eod_program; - - for (const auto &e : out_edges_range(v, build.g)) { - if (canEagerlyReportAtEod(build, e)) { - RoseProgram block; - makeRoleReports(build, bc, target(e, build.g), block); - eod_program.add_block(move(block)); - } - } - - if (eod_program.empty()) { - return; - } - - if (!onlyAtEod(build, v)) { - // The rest of our program wasn't EOD anchored, so we need to guard - // these reports with a check. - const auto *end_inst = eod_program.end_instruction(); - eod_program.insert(begin(eod_program), - make_unique(end_inst)); - } - - program.add_before_end(move(eod_program)); -} - -/* Makes a program for a role/vertex given a specfic pred/in_edge. */ -static -RoseProgram makeRoleProgram(const RoseBuildImpl &build, build_context &bc, - ProgramBuild &prog_build, const RoseEdge &e) { - const RoseGraph &g = build.g; - auto v = target(e, g); - - RoseProgram program; - - // First, add program instructions that enforce preconditions without - // effects. - - if (onlyAtEod(build, v)) { - DEBUG_PRINTF("only at eod\n"); - const auto *end_inst = program.end_instruction(); - program.add_before_end(make_unique(end_inst)); - } - - if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { - makeRoleCheckBounds(build, v, e, program); - } - - // This role program may be triggered by different predecessors, with - // different offset bounds. We must ensure we put this check/set operation - // after the bounds check to deal with this case. - if (in_degree(v, g) > 1) { - assert(!build.isRootSuccessor(v)); - makeRoleCheckNotHandled(prog_build, v, program); - } - - makeRoleLookaround(build, bc, v, program); - makeRoleCheckLeftfix(build, bc.leftfix_info, v, program); - - // Next, we can add program instructions that have effects. This must be - // done as a series of blocks, as some of them (like reports) are - // escapable. - - RoseProgram effects_block; - - RoseProgram reports_block; - makeRoleReports(build, bc, v, reports_block); - effects_block.add_block(move(reports_block)); - - RoseProgram infix_block; - makeRoleInfixTriggers(build, bc, v, infix_block); - effects_block.add_block(move(infix_block)); - - // Note: SET_GROUPS instruction must be after infix triggers, as an infix - // going dead may switch off groups. - RoseProgram groups_block; - makeRoleGroups(build, prog_build, v, groups_block); - effects_block.add_block(move(groups_block)); - - RoseProgram suffix_block; - makeRoleSuffix(build, bc, v, suffix_block); - effects_block.add_block(move(suffix_block)); - - RoseProgram state_block; - makeRoleSetState(bc.roleStateIndices, v, state_block); - effects_block.add_block(move(state_block)); - - // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if - // the program doesn't have one already). 
- RoseProgram eod_block; - makeRoleEagerEodReports(build, bc, v, eod_block); - effects_block.add_block(move(eod_block)); - - /* a 'ghost role' may do nothing if we know that its groups are already set - * - in this case we can avoid producing a program at all. */ - if (effects_block.empty()) { - return {}; - } - - program.add_before_end(move(effects_block)); - return program; -} - -static -u32 writeBoundaryProgram(const RoseBuildImpl &build, build_context &bc, - const set &reports) { - if (reports.empty()) { - return 0; - } - - // Note: no CATCHUP instruction is necessary in the boundary case, as we - // should always be caught up (and may not even have the resources in - // scratch to support it). - - const bool has_som = false; - RoseProgram program; - for (const auto &id : reports) { - makeReport(build, id, has_som, program); - } - return writeProgram(bc, move(program)); -} - static void makeBoundaryPrograms(const RoseBuildImpl &build, build_context &bc, const BoundaryReports &boundary, @@ -4284,12 +2589,14 @@ void makeBoundaryPrograms(const RoseBuildImpl &build, build_context &bc, DEBUG_PRINTF("report $: %zu\n", boundary.report_at_eod.size()); DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size()); - out.reportEodOffset = - writeBoundaryProgram(build, bc, boundary.report_at_eod); - out.reportZeroOffset = - writeBoundaryProgram(build, bc, boundary.report_at_0); - out.reportZeroEodOffset = - writeBoundaryProgram(build, bc, dboundary.report_at_0_eod_full); + auto eod_prog = makeBoundaryProgram(build, boundary.report_at_eod); + out.reportEodOffset = writeProgram(bc, move(eod_prog)); + + auto zero_prog = makeBoundaryProgram(build, boundary.report_at_0); + out.reportZeroOffset = writeProgram(bc, move(zero_prog)); + + auto zeod_prog = makeBoundaryProgram(build, dboundary.report_at_0_eod_full); + out.reportZeroEodOffset = writeProgram(bc, move(zeod_prog)); } static @@ -4341,10 +2648,9 @@ bool hasUsefulStops(const left_build_info &build) { static void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, - const set &eager_queues, - u32 leftfixBeginQueue, u32 leftfixCount, - vector &leftTable, u32 *laggedRoseCount, - size_t *history) { + const set &eager_queues, u32 leftfixBeginQueue, + u32 leftfixCount, vector &leftTable, + u32 *laggedRoseCount, size_t *history) { const RoseGraph &g = tbi.g; const CompileContext &cc = tbi.cc; @@ -4430,445 +2736,6 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, *laggedRoseCount = lagIndex; } -static -void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block, - RoseProgram &program) { - // Prepend an instruction to check the pred state is on. 
- const auto *end_inst = pred_block.end_instruction(); - assert(!pred_block.empty()); - pred_block.insert(begin(pred_block), - make_unique(pred_state, end_inst)); - program.add_block(move(pred_block)); -} - -static -void addPredBlocksAny(map &pred_blocks, u32 num_states, - RoseProgram &program) { - RoseProgram sparse_program; - - vector keys; - for (const u32 &key : pred_blocks | map_keys) { - keys.push_back(key); - } - - const RoseInstruction *end_inst = sparse_program.end_instruction(); - auto ri = make_unique(num_states, keys, end_inst); - sparse_program.add_before_end(move(ri)); - - RoseProgram &block = pred_blocks.begin()->second; - assert(!block.empty()); - - /* we no longer need the check handled instruction as all the pred-role - * blocks are being collapsed together */ - stripCheckHandledInstruction(block); - - sparse_program.add_before_end(move(block)); - program.add_block(move(sparse_program)); -} - -static -void addPredBlocksMulti(map &pred_blocks, - u32 num_states, RoseProgram &program) { - assert(!pred_blocks.empty()); - - RoseProgram sparse_program; - const RoseInstruction *end_inst = sparse_program.end_instruction(); - vector> jump_table; - - // BEGIN instruction. - auto ri_begin = make_unique(num_states, end_inst); - RoseInstrSparseIterBegin *begin_inst = ri_begin.get(); - sparse_program.add_before_end(move(ri_begin)); - - // NEXT instructions, one per pred program. - u32 prev_key = pred_blocks.begin()->first; - for (auto it = next(begin(pred_blocks)); it != end(pred_blocks); ++it) { - auto ri = make_unique(prev_key, begin_inst, - end_inst); - sparse_program.add_before_end(move(ri)); - prev_key = it->first; - } - - // Splice in each pred program after its BEGIN/NEXT. - auto out_it = begin(sparse_program); - for (auto &m : pred_blocks) { - u32 key = m.first; - RoseProgram &flat_prog = m.second; - assert(!flat_prog.empty()); - const size_t block_len = flat_prog.size() - 1; // without INSTR_END. - - assert(dynamic_cast(out_it->get()) || - dynamic_cast(out_it->get())); - out_it = sparse_program.insert(++out_it, move(flat_prog)); - - // Jump table target for this key is the beginning of the block we just - // spliced in. - jump_table.emplace_back(key, out_it->get()); - - assert(distance(begin(sparse_program), out_it) + block_len <= - sparse_program.size()); - advance(out_it, block_len); - } - - // Write the jump table back into the SPARSE_ITER_BEGIN instruction. - begin_inst->jump_table = move(jump_table); - - program.add_block(move(sparse_program)); -} - -static -void addPredBlocks(map &pred_blocks, u32 num_states, - RoseProgram &program) { - const size_t num_preds = pred_blocks.size(); - if (num_preds == 0) { - return; - } - - if (num_preds == 1) { - const auto head = pred_blocks.begin(); - addPredBlockSingle(head->first, head->second, program); - return; - } - - // First, see if all our blocks are equivalent, in which case we can - // collapse them down into one. 
- const auto &blocks = pred_blocks | map_values; - if (all_of(begin(blocks), end(blocks), [&](const RoseProgram &block) { - return RoseProgramEquivalence()(*begin(blocks), block); - })) { - DEBUG_PRINTF("all blocks equiv\n"); - addPredBlocksAny(pred_blocks, num_states, program); - return; - } - - addPredBlocksMulti(pred_blocks, num_states, program); -} - -static -void makePushDelayedInstructions(const RoseBuildImpl &build, - ProgramBuild &prog_build, u32 lit_id, - RoseProgram &program) { - const auto &info = build.literal_info.at(lit_id); - - vector delay_instructions; - - for (const auto &delayed_lit_id : info.delayed_ids) { - DEBUG_PRINTF("delayed lit id %u\n", delayed_lit_id); - assert(contains(prog_build.delay_programs, delayed_lit_id)); - u32 delay_id = prog_build.delay_programs.at(delayed_lit_id); - const auto &delay_lit = build.literals.right.at(delayed_lit_id); - delay_instructions.emplace_back(verify_u8(delay_lit.delay), delay_id); - } - - sort_and_unique(delay_instructions, [](const RoseInstrPushDelayed &a, - const RoseInstrPushDelayed &b) { - return tie(a.delay, a.index) < tie(b.delay, b.index); - }); - - for (const auto &ri : delay_instructions) { - program.add_before_end(make_unique(ri)); - } -} - -static -void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 lit_id, - RoseProgram &program) { - const auto &info = build.literal_info.at(lit_id); - rose_group groups = info.group_mask; - if (!groups) { - return; - } - program.add_before_end(make_unique(groups)); -} - -static -void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc, - u32 lit_id, RoseProgram &program) { - const auto &info = build.literal_info.at(lit_id); - if (!info.requires_benefits) { - return; - } - - vector look; - - const ue2_literal &s = build.literals.right.at(lit_id).s; - DEBUG_PRINTF("building mask for lit %u: %s\n", lit_id, - dumpString(s).c_str()); - assert(s.length() <= MAX_MASK2_WIDTH); - s32 i = 0 - s.length(); - for (const auto &e : s) { - if (!e.nocase) { - look.emplace_back(verify_s8(i), e); - } - i++; - } - - assert(!look.empty()); - makeLookaroundInstruction(bc, look, program); -} - -static -void makeGroupSquashInstruction(const RoseBuildImpl &build, - u32 lit_id, - RoseProgram &program) { - const auto &info = build.literal_info.at(lit_id); - if (!info.squash_group) { - return; - } - - rose_group groups = info.group_mask; - if (!groups) { - return; - } - - DEBUG_PRINTF("squashes 0x%llx\n", groups); - program.add_before_end( - make_unique(~groups)); // Note negated. 
-} - -static -u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) { - const auto &lit_vertices = build.literal_info.at(lit_id).vertices; - assert(!lit_vertices.empty()); - - u32 max_offset = 0; - for (const auto &v : lit_vertices) { - max_offset = max(max_offset, build.g[v].max_offset); - } - - return max_offset; -} - -static -u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) { - const auto &lit_vertices = build.literal_info.at(lit_id).vertices; - assert(!lit_vertices.empty()); - - u32 min_offset = UINT32_MAX; - for (const auto &v : lit_vertices) { - min_offset = min(min_offset, build.g[v].min_offset); - } - - return min_offset; -} - -static -void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, u32 lit_id, - const vector &lit_edges, - u32 floatingMinLiteralMatchOffset, - RoseProgram &program) { - if (lit_edges.empty()) { - return; - } - - if (floatingMinLiteralMatchOffset == 0) { - return; - } - - RoseVertex v = target(lit_edges.front(), build.g); - if (!build.isFloating(v)) { - return; - } - - const auto &lit = build.literals.right.at(lit_id); - size_t min_len = lit.elength(); - u32 min_offset = findMinOffset(build, lit_id); - DEBUG_PRINTF("has min_len=%zu, min_offset=%u, global min is %u\n", min_len, - min_offset, floatingMinLiteralMatchOffset); - - // If we can't match before the min offset, we don't need the check. - if (min_len >= floatingMinLiteralMatchOffset) { - DEBUG_PRINTF("no need for check, min is %u\n", - floatingMinLiteralMatchOffset); - return; - } - - assert(min_offset >= floatingMinLiteralMatchOffset); - assert(min_offset < UINT32_MAX); - - DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset); - const auto *end_inst = program.end_instruction(); - program.add_before_end( - make_unique(min_offset, end_inst)); -} - -static -void makeCheckLiteralInstruction(const RoseBuildImpl &build, u32 lit_id, - size_t longLitLengthThreshold, - RoseProgram &program) { - assert(longLitLengthThreshold > 0); - - DEBUG_PRINTF("lit_id=%u, long lit threshold %zu\n", lit_id, - longLitLengthThreshold); - - if (build.isDelayed(lit_id)) { - return; - } - - const rose_literal_id &lit = build.literals.right.at(lit_id); - - if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) { - DEBUG_PRINTF("lit short enough to not need confirm\n"); - return; - } - - // Check resource limits as well. - if (lit.s.length() > build.cc.grey.limitLiteralLength) { - throw ResourceLimitError(); - } - - if (lit.s.length() <= longLitLengthThreshold) { - DEBUG_PRINTF("is a medium-length literal\n"); - const auto *end_inst = program.end_instruction(); - unique_ptr ri; - if (lit.s.any_nocase()) { - ri = make_unique(lit.s.get_string(), - end_inst); - } else { - ri = make_unique(lit.s.get_string(), - end_inst); - } - program.add_before_end(move(ri)); - return; - } - - // Long literal support should only really be used for the floating table - // in streaming mode. 
- assert(lit.table == ROSE_FLOATING && build.cc.streaming); - - DEBUG_PRINTF("is a long literal\n"); - - const auto *end_inst = program.end_instruction(); - unique_ptr ri; - if (lit.s.any_nocase()) { - ri = make_unique(lit.s.get_string(), - end_inst); - } else { - ri = make_unique(lit.s.get_string(), end_inst); - } - program.add_before_end(move(ri)); -} - -static -bool hasDelayedLiteral(const RoseBuildImpl &build, - const vector &lit_edges) { - auto is_delayed = bind(&RoseBuildImpl::isDelayed, &build, _1); - for (const auto &e : lit_edges) { - auto v = target(e, build.g); - const auto &lits = build.g[v].literals; - if (any_of(begin(lits), end(lits), is_delayed)) { - return true; - } - } - return false; -} - -static -RoseProgram makeLitInitialProgram(const RoseBuildImpl &build, - build_context &bc, ProgramBuild &prog_build, - u32 lit_id, const vector &lit_edges, - bool is_anchored_replay_program) { - RoseProgram program; - - // Check long literal info. - makeCheckLiteralInstruction(build, lit_id, bc.longLitLengthThreshold, - program); - - // Check lit mask. - makeCheckLitMaskInstruction(build, bc, lit_id, program); - - // Check literal groups. This is an optimisation that we only perform for - // delayed literals, as their groups may be switched off; ordinarily, we - // can trust the HWLM matcher. - if (hasDelayedLiteral(build, lit_edges)) { - makeGroupCheckInstruction(build, lit_id, program); - } - - // Add instructions for pushing delayed matches, if there are any. - makePushDelayedInstructions(build, prog_build, lit_id, program); - - // Add pre-check for early literals in the floating table. - makeCheckLitEarlyInstruction(build, lit_id, lit_edges, - prog_build.floatingMinLiteralMatchOffset, - program); - - /* Check if we are able to deliever matches from the anchored table now */ - if (!is_anchored_replay_program) { - makeAnchoredLiteralDelay(build, prog_build, lit_id, program); - } - - return program; -} - -static -RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, - ProgramBuild &prog_build, u32 lit_id, - const vector &lit_edges, - bool is_anchored_replay_program) { - const auto &g = build.g; - - DEBUG_PRINTF("lit id=%u, %zu lit edges\n", lit_id, lit_edges.size()); - - // Construct initial program up front, as its early checks must be able - // to jump to end and terminate processing for this literal. - auto lit_program = makeLitInitialProgram(build, bc, prog_build, lit_id, - lit_edges, - is_anchored_replay_program); - - RoseProgram role_programs; - - // Predecessor state id -> program block. - map pred_blocks; - - // Construct sparse iter sub-programs. - for (const auto &e : lit_edges) { - const auto &u = source(e, g); - if (build.isAnyStart(u)) { - continue; // Root roles are not handled with sparse iterator. - } - DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].index, - g[target(e, g)].index); - assert(contains(bc.roleStateIndices, u)); - u32 pred_state = bc.roleStateIndices.at(u); - auto role_prog = makeRoleProgram(build, bc, prog_build, e); - if (!role_prog.empty()) { - pred_blocks[pred_state].add_block(move(role_prog)); - } - } - - // Add blocks to deal with non-root edges (triggered by sparse iterator or - // mmbit_isset checks). - addPredBlocks(pred_blocks, bc.roleStateIndices.size(), role_programs); - - // Add blocks to handle root roles. 
- for (const auto &e : lit_edges) { - const auto &u = source(e, g); - if (!build.isAnyStart(u)) { - continue; - } - DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index, - g[target(e, g)].index); - role_programs.add_block(makeRoleProgram(build, bc, prog_build, e)); - } - - if (lit_id == build.eod_event_literal_id) { - /* Note: does not require the lit intial program */ - assert(build.eod_event_literal_id != MO_INVALID_IDX); - return role_programs; - } - - /* Instructions to run even if a role program bails out */ - RoseProgram unconditional_block; - - // Literal may squash groups. - makeGroupSquashInstruction(build, lit_id, unconditional_block); - - role_programs.add_block(move(unconditional_block)); - lit_program.add_before_end(move(role_programs)); - - return lit_program; -} - static RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, ProgramBuild &prog_build, u32 lit_id, @@ -4885,49 +2752,10 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, edges_ptr = &no_edges; } - return makeLiteralProgram(build, bc, prog_build, lit_id, *edges_ptr, - is_anchored_replay_program); -} - -/** - * \brief Consumes list of program blocks corresponding to different literals, - * checks them for duplicates and then concatenates them into one program. - * - * Note: if a block will squash groups, a CLEAR_WORK_DONE instruction is - * inserted to prevent the work_done flag being contaminated by early blocks. - */ -static -RoseProgram assembleProgramBlocks(vector &&blocks) { - RoseProgram program; - - DEBUG_PRINTF("%zu blocks before dedupe\n", blocks.size()); - - sort(blocks.begin(), blocks.end(), - [](const RoseProgram &a, const RoseProgram &b) { - RoseProgramHash hasher; - return hasher(a) < hasher(b); - }); - - blocks.erase(unique(blocks.begin(), blocks.end(), RoseProgramEquivalence()), - blocks.end()); - - DEBUG_PRINTF("%zu blocks after dedupe\n", blocks.size()); - - for (auto &block : blocks) { - /* If we have multiple blocks from different literals and any of them - * squash groups, we will have to add a CLEAR_WORK_DONE instruction to - * each literal program block to clear the work_done flags so that it's - * only set if a state has been. */ - if (!program.empty() && reads_work_done_flag(block)) { - RoseProgram clear_block; - clear_block.add_before_end(make_unique()); - program.add_block(move(clear_block)); - } - - program.add_block(move(block)); - } - - return program; + return makeLiteralProgram(build, bc.leftfix_info, bc.suffixes, + bc.engine_info_by_queue, bc.lookarounds, + bc.roleStateIndices, prog_build, lit_id, + *edges_ptr, is_anchored_replay_program); } static @@ -4947,38 +2775,6 @@ RoseProgram makeFragmentProgram(const RoseBuildImpl &build, build_context &bc, return assembleProgramBlocks(move(blocks)); } -static -u32 writeDelayRebuildProgram(const RoseBuildImpl &build, build_context &bc, - ProgramBuild &prog_build, - const vector &lit_ids) { - assert(!lit_ids.empty()); - - if (!build.cc.streaming) { - return 0; // We only do delayed rebuild in streaming mode. - } - - vector blocks; - - for (const auto &lit_id : lit_ids) { - DEBUG_PRINTF("lit_id=%u\n", lit_id); - const auto &info = build.literal_info.at(lit_id); - if (info.delayed_ids.empty()) { - continue; // No delayed IDs, no work to do. 
- } - - RoseProgram prog; - makeCheckLiteralInstruction(build, lit_id, bc.longLitLengthThreshold, - prog); - makeCheckLitMaskInstruction(build, bc, lit_id, prog); - makePushDelayedInstructions(build, prog_build, lit_id, prog); - blocks.push_back(move(prog)); - } - - auto program = assembleProgramBlocks(move(blocks)); - - return writeProgram(bc, move(program)); -} - /** * \brief Returns a map from literal ID to a list of edges leading into * vertices with that literal ID. @@ -5118,8 +2914,15 @@ void buildLiteralPrograms(const RoseBuildImpl &build, lit_edge_map); frag.lit_program_offset = writeProgram(bc, move(lit_prog)); - frag.delay_program_offset - = writeDelayRebuildProgram(build, bc, prog_build, frag.lit_ids); + // We only do delayed rebuild in streaming mode. + if (!build.cc.streaming) { + continue; + } + + auto rebuild_prog = makeDelayRebuildProgram(build, + bc.lookarounds, prog_build, + frag.lit_ids); + frag.delay_program_offset = writeProgram(bc, move(rebuild_prog)); } } @@ -5267,10 +3070,7 @@ pair buildReportPrograms(const RoseBuildImpl &build, programs.reserve(reports.size()); for (ReportID id : reports) { - RoseProgram program; - const bool has_som = false; - makeCatchupMpv(build, bc.needs_mpv_catchup, id, program); - makeReport(build, id, has_som, program); + auto program = makeReportProgram(build, bc.needs_mpv_catchup, id); u32 offset = writeProgram(bc, move(program)); programs.push_back(offset); build.rm.setProgramOffset(id, offset); @@ -5283,38 +3083,6 @@ pair buildReportPrograms(const RoseBuildImpl &build, return {offset, count}; } -static -RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build, - bool needs_catchup, - ProgramBuild &prog_build, const RoseEdge &e, - const bool multiple_preds) { - const RoseGraph &g = build.g; - const RoseVertex v = target(e, g); - - RoseProgram program; - - if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { - makeRoleCheckBounds(build, v, e, program); - } - - if (multiple_preds) { - // Only necessary when there is more than one pred. 
- makeRoleCheckNotHandled(prog_build, v, program); - } - - const auto &reports = g[v].reports; - makeCatchup(build, needs_catchup, reports, program); - - const bool has_som = false; - RoseProgram report_block; - for (const auto &id : reports) { - makeReport(build, id, has_som, report_block); - } - program.add_before_end(move(report_block)); - - return program; -} - static bool hasEodAnchoredSuffix(const RoseBuildImpl &build) { const RoseGraph &g = build.g; @@ -5366,7 +3134,8 @@ void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc, continue; } if (canEagerlyReportAtEod(build, e)) { - DEBUG_PRINTF("already done report for vertex %zu\n", g[u].index); + DEBUG_PRINTF("already done report for vertex %zu\n", + g[u].index); continue; } edge_list.push_back(e); @@ -5378,8 +3147,7 @@ void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc, assert(contains(bc.roleStateIndices, u)); u32 pred_state = bc.roleStateIndices.at(u); pred_blocks[pred_state].add_block( - makeEodAnchorProgram(build, bc.needs_catchup, prog_build, e, - multiple_preds)); + makeEodAnchorProgram(build, prog_build, e, multiple_preds)); } } @@ -5414,58 +3182,31 @@ void addEodEventProgram(const RoseBuildImpl &build, build_context &bc, tie(g[source(b, g)].index, g[target(b, g)].index); }); - auto block = makeLiteralProgram(build, bc, prog_build, + auto block = makeLiteralProgram(build, bc.leftfix_info, bc.suffixes, + bc.engine_info_by_queue, bc.lookarounds, + bc.roleStateIndices, prog_build, build.eod_event_literal_id, edge_list, false); program.add_block(move(block)); } static -void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) { - if (!eodNfaIterOffset) { - return; - } - - RoseProgram block; - block.add_before_end(make_unique(eodNfaIterOffset)); - program.add_block(move(block)); -} - -static -void addSuffixesEodProgram(const RoseBuildImpl &build, RoseProgram &program) { - if (!hasEodAnchoredSuffix(build)) { - return; - } - - RoseProgram block; - block.add_before_end(make_unique()); - program.add_block(move(block)); -} - -static -void addMatcherEodProgram(const RoseBuildImpl &build, RoseProgram &program) { - if (!hasEodMatcher(build)) { - return; - } - - RoseProgram block; - block.add_before_end(make_unique()); - program.add_block(move(block)); -} - -static -u32 writeEodProgram(const RoseBuildImpl &build, build_context &bc, - ProgramBuild &prog_build, u32 eodNfaIterOffset) { +RoseProgram makeEodProgram(const RoseBuildImpl &build, build_context &bc, + ProgramBuild &prog_build, u32 eodNfaIterOffset) { RoseProgram program; addEodEventProgram(build, bc, prog_build, program); addEnginesEodProgram(eodNfaIterOffset, program); addEodAnchorProgram(build, bc, prog_build, false, program); - addMatcherEodProgram(build, program); + if (hasEodMatcher(build)) { + addMatcherEodProgram(program); + } addEodAnchorProgram(build, bc, prog_build, true, program); - addSuffixesEodProgram(build, program); + if (hasEodAnchoredSuffix(build)) { + addSuffixesEodProgram(program); + } - return writeProgram(bc, move(program)); + return program; } static @@ -5737,8 +3478,6 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { build_context bc; u32 floatingMinLiteralMatchOffset = findMinFloatingLiteralMatch(*this, anchored_dfas); - bc.longLitLengthThreshold = longLitLengthThreshold; - bc.needs_catchup = needsCatchup(*this); recordResources(bc.resources, *this, fragments); if (!anchored_dfas.empty()) { bc.resources.has_anchored = true; @@ -5791,7 +3530,8 @@ bytecode_ptr 
RoseBuildImpl::buildFinalEngine(u32 minWidth) { &laggedRoseCount, &historyRequired); // Information only needed for program construction. - ProgramBuild prog_build(floatingMinLiteralMatchOffset); + ProgramBuild prog_build(floatingMinLiteralMatchOffset, + longLitLengthThreshold, needsCatchup(*this)); prog_build.vertex_group_map = getVertexGroupMap(*this); prog_build.squashable_groups = getSquashableGroups(*this); @@ -5803,13 +3543,14 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { buildLiteralPrograms(*this, fragments, bc, prog_build); - proto.eodProgramOffset = - writeEodProgram(*this, bc, prog_build, eodNfaIterOffset); + auto eod_prog = makeEodProgram(*this, bc, prog_build, eodNfaIterOffset); + proto.eodProgramOffset = writeProgram(bc, move(eod_prog)); size_t longLitStreamStateRequired = 0; - proto.longLitTableOffset = buildLongLiteralTable(*this, bc.engine_blob, - bc.longLiterals, longLitLengthThreshold, &historyRequired, - &longLitStreamStateRequired); + proto.longLitTableOffset + = buildLongLiteralTable(*this, bc.engine_blob, bc.longLiterals, + longLitLengthThreshold, &historyRequired, + &longLitStreamStateRequired); proto.lastByteHistoryIterOffset = buildLastByteIter(g, bc); proto.eagerIterOffset = writeEagerQueueIter( @@ -5817,7 +3558,7 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { addSomRevNfas(bc, proto, ssm); - writeLookaroundTables(bc, proto); + writeLookaroundTables(bc.lookarounds, bc.engine_blob, proto); writeDkeyInfo(rm, bc.engine_blob, proto); writeLeftInfo(bc.engine_blob, proto, leftInfoTable); @@ -5829,8 +3570,8 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build floating HWLM matcher. rose_group fgroups = 0; - auto ftable = buildFloatingMatcher(*this, fragments, - bc.longLitLengthThreshold, &fgroups, &historyRequired); + auto ftable = buildFloatingMatcher(*this, fragments, longLitLengthThreshold, + &fgroups, &historyRequired); if (ftable) { proto.fmatcherOffset = bc.engine_blob.add(ftable); bc.resources.has_floating = true; @@ -5838,7 +3579,7 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build delay rebuild HWLM matcher. 
auto drtable = buildDelayRebuildMatcher(*this, fragments, - bc.longLitLengthThreshold); + longLitLengthThreshold); if (drtable) { proto.drmatcherOffset = bc.engine_blob.add(drtable); } diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index 21db7a8e..b920e922 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -622,6 +622,11 @@ u64a findMaxOffset(const std::set &reports, const ReportManager &rm); void normaliseLiteralMask(const ue2_literal &s, std::vector &msk, std::vector &cmp); +u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id); +u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id); + +bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e); + #ifndef NDEBUG bool canImplementGraphs(const RoseBuildImpl &tbi); #endif diff --git a/src/rose/rose_build_misc.cpp b/src/rose/rose_build_misc.cpp index 44044cb9..51a6ea85 100644 --- a/src/rose/rose_build_misc.cpp +++ b/src/rose/rose_build_misc.cpp @@ -909,6 +909,59 @@ u32 roseQuality(const RoseEngine *t) { return 1; } +u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) { + const auto &lit_vertices = build.literal_info.at(lit_id).vertices; + assert(!lit_vertices.empty()); + + u32 min_offset = UINT32_MAX; + for (const auto &v : lit_vertices) { + min_offset = min(min_offset, build.g[v].min_offset); + } + + return min_offset; +} + +u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) { + const auto &lit_vertices = build.literal_info.at(lit_id).vertices; + assert(!lit_vertices.empty()); + + u32 max_offset = 0; + for (const auto &v : lit_vertices) { + max_offset = max(max_offset, build.g[v].max_offset); + } + + return max_offset; +} + +bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) { + const auto &g = build.g; + const auto v = target(e, g); + + if (!build.g[v].eod_accept) { + return false; + } + + // If there's a graph between us and EOD, we shouldn't be eager. + if (build.g[v].left) { + return false; + } + + // Must be exactly at EOD. + if (g[e].minBound != 0 || g[e].maxBound != 0) { + return false; + } + + // In streaming mode, we can only eagerly report EOD for literals in the + // EOD-anchored table, as that's the only time we actually know where EOD + // is. In block mode, we always have this information. + const auto u = source(e, g); + if (build.cc.streaming && !build.isInETable(u)) { + return false; + } + + return true; +} + #ifndef NDEBUG /** \brief Returns true if all the graphs (NFA, DFA, Haig, etc) in this Rose * graph are implementable. 
*/
diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp
index c319eed2..8d0306ae 100644
--- a/src/rose/rose_build_program.cpp
+++ b/src/rose/rose_build_program.cpp
@@ -29,14 +29,52 @@
 #include "rose_build_program.h"
 
 #include "rose_build_instructions.h"
+#include "rose_build_lookaround.h"
+#include "rose_build_resources.h"
+#include "nfa/nfa_api_queue.h"
+#include "nfa/nfa_build_util.h"
+#include "nfa/tamaramacompile.h"
+#include "nfagraph/ng_util.h"
+#include "util/charreach_util.h"
+#include "util/container.h"
+#include "util/compile_context.h"
+#include "util/compile_error.h"
+#include "util/dump_charclass.h"
+#include "util/report_manager.h"
+#include "util/verify_types.h"
+
+#include <boost/range/adaptor/map.hpp>
 
 #include <algorithm>
 #include <cstring>
 
 using namespace std;
+using boost::adaptors::map_values;
+using boost::adaptors::map_keys;
 
 namespace ue2 {
 
+engine_info::engine_info(const NFA *nfa, bool trans)
+    : type((NFAEngineType)nfa->type), accepts_eod(nfaAcceptsEod(nfa)),
+      stream_size(nfa->streamStateSize),
+      scratch_size(nfa->scratchStateSize),
+      scratch_align(state_alignment(*nfa)),
+      transient(trans) {
+    assert(scratch_align);
+}
+
+left_build_info::left_build_info(u32 q, u32 l, u32 t, rose_group sm,
+                                 const std::vector<u8> &stops, u32 max_ql,
+                                 u8 cm_count, const CharReach &cm_cr)
+    : queue(q), lag(l), transient(t), squash_mask(sm), stopAlphabet(stops),
+      max_queuelen(max_ql), countingMiracleCount(cm_count),
+      countingMiracleReach(cm_cr) {
+}
+
+left_build_info::left_build_info(const vector<vector<LookEntry>> &looks)
+    : has_lookaround(true), lookaround(looks) {
+}
+
 using OffsetMap = RoseInstruction::OffsetMap;
 
 static
@@ -216,6 +254,8 @@ bool RoseProgramEquivalence::operator()(const RoseProgram &prog1,
     return std::equal(prog1.begin(), prog1.end(), prog2.begin(), is_equiv);
 }
 
+/* Removes any CHECK_HANDLED instructions from the given program */
+static
 void stripCheckHandledInstruction(RoseProgram &prog) {
     for (auto it = prog.begin(); it != prog.end();) {
         auto ins = dynamic_cast<const RoseInstrCheckNotHandled *>(it->get());
@@ -238,6 +278,9 @@ void stripCheckHandledInstruction(RoseProgram &prog) {
     }
 }
 
+
+/** Returns true if the program may read the interpreter's work_done flag */
+static
 bool reads_work_done_flag(const RoseProgram &prog) {
     for (const auto &ri : prog) {
         if (dynamic_cast<const RoseInstrSquashGroups *>(ri.get())) {
@@ -247,4 +290,2111 @@ bool reads_work_done_flag(const RoseProgram &prog) {
     return false;
 }
 
+void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) {
+    if (!eodNfaIterOffset) {
+        return;
+    }
+
+    RoseProgram block;
+    block.add_before_end(make_unique<RoseInstrEnginesEod>(eodNfaIterOffset));
+    program.add_block(move(block));
+}
+
+void addSuffixesEodProgram(RoseProgram &program) {
+    RoseProgram block;
+    block.add_before_end(make_unique<RoseInstrSuffixesEod>());
+    program.add_block(move(block));
+}
+
+void addMatcherEodProgram(RoseProgram &program) {
+    RoseProgram block;
+    block.add_before_end(make_unique<RoseInstrMatcherEod>());
+    program.add_block(move(block));
+}
+
+static
+void makeRoleCheckLeftfix(const RoseBuildImpl &build,
+                          const map<RoseVertex, left_build_info> &leftfix_info,
+                          RoseVertex v, RoseProgram &program) {
+    auto it = leftfix_info.find(v);
+    if (it == end(leftfix_info)) {
+        return;
+    }
+    const left_build_info &lni = it->second;
+    if (lni.has_lookaround) {
+        return; // Leftfix completely implemented by lookaround.
+ } + + assert(!build.cc.streaming || + build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); + + bool is_prefix = build.isRootSuccessor(v); + const auto *end_inst = program.end_instruction(); + + unique_ptr ri; + if (is_prefix) { + ri = make_unique(lni.queue, build.g[v].left.lag, + build.g[v].left.leftfix_report, + end_inst); + } else { + ri = make_unique(lni.queue, build.g[v].left.lag, + build.g[v].left.leftfix_report, + end_inst); + } + program.add_before_end(move(ri)); +} + +static +void makeAnchoredLiteralDelay(const RoseBuildImpl &build, + const ProgramBuild &prog_build, u32 lit_id, + RoseProgram &program) { + // Only relevant for literals in the anchored table. + const rose_literal_id &lit = build.literals.right.at(lit_id); + if (lit.table != ROSE_ANCHORED) { + return; + } + + // If this literal match cannot occur after floatingMinLiteralMatchOffset, + // we do not need this check. + bool all_too_early = true; + rose_group groups = 0; + + const auto &lit_vertices = build.literal_info.at(lit_id).vertices; + for (RoseVertex v : lit_vertices) { + if (build.g[v].max_offset > prog_build.floatingMinLiteralMatchOffset) { + all_too_early = false; + } + groups |= build.g[v].groups; + } + + if (all_too_early) { + return; + } + + assert(contains(prog_build.anchored_programs, lit_id)); + u32 anch_id = prog_build.anchored_programs.at(lit_id); + + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(groups, anch_id, end_inst); + program.add_before_end(move(ri)); +} + +static +void makeDedupe(const ReportManager &rm, const Report &report, + RoseProgram &program) { + const auto *end_inst = program.end_instruction(); + auto ri = + make_unique(report.quashSom, rm.getDkey(report), + report.offsetAdjust, end_inst); + program.add_before_end(move(ri)); +} + +static +void makeDedupeSom(const ReportManager &rm, const Report &report, + RoseProgram &program) { + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(report.quashSom, + rm.getDkey(report), + report.offsetAdjust, end_inst); + program.add_before_end(move(ri)); +} + +static +void makeCatchup(const ReportManager &rm, bool needs_catchup, + const flat_set &reports, RoseProgram &program) { + if (!needs_catchup) { + return; + } + + // Everything except the INTERNAL_ROSE_CHAIN report needs catchup to run + // before reports are triggered. 
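+    // (INTERNAL_ROSE_CHAIN reports only trigger the MPV, whose catch-up is
+    // handled separately, via makeCatchupMpv, so they are excluded here.)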
+ + auto report_needs_catchup = [&](const ReportID &id) { + const Report &report = rm.getReport(id); + return report.type != INTERNAL_ROSE_CHAIN; + }; + + if (!any_of(begin(reports), end(reports), report_needs_catchup)) { + DEBUG_PRINTF("none of the given reports needs catchup\n"); + return; + } + + program.add_before_end(make_unique()); +} + +static +void writeSomOperation(const Report &report, som_operation *op) { + assert(op); + + memset(op, 0, sizeof(*op)); + + switch (report.type) { + case EXTERNAL_CALLBACK_SOM_REL: + op->type = SOM_EXTERNAL_CALLBACK_REL; + break; + case INTERNAL_SOM_LOC_SET: + op->type = SOM_INTERNAL_LOC_SET; + break; + case INTERNAL_SOM_LOC_SET_IF_UNSET: + op->type = SOM_INTERNAL_LOC_SET_IF_UNSET; + break; + case INTERNAL_SOM_LOC_SET_IF_WRITABLE: + op->type = SOM_INTERNAL_LOC_SET_IF_WRITABLE; + break; + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: + op->type = SOM_INTERNAL_LOC_SET_REV_NFA; + break; + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: + op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET; + break; + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: + op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE; + break; + case INTERNAL_SOM_LOC_COPY: + op->type = SOM_INTERNAL_LOC_COPY; + break; + case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: + op->type = SOM_INTERNAL_LOC_COPY_IF_WRITABLE; + break; + case INTERNAL_SOM_LOC_MAKE_WRITABLE: + op->type = SOM_INTERNAL_LOC_MAKE_WRITABLE; + break; + case EXTERNAL_CALLBACK_SOM_STORED: + op->type = SOM_EXTERNAL_CALLBACK_STORED; + break; + case EXTERNAL_CALLBACK_SOM_ABS: + op->type = SOM_EXTERNAL_CALLBACK_ABS; + break; + case EXTERNAL_CALLBACK_SOM_REV_NFA: + op->type = SOM_EXTERNAL_CALLBACK_REV_NFA; + break; + case INTERNAL_SOM_LOC_SET_FROM: + op->type = SOM_INTERNAL_LOC_SET_FROM; + break; + case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: + op->type = SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE; + break; + default: + // This report doesn't correspond to a SOM operation. + assert(0); + throw CompileError("Unable to generate bytecode."); + } + + op->onmatch = report.onmatch; + + switch (report.type) { + case EXTERNAL_CALLBACK_SOM_REV_NFA: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: + op->aux.revNfaIndex = report.revNfaIndex; + break; + default: + op->aux.somDistance = report.somDistance; + break; + } +} + +static +void makeReport(const RoseBuildImpl &build, const ReportID id, + const bool has_som, RoseProgram &program) { + assert(id < build.rm.numReports()); + const Report &report = build.rm.getReport(id); + + RoseProgram report_block; + const RoseInstruction *end_inst = report_block.end_instruction(); + + // Handle min/max offset checks. + if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) { + auto ri = make_unique(report.minOffset, + report.maxOffset, end_inst); + report_block.add_before_end(move(ri)); + } + + // If this report has an exhaustion key, we can check it in the program + // rather than waiting until we're in the callback adaptor. + if (report.ekey != INVALID_EKEY) { + auto ri = make_unique(report.ekey, end_inst); + report_block.add_before_end(move(ri)); + } + + // External SOM reports that aren't passthrough need their SOM value + // calculated. + if (isExternalSomReport(report) && + report.type != EXTERNAL_CALLBACK_SOM_PASS) { + auto ri = make_unique(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); + } + + // Min length constraint. 
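+    // (CHECK_MIN_LENGTH compares the match length, the end offset plus
+    // offsetAdjust minus the SOM value, against min_length, hence the SOM
+    // requirement asserted below.)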
+ if (report.minLength > 0) { + assert(build.hasSom); + auto ri = make_unique( + report.offsetAdjust, report.minLength, end_inst); + report_block.add_before_end(move(ri)); + } + + if (report.quashSom) { + report_block.add_before_end(make_unique()); + } + + switch (report.type) { + case EXTERNAL_CALLBACK: + if (!has_som) { + // Dedupe is only necessary if this report has a dkey, or if there + // are SOM reports to catch up. + bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom; + if (report.ekey == INVALID_EKEY) { + if (needs_dedupe) { + report_block.add_before_end( + make_unique( + report.quashSom, build.rm.getDkey(report), + report.onmatch, report.offsetAdjust, end_inst)); + } else { + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); + } + } else { + if (needs_dedupe) { + makeDedupe(build.rm, report, report_block); + } + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); + } + } else { // has_som + makeDedupeSom(build.rm, report, report_block); + if (report.ekey == INVALID_EKEY) { + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); + } else { + report_block.add_before_end( + make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); + } + } + break; + case INTERNAL_SOM_LOC_SET: + case INTERNAL_SOM_LOC_SET_IF_UNSET: + case INTERNAL_SOM_LOC_SET_IF_WRITABLE: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: + case INTERNAL_SOM_LOC_COPY: + case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: + case INTERNAL_SOM_LOC_MAKE_WRITABLE: + case INTERNAL_SOM_LOC_SET_FROM: + case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: + if (has_som) { + auto ri = make_unique(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); + } else { + auto ri = make_unique(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); + } + break; + case INTERNAL_ROSE_CHAIN: { + report_block.add_before_end(make_unique( + report.onmatch, report.topSquashDistance)); + break; + } + case EXTERNAL_CALLBACK_SOM_REL: + case EXTERNAL_CALLBACK_SOM_STORED: + case EXTERNAL_CALLBACK_SOM_ABS: + case EXTERNAL_CALLBACK_SOM_REV_NFA: + makeDedupeSom(build.rm, report, report_block); + if (report.ekey == INVALID_EKEY) { + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); + } else { + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); + } + break; + case EXTERNAL_CALLBACK_SOM_PASS: + makeDedupeSom(build.rm, report, report_block); + if (report.ekey == INVALID_EKEY) { + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); + } else { + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); + } + break; + + default: + assert(0); + throw CompileError("Unable to generate bytecode."); + } + + assert(!report_block.empty()); + program.add_block(move(report_block)); +} + +static +void makeRoleReports(const RoseBuildImpl &build, + const std::map &leftfix_info, + bool needs_catchup, RoseVertex v, RoseProgram &program) { + const auto &g = build.g; + + bool report_som = false; + if (g[v].left.tracksSom()) { + /* we are a suffaig - need to update role to provide som to the + * suffix. 
*/ + assert(contains(leftfix_info, v)); + const left_build_info &lni = leftfix_info.at(v); + program.add_before_end( + make_unique(lni.queue, g[v].left.lag)); + report_som = true; + } else if (g[v].som_adjust) { + program.add_before_end( + make_unique(g[v].som_adjust)); + report_som = true; + } + + makeCatchup(build.rm, needs_catchup, g[v].reports, program); + + RoseProgram report_block; + for (ReportID id : g[v].reports) { + makeReport(build, id, report_som, report_block); + } + program.add_before_end(move(report_block)); +} + +static +void makeRoleSetState(const unordered_map &roleStateIndices, + RoseVertex v, RoseProgram &program) { + // We only need this instruction if a state index has been assigned to this + // vertex. + auto it = roleStateIndices.find(v); + if (it == end(roleStateIndices)) { + return; + } + program.add_before_end(make_unique(it->second)); +} + +static +void makePushDelayedInstructions(const RoseLiteralMap &literals, + ProgramBuild &prog_build, + const flat_set &delayed_ids, + RoseProgram &program) { + vector delay_instructions; + + for (const auto &delayed_lit_id : delayed_ids) { + DEBUG_PRINTF("delayed lit id %u\n", delayed_lit_id); + assert(contains(prog_build.delay_programs, delayed_lit_id)); + u32 delay_id = prog_build.delay_programs.at(delayed_lit_id); + const auto &delay_lit = literals.right.at(delayed_lit_id); + delay_instructions.emplace_back(verify_u8(delay_lit.delay), delay_id); + } + + sort_and_unique(delay_instructions, [](const RoseInstrPushDelayed &a, + const RoseInstrPushDelayed &b) { + return tie(a.delay, a.index) < tie(b.delay, b.index); + }); + + for (const auto &ri : delay_instructions) { + program.add_before_end(make_unique(ri)); + } +} + +static +void makeCheckLiteralInstruction(const rose_literal_id &lit, + size_t longLitLengthThreshold, + RoseProgram &program, + const CompileContext &cc) { + assert(longLitLengthThreshold > 0); + + DEBUG_PRINTF("lit=%s, long lit threshold %zu\n", dumpString(lit.s).c_str(), + longLitLengthThreshold); + + if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("lit short enough to not need confirm\n"); + return; + } + + // Check resource limits as well. + if (lit.s.length() > cc.grey.limitLiteralLength) { + throw ResourceLimitError(); + } + + if (lit.s.length() <= longLitLengthThreshold) { + DEBUG_PRINTF("is a medium-length literal\n"); + const auto *end_inst = program.end_instruction(); + unique_ptr ri; + if (lit.s.any_nocase()) { + ri = make_unique(lit.s.get_string(), + end_inst); + } else { + ri = make_unique(lit.s.get_string(), + end_inst); + } + program.add_before_end(move(ri)); + return; + } + + // Long literal support should only really be used for the floating table + // in streaming mode. 
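+    // To summarise the three confirm tiers handled above: literals of length
+    // <= ROSE_SHORT_LITERAL_LEN_MAX need no confirm work at all, those up to
+    // longLitLengthThreshold use CHECK_MED_LIT[_NOCASE], and only the
+    // remaining long literals fall through to CHECK_LONG_LIT[_NOCASE], backed
+    // by the long literal hash table in stream state.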
+ assert(lit.table == ROSE_FLOATING && cc.streaming); + + DEBUG_PRINTF("is a long literal\n"); + + const auto *end_inst = program.end_instruction(); + unique_ptr ri; + if (lit.s.any_nocase()) { + ri = make_unique(lit.s.get_string(), + end_inst); + } else { + ri = make_unique(lit.s.get_string(), end_inst); + } + program.add_before_end(move(ri)); +} + +static +void makeRoleCheckNotHandled(ProgramBuild &prog_build, RoseVertex v, + RoseProgram &program) { + u32 handled_key; + if (contains(prog_build.handledKeys, v)) { + handled_key = prog_build.handledKeys.at(v); + } else { + handled_key = verify_u32(prog_build.handledKeys.size()); + prog_build.handledKeys.emplace(v, handled_key); + } + + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(handled_key, end_inst); + program.add_before_end(move(ri)); +} + +static +void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, + const RoseEdge &e, RoseProgram &program) { + const RoseGraph &g = build.g; + const RoseVertex u = source(e, g); + + // We know that we can trust the anchored table (DFA) to always deliver us + // literals at the correct offset. + if (build.isAnchored(v)) { + DEBUG_PRINTF("literal in anchored table, skipping bounds check\n"); + return; + } + + // Use the minimum literal length. + u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v)); + + u64a min_bound = g[e].minBound + lit_length; + u64a max_bound = g[e].maxBound == ROSE_BOUND_INF + ? ROSE_BOUND_INF + : g[e].maxBound + lit_length; + + if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { + assert(g[u].fixedOffset()); + // Make offsets absolute. + min_bound += g[u].max_offset; + if (max_bound != ROSE_BOUND_INF) { + max_bound += g[u].max_offset; + } + } + + assert(max_bound <= ROSE_BOUND_INF); + assert(min_bound <= max_bound); + + // CHECK_BOUNDS instruction uses 64-bit bounds, so we can use MAX_OFFSET + // (max value of a u64a) to represent ROSE_BOUND_INF. + if (max_bound == ROSE_BOUND_INF) { + max_bound = MAX_OFFSET; + } + + // This instruction should be doing _something_ -- bounds should be tighter + // than just {length, inf}. + assert(min_bound > lit_length || max_bound < MAX_OFFSET); + + const auto *end_inst = program.end_instruction(); + program.add_before_end( + make_unique(min_bound, max_bound, end_inst)); +} + +static +void makeRoleGroups(const RoseGraph &g, ProgramBuild &prog_build, + RoseVertex v, RoseProgram &program) { + rose_group groups = g[v].groups; + if (!groups) { + return; + } + + // The set of "already on" groups as we process this vertex is the + // intersection of the groups set by our predecessors. + assert(in_degree(v, g) > 0); + rose_group already_on = ~rose_group{0}; + for (const auto &u : inv_adjacent_vertices_range(v, g)) { + already_on &= prog_build.vertex_group_map.at(u); + } + + DEBUG_PRINTF("already_on=0x%llx\n", already_on); + DEBUG_PRINTF("squashable=0x%llx\n", prog_build.squashable_groups); + DEBUG_PRINTF("groups=0x%llx\n", groups); + + already_on &= ~prog_build.squashable_groups; + DEBUG_PRINTF("squashed already_on=0x%llx\n", already_on); + + // We don't *have* to mask off the groups that we know are already on, but + // this will make bugs more apparent. + groups &= ~already_on; + + if (!groups) { + DEBUG_PRINTF("no new groups to set, skipping\n"); + return; + } + + program.add_before_end(make_unique(groups)); +} + +static +void addLookaround(lookaround_info &lookarounds, + const vector> &look, + u32 &look_index, u32 &reach_index) { + // Check the cache. 
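+    // The cache is keyed on the complete lookaround (every path and its
+    // reaches), so roles with identical lookarounds share a single set of
+    // look/reach table entries and indices.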
+    auto it = lookarounds.cache.find(look);
+    if (it != lookarounds.cache.end()) {
+        look_index = verify_u32(it->second.first);
+        reach_index = verify_u32(it->second.second);
+        DEBUG_PRINTF("reusing look at idx %u\n", look_index);
+        DEBUG_PRINTF("reusing reach at idx %u\n", reach_index);
+        return;
+    }
+
+    size_t look_idx = lookarounds.lookTableSize;
+    size_t reach_idx = lookarounds.reachTableSize;
+
+    if (look.size() == 1) {
+        lookarounds.lookTableSize += look.front().size();
+        lookarounds.reachTableSize += look.front().size() * REACH_BITVECTOR_LEN;
+    } else {
+        lookarounds.lookTableSize += look.size();
+        lookarounds.reachTableSize += look.size() * MULTI_REACH_BITVECTOR_LEN;
+    }
+
+    lookarounds.cache.emplace(look, make_pair(look_idx, reach_idx));
+    lookarounds.table.emplace_back(look);
+
+    DEBUG_PRINTF("adding look at idx %zu\n", look_idx);
+    DEBUG_PRINTF("adding reach at idx %zu\n", reach_idx);
+    look_index = verify_u32(look_idx);
+    reach_index = verify_u32(reach_idx);
+}
+
+static
+bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) {
+    size_t reach_size = cr.count();
+    assert(reach_size > 0);
+    // Check whether reach_size is a power of 2.
+    if ((reach_size - 1) & reach_size) {
+        return false;
+    }
+    make_and_cmp_mask(cr, &andmask, &cmpmask);
+    if ((1 << popcount32((u8)(~andmask))) ^ reach_size) {
+        return false;
+    }
+    return true;
+}
+
+static
+bool checkReachWithFlip(const CharReach &cr, u8 &andmask,
+                       u8 &cmpmask, u8 &flip) {
+    if (checkReachMask(cr, andmask, cmpmask)) {
+        flip = 0;
+        return true;
+    }
+    if (checkReachMask(~cr, andmask, cmpmask)) {
+        flip = 1;
+        return true;
+    }
+    return false;
+}
+
+static
+bool makeRoleByte(const vector<LookEntry> &look, RoseProgram &program) {
+    if (look.size() == 1) {
+        const auto &entry = look[0];
+        u8 andmask_u8, cmpmask_u8;
+        u8 flip;
+        if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) {
+            return false;
+        }
+        s32 checkbyte_offset = verify_s32(entry.offset);
+        DEBUG_PRINTF("CHECK BYTE offset=%d\n", checkbyte_offset);
+        const auto *end_inst = program.end_instruction();
+        auto ri = make_unique<RoseInstrCheckByte>(andmask_u8, cmpmask_u8, flip,
+                                                  checkbyte_offset, end_inst);
+        program.add_before_end(move(ri));
+        return true;
+    }
+    return false;
+}
+
+static
+bool makeRoleMask(const vector<LookEntry> &look, RoseProgram &program) {
+    if (look.back().offset < look.front().offset + 8) {
+        s32 base_offset = verify_s32(look.front().offset);
+        u64a and_mask = 0;
+        u64a cmp_mask = 0;
+        u64a neg_mask = 0;
+        for (const auto &entry : look) {
+            u8 andmask_u8, cmpmask_u8, flip;
+            if (!checkReachWithFlip(entry.reach, andmask_u8,
+                                    cmpmask_u8, flip)) {
+                return false;
+            }
+            DEBUG_PRINTF("entry offset %d\n", entry.offset);
+            u32 shift = (entry.offset - base_offset) << 3;
+            and_mask |= (u64a)andmask_u8 << shift;
+            cmp_mask |= (u64a)cmpmask_u8 << shift;
+            if (flip) {
+                neg_mask |= 0xffLLU << shift;
+            }
+        }
+        DEBUG_PRINTF("CHECK MASK and_mask=%llx cmp_mask=%llx\n",
+                     and_mask, cmp_mask);
+        const auto *end_inst = program.end_instruction();
+        auto ri = make_unique<RoseInstrCheckMask>(and_mask, cmp_mask, neg_mask,
+                                                  base_offset, end_inst);
+        program.add_before_end(move(ri));
+        return true;
+    }
+    return false;
+}
+
+static UNUSED
+string convertMaskstoString(u8 *p, int byte_len) {
+    string s;
+    for (int i = 0; i < byte_len; i++) {
+        u8 hi = *p >> 4;
+        u8 lo = *p & 0xf;
+        s += (char)(hi + (hi < 10 ? 48 : 87));
+        s += (char)(lo + (lo < 10 ? 48 : 87));
+        p++;
+    }
+    return s;
+}
+
+static
+bool makeRoleMask32(const vector<LookEntry> &look,
+                    RoseProgram &program) {
+    if (look.back().offset >= look.front().offset + 32) {
+        return false;
+    }
+    s32 base_offset = verify_s32(look.front().offset);
+    array<u8, 32> and_mask, cmp_mask;
+    and_mask.fill(0);
+    cmp_mask.fill(0);
+    u32 neg_mask = 0;
+    for (const auto &entry : look) {
+        u8 andmask_u8, cmpmask_u8, flip;
+        if (!checkReachWithFlip(entry.reach, andmask_u8,
+                                cmpmask_u8, flip)) {
+            return false;
+        }
+        u32 shift = entry.offset - base_offset;
+        assert(shift < 32);
+        and_mask[shift] = andmask_u8;
+        cmp_mask[shift] = cmpmask_u8;
+        if (flip) {
+            neg_mask |= 1 << shift;
+        }
+    }
+
+    DEBUG_PRINTF("and_mask %s\n",
+                 convertMaskstoString(and_mask.data(), 32).c_str());
+    DEBUG_PRINTF("cmp_mask %s\n",
+                 convertMaskstoString(cmp_mask.data(), 32).c_str());
+    DEBUG_PRINTF("neg_mask %08x\n", neg_mask);
+    DEBUG_PRINTF("base_offset %d\n", base_offset);
+
+    const auto *end_inst = program.end_instruction();
+    auto ri = make_unique<RoseInstrCheckMask32>(and_mask, cmp_mask, neg_mask,
+                                                base_offset, end_inst);
+    program.add_before_end(move(ri));
+    return true;
+}
+
+// Sorting by the size of every bucket.
+// Used in map<u32, vector<s8>, cmpNibble>.
+struct cmpNibble {
+    bool operator()(const u32 data1, const u32 data2) const{
+        u32 size1 = popcount32(data1 >> 16) * popcount32(data1 << 16);
+        u32 size2 = popcount32(data2 >> 16) * popcount32(data2 << 16);
+        return std::tie(size1, data1) < std::tie(size2, data2);
+    }
+};
+
+// Insert all pairs of bucket and offset into buckets.
+static really_inline
+void getAllBuckets(const vector<LookEntry> &look,
+                   map<u32, vector<s8>, cmpNibble> &buckets, u64a &neg_mask) {
+    s32 base_offset = verify_s32(look.front().offset);
+    for (const auto &entry : look) {
+        CharReach cr = entry.reach;
+        // Flip heavy character classes to save buckets.
+        if (cr.count() > 128 ) {
+            cr.flip();
+        } else {
+            neg_mask ^= 1ULL << (entry.offset - base_offset);
+        }
+        map<u16, u16> lo2hi;
+        // We treat Ascii Table as a 16x16 grid.
+        // Push every row in cr into lo2hi and mark the row number.
+        for (size_t i = cr.find_first(); i != CharReach::npos;) {
+            u8 it_hi = i >> 4;
+            u16 low_encode = 0;
+            while (i != CharReach::npos && (i >> 4) == it_hi) {
+                low_encode |= 1 << (i & 0xf);
+                i = cr.find_next(i);
+            }
+            lo2hi[low_encode] |= 1 << it_hi;
+        }
+        for (const auto &it : lo2hi) {
+            u32 hi_lo = (it.second << 16) | it.first;
+            buckets[hi_lo].push_back(entry.offset);
+        }
+    }
+}
+
+// Once we have a new bucket, we'll try to combine it with all old buckets.
+static really_inline
+void nibUpdate(map<u32, u16> &nib, u32 hi_lo) {
+    u16 hi = hi_lo >> 16;
+    u16 lo = hi_lo & 0xffff;
+    for (const auto pairs : nib) {
+        u32 old = pairs.first;
+        if ((old >> 16) == hi || (old & 0xffff) == lo) {
+            if (!nib[old | hi_lo]) {
+                nib[old | hi_lo] = nib[old] | nib[hi_lo];
+            }
+        }
+    }
+}
+
+static really_inline
+void nibMaskUpdate(array<u8, 32> &mask, u32 data, u8 bit_index) {
+    for (u8 index = 0; data > 0; data >>= 1, index++) {
+        if (data & 1) {
+            // 0 ~ 7 bucket in first 16 bytes,
+            // 8 ~ 15 bucket in second 16 bytes.
+            if (bit_index >= 8) {
+                mask[index + 16] |= 1 << (bit_index - 8);
+            } else {
+                mask[index] |= 1 << bit_index;
+            }
+        }
+    }
+}
+
+static
+bool getShuftiMasks(const vector<LookEntry> &look, array<u8, 32> &hi_mask,
+                    array<u8, 32> &lo_mask, u8 *bucket_select_hi,
+                    u8 *bucket_select_lo, u64a &neg_mask,
+                    u8 &bit_idx, size_t len) {
+    map<u32, u16> nib; // map every bucket to its bucket number.
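+    // Each bucket is keyed by a packed u32 'hi_lo' built by getAllBuckets:
+    // the high 16 bits are a bitmap of high nibbles (rows of the 16x16 ASCII
+    // grid) and the low 16 bits a bitmap of low nibbles (columns). For
+    // example, a reach of {0x41, 0x51} covers rows 4 and 5 in column 1, so
+    // hi_lo = ((1 << 4 | 1 << 5) << 16) | (1 << 1) = 0x00300002.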
+    map<u32, vector<s8>, cmpNibble> bucket2offsets;
+    s32 base_offset = look.front().offset;
+
+    bit_idx = 0;
+    neg_mask = ~0ULL;
+
+    getAllBuckets(look, bucket2offsets, neg_mask);
+
+    for (const auto &it : bucket2offsets) {
+        u32 hi_lo = it.first;
+        // New bucket.
+        if (!nib[hi_lo]) {
+            if ((bit_idx >= 8 && len == 64) || bit_idx >= 16) {
+                return false;
+            }
+            nib[hi_lo] = 1 << bit_idx;
+
+            nibUpdate(nib, hi_lo);
+            nibMaskUpdate(hi_mask, hi_lo >> 16, bit_idx);
+            nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_idx);
+            bit_idx++;
+        }
+
+        DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]);
+
+        // Update bucket_select_mask.
+        u8 nib_hi = nib[hi_lo] >> 8;
+        u8 nib_lo = nib[hi_lo] & 0xff;
+        for (const auto offset : it.second) {
+            bucket_select_hi[offset - base_offset] |= nib_hi;
+            bucket_select_lo[offset - base_offset] |= nib_lo;
+        }
+    }
+    return true;
+}
+
+static
+unique_ptr<RoseInstruction>
+makeCheckShufti16x8(u32 offset_range, u8 bucket_idx,
+                    const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+                    const array<u8, 32> &bucket_select_mask,
+                    u32 neg_mask, s32 base_offset,
+                    const RoseInstruction *end_inst) {
+    if (offset_range > 16 || bucket_idx > 8) {
+        return nullptr;
+    }
+    array<u8, 32> nib_mask;
+    array<u8, 16> bucket_select_mask_16;
+    copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin());
+    copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16);
+    copy(bucket_select_mask.begin(), bucket_select_mask.begin() + 16,
+         bucket_select_mask_16.begin());
+    return make_unique<RoseInstrCheckShufti16x8>
+        (nib_mask, bucket_select_mask_16,
+         neg_mask & 0xffff, base_offset, end_inst);
+}
+
+static
+unique_ptr<RoseInstruction>
+makeCheckShufti32x8(u32 offset_range, u8 bucket_idx,
+                    const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+                    const array<u8, 32> &bucket_select_mask,
+                    u32 neg_mask, s32 base_offset,
+                    const RoseInstruction *end_inst) {
+    if (offset_range > 32 || bucket_idx > 8) {
+        return nullptr;
+    }
+
+    array<u8, 16> hi_mask_16;
+    array<u8, 16> lo_mask_16;
+    copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin());
+    copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin());
+    return make_unique<RoseInstrCheckShufti32x8>
+        (hi_mask_16, lo_mask_16, bucket_select_mask,
+         neg_mask, base_offset, end_inst);
+}
+
+static
+unique_ptr<RoseInstruction>
+makeCheckShufti16x16(u32 offset_range, u8 bucket_idx,
+                     const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+                     const array<u8, 32> &bucket_select_mask_lo,
+                     const array<u8, 32> &bucket_select_mask_hi,
+                     u32 neg_mask, s32 base_offset,
+                     const RoseInstruction *end_inst) {
+    if (offset_range > 16 || bucket_idx > 16) {
+        return nullptr;
+    }
+
+    array<u8, 32> bucket_select_mask_32;
+    copy(bucket_select_mask_lo.begin(), bucket_select_mask_lo.begin() + 16,
+         bucket_select_mask_32.begin());
+    copy(bucket_select_mask_hi.begin(), bucket_select_mask_hi.begin() + 16,
+         bucket_select_mask_32.begin() + 16);
+    return make_unique<RoseInstrCheckShufti16x16>
+        (hi_mask, lo_mask, bucket_select_mask_32,
+         neg_mask & 0xffff, base_offset, end_inst);
+}
+
+static
+unique_ptr<RoseInstruction>
+makeCheckShufti32x16(u32 offset_range, u8 bucket_idx,
+                     const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
+                     const array<u8, 32> &bucket_select_mask_lo,
+                     const array<u8, 32> &bucket_select_mask_hi,
+                     u32 neg_mask, s32 base_offset,
+                     const RoseInstruction *end_inst) {
+    if (offset_range > 32 || bucket_idx > 16) {
+        return nullptr;
+    }
+
+    return make_unique<RoseInstrCheckShufti32x16>
+        (hi_mask, lo_mask, bucket_select_mask_hi,
+         bucket_select_mask_lo, neg_mask, base_offset, end_inst);
+}
+
+static
+bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) {
+
+    s32 base_offset = verify_s32(look.front().offset);
+    if (look.back().offset >= base_offset + 32) {
+        return false;
+    }
+
+    u8 bucket_idx = 0; // number of buckets
+    u64a neg_mask_64;
+    array<u8, 32> hi_mask;
+    array<u8, 32> lo_mask;
+    array<u8, 32> bucket_select_hi;
+    array<u8, 32> bucket_select_lo;
+    hi_mask.fill(0);
+    lo_mask.fill(0);
+    bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
+    bucket_select_lo.fill(0);
+
+    if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(),
+                        bucket_select_lo.data(), neg_mask_64, bucket_idx, 32)) {
+        return false;
+    }
+    u32 neg_mask = (u32)neg_mask_64;
+
+    DEBUG_PRINTF("hi_mask %s\n",
+                 convertMaskstoString(hi_mask.data(), 32).c_str());
+    DEBUG_PRINTF("lo_mask %s\n",
+                 convertMaskstoString(lo_mask.data(), 32).c_str());
+    DEBUG_PRINTF("bucket_select_hi %s\n",
+                 convertMaskstoString(bucket_select_hi.data(), 32).c_str());
+    DEBUG_PRINTF("bucket_select_lo %s\n",
+                 convertMaskstoString(bucket_select_lo.data(), 32).c_str());
+
+    const auto *end_inst = program.end_instruction();
+    s32 offset_range = look.back().offset - base_offset + 1;
+
+    auto ri = makeCheckShufti16x8(offset_range, bucket_idx, hi_mask, lo_mask,
+                                  bucket_select_lo, neg_mask, base_offset,
+                                  end_inst);
+    if (!ri) {
+        ri = makeCheckShufti32x8(offset_range, bucket_idx, hi_mask, lo_mask,
+                                 bucket_select_lo, neg_mask, base_offset,
+                                 end_inst);
+    }
+    if (!ri) {
+        ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask,
+                                  bucket_select_lo, bucket_select_hi,
+                                  neg_mask, base_offset, end_inst);
+    }
+    if (!ri) {
+        ri = makeCheckShufti32x16(offset_range, bucket_idx, hi_mask, lo_mask,
+                                  bucket_select_lo, bucket_select_hi,
+                                  neg_mask, base_offset, end_inst);
+    }
+    assert(ri);
+    program.add_before_end(move(ri));
+
+    return true;
+}
+
+/**
+ * Builds a lookaround instruction, or an appropriate specialization if one is
+ * available.
+ */
+static
+void makeLookaroundInstruction(lookaround_info &lookarounds,
+                               const vector<LookEntry> &look,
+                               RoseProgram &program) {
+    assert(!look.empty());
+
+    if (makeRoleByte(look, program)) {
+        return;
+    }
+
+    if (look.size() == 1) {
+        s8 offset = look.begin()->offset;
+        u32 look_idx, reach_idx;
+        vector<vector<LookEntry>> lookaround;
+        lookaround.emplace_back(look);
+        addLookaround(lookarounds, lookaround, look_idx, reach_idx);
+        // We don't need look_idx here.
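+        // (CHECK_SINGLE_LOOKAROUND carries its sole offset inline in the
+        // instruction, so only the reach table entry is consulted at
+        // runtime.)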
+        auto ri = make_unique<RoseInstrCheckSingleLookaround>(offset, reach_idx,
+                                                     program.end_instruction());
+        program.add_before_end(move(ri));
+        return;
+    }
+
+    if (makeRoleMask(look, program)) {
+        return;
+    }
+
+    if (makeRoleMask32(look, program)) {
+        return;
+    }
+
+    if (makeRoleShufti(look, program)) {
+        return;
+    }
+
+    u32 look_idx, reach_idx;
+    vector<vector<LookEntry>> lookaround;
+    lookaround.emplace_back(look);
+    addLookaround(lookarounds, lookaround, look_idx, reach_idx);
+    u32 look_count = verify_u32(look.size());
+
+    auto ri = make_unique<RoseInstrCheckLookaround>(look_idx, reach_idx,
+                                                    look_count,
+                                                    program.end_instruction());
+    program.add_before_end(move(ri));
+}
+
+static
+void makeCheckLitMaskInstruction(const RoseBuildImpl &build,
+                                 lookaround_info &lookarounds, u32 lit_id,
+                                 RoseProgram &program) {
+    const auto &info = build.literal_info.at(lit_id);
+    if (!info.requires_benefits) {
+        return;
+    }
+
+    vector<LookEntry> look;
+
+    const ue2_literal &s = build.literals.right.at(lit_id).s;
+    DEBUG_PRINTF("building mask for lit %u: %s\n", lit_id,
+                 dumpString(s).c_str());
+    assert(s.length() <= MAX_MASK2_WIDTH);
+    s32 i = 0 - s.length();
+    for (const auto &e : s) {
+        if (!e.nocase) {
+            look.emplace_back(verify_s8(i), e);
+        }
+        i++;
+    }
+
+    assert(!look.empty());
+    makeLookaroundInstruction(lookarounds, look, program);
+}
+
+static
+void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, u32 lit_id,
+                                  const vector<RoseEdge> &lit_edges,
+                                  u32 floatingMinLiteralMatchOffset,
+                                  RoseProgram &prog) {
+    if (lit_edges.empty()) {
+        return;
+    }
+
+    if (floatingMinLiteralMatchOffset == 0) {
+        return;
+    }
+
+    RoseVertex v = target(lit_edges.front(), build.g);
+    if (!build.isFloating(v)) {
+        return;
+    }
+
+    const auto &lit = build.literals.right.at(lit_id);
+    size_t min_len = lit.elength();
+    u32 min_offset = findMinOffset(build, lit_id);
+    DEBUG_PRINTF("has min_len=%zu, min_offset=%u, global min is %u\n", min_len,
+                 min_offset, floatingMinLiteralMatchOffset);
+
+    // If we can't match before the min offset, we don't need the check.
+    if (min_len >= floatingMinLiteralMatchOffset) {
+        DEBUG_PRINTF("no need for check, min is %u\n",
+                     floatingMinLiteralMatchOffset);
+        return;
+    }
+
+    assert(min_offset >= floatingMinLiteralMatchOffset);
+    assert(min_offset < UINT32_MAX);
+
+    DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset);
+    const auto *end = prog.end_instruction();
+    prog.add_before_end(make_unique<RoseInstrCheckLitEarly>(min_offset, end));
+}
+
+static
+void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 lit_id,
+                               RoseProgram &prog) {
+    const auto &info = build.literal_info.at(lit_id);
+
+    if (!info.group_mask) {
+        return;
+    }
+    prog.add_before_end(make_unique<RoseInstrCheckGroups>(info.group_mask));
+}
+
+static
+bool hasDelayedLiteral(const RoseBuildImpl &build,
+                       const vector<RoseEdge> &lit_edges) {
+    auto is_delayed = bind(&RoseBuildImpl::isDelayed, &build, _1);
+    for (const auto &e : lit_edges) {
+        auto v = target(e, build.g);
+        const auto &lits = build.g[v].literals;
+        if (any_of(begin(lits), end(lits), is_delayed)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static
+RoseProgram makeLitInitialProgram(const RoseBuildImpl &build,
+                                  lookaround_info &lookarounds,
+                                  ProgramBuild &prog_build, u32 lit_id,
+                                  const vector<RoseEdge> &lit_edges,
+                                  bool is_anchored_replay_program) {
+    RoseProgram program;
+
+    // Check long literal info.
+    if (!build.isDelayed(lit_id)) {
+        makeCheckLiteralInstruction(build.literals.right.at(lit_id),
+                                    prog_build.longLitLengthThreshold,
+                                    program, build.cc);
+    }
+
+    // Check lit mask.
+    makeCheckLitMaskInstruction(build, lookarounds, lit_id, program);
+
+    // Check literal groups. This is an optimisation that we only perform for
+    // delayed literals, as their groups may be switched off; ordinarily, we
+    // can trust the HWLM matcher.
+    if (hasDelayedLiteral(build, lit_edges)) {
+        makeGroupCheckInstruction(build, lit_id, program);
+    }
+
+    // Add instructions for pushing delayed matches, if there are any.
+    makePushDelayedInstructions(build.literals, prog_build,
+                                build.literal_info.at(lit_id).delayed_ids,
+                                program);
+
+    // Add pre-check for early literals in the floating table.
+    makeCheckLitEarlyInstruction(build, lit_id, lit_edges,
+                                 prog_build.floatingMinLiteralMatchOffset,
+                                 program);
+
+    /* Check if we are able to deliver matches from the anchored table now */
+    if (!is_anchored_replay_program) {
+        makeAnchoredLiteralDelay(build, prog_build, lit_id, program);
+    }
+
+    return program;
+}
+
+#if defined(DEBUG) || defined(DUMP_SUPPORT)
+static UNUSED
+string dumpMultiLook(const vector<LookEntry> &looks) {
+    ostringstream oss;
+    for (auto it = looks.begin(); it != looks.end(); ++it) {
+        if (it != looks.begin()) {
+            oss << ", ";
+        }
+        oss << "{" << int(it->offset) << ": " << describeClass(it->reach) << "}";
+    }
+    return oss.str();
+}
+#endif
+
+static
+bool makeRoleMultipathShufti(const vector<vector<LookEntry>> &multi_look,
+                             RoseProgram &program) {
+    if (multi_look.empty()) {
+        return false;
+    }
+
+    // find the base offset
+    assert(!multi_look[0].empty());
+    s32 base_offset = multi_look[0].front().offset;
+    s32 last_start = base_offset;
+    s32 end_offset = multi_look[0].back().offset;
+    size_t multi_len = 0;
+
+    for (const auto &look : multi_look) {
+        assert(look.size() > 0);
+        multi_len += look.size();
+
+        LIMIT_TO_AT_MOST(&base_offset, look.front().offset);
+        ENSURE_AT_LEAST(&last_start, look.front().offset);
+        ENSURE_AT_LEAST(&end_offset, look.back().offset);
+    }
+
+    assert(last_start < 0);
+
+    if (end_offset - base_offset >= MULTIPATH_MAX_LEN) {
+        return false;
+    }
+
+    if (multi_len <= 16) {
+        multi_len = 16;
+    } else if (multi_len <= 32) {
+        multi_len = 32;
+    } else if (multi_len <= 64) {
+        multi_len = 64;
+    } else {
+        DEBUG_PRINTF("too long for multi-path\n");
+        return false;
+    }
+
+    vector<LookEntry> linear_look;
+    array<u8, 64> data_select_mask;
+    data_select_mask.fill(0);
+    u64a hi_bits_mask = 0;
+    u64a lo_bits_mask = 0;
+
+    for (const auto &look : multi_look) {
+        assert(linear_look.size() < 64);
+        lo_bits_mask |= 1LLU << linear_look.size();
+        for (const auto &entry : look) {
+            assert(entry.offset - base_offset < MULTIPATH_MAX_LEN);
+            data_select_mask[linear_look.size()] =
+                verify_u8(entry.offset - base_offset);
+            linear_look.emplace_back(verify_s8(linear_look.size()), entry.reach);
+        }
+        hi_bits_mask |= 1LLU << (linear_look.size() - 1);
+    }
+
+    u8 bit_index = 0; // number of buckets
+    u64a neg_mask;
+    array<u8, 32> hi_mask;
+    array<u8, 32> lo_mask;
+    array<u8, 64> bucket_select_hi;
+    array<u8, 64> bucket_select_lo;
+    hi_mask.fill(0);
+    lo_mask.fill(0);
+    bucket_select_hi.fill(0);
+    bucket_select_lo.fill(0);
+
+    if (!getShuftiMasks(linear_look, hi_mask, lo_mask, bucket_select_hi.data(),
+                        bucket_select_lo.data(), neg_mask, bit_index,
+                        multi_len)) {
+        return false;
+    }
+
+    DEBUG_PRINTF("hi_mask %s\n",
+                 convertMaskstoString(hi_mask.data(), 16).c_str());
+    DEBUG_PRINTF("lo_mask %s\n",
+                 convertMaskstoString(lo_mask.data(), 16).c_str());
+    DEBUG_PRINTF("bucket_select_hi %s\n",
+                 convertMaskstoString(bucket_select_hi.data(), 64).c_str());
+    DEBUG_PRINTF("bucket_select_lo %s\n",
+                 convertMaskstoString(bucket_select_lo.data(), 64).c_str());
+    DEBUG_PRINTF("data_select_mask %s\n",
+                 convertMaskstoString(data_select_mask.data(), 
64).c_str()); + DEBUG_PRINTF("hi_bits_mask %llx\n", hi_bits_mask); + DEBUG_PRINTF("lo_bits_mask %llx\n", lo_bits_mask); + DEBUG_PRINTF("neg_mask %llx\n", neg_mask); + DEBUG_PRINTF("base_offset %d\n", base_offset); + DEBUG_PRINTF("last_start %d\n", last_start); + + // Since we don't have 16x16 now, just call 32x16 instead. + if (bit_index > 8) { + assert(multi_len <= 32); + multi_len = 32; + } + + const auto *end_inst = program.end_instruction(); + assert(multi_len == 16 || multi_len == 32 || multi_len == 64); + if (multi_len == 16) { + neg_mask &= 0xffff; + assert(!(hi_bits_mask & ~0xffffULL)); + assert(!(lo_bits_mask & ~0xffffULL)); + assert(bit_index <=8); + array nib_mask; + copy(begin(lo_mask), begin(lo_mask) + 16, nib_mask.begin()); + copy(begin(hi_mask), begin(hi_mask) + 16, nib_mask.begin() + 16); + + auto ri = make_unique + (nib_mask, bucket_select_lo, data_select_mask, hi_bits_mask, + lo_bits_mask, neg_mask, base_offset, last_start, end_inst); + program.add_before_end(move(ri)); + } else if (multi_len == 32) { + neg_mask &= 0xffffffff; + assert(!(hi_bits_mask & ~0xffffffffULL)); + assert(!(lo_bits_mask & ~0xffffffffULL)); + if (bit_index <= 8) { + auto ri = make_unique + (hi_mask, lo_mask, bucket_select_lo, data_select_mask, + hi_bits_mask, lo_bits_mask, neg_mask, base_offset, + last_start, end_inst); + program.add_before_end(move(ri)); + } else { + auto ri = make_unique + (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo, + data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, + base_offset, last_start, end_inst); + program.add_before_end(move(ri)); + } + } else { + auto ri = make_unique + (hi_mask, lo_mask, bucket_select_lo, data_select_mask, + hi_bits_mask, lo_bits_mask, neg_mask, base_offset, + last_start, end_inst); + program.add_before_end(move(ri)); + } + return true; +} + +static +void makeRoleMultipathLookaround(lookaround_info &lookarounds, + const vector> &multi_look, + RoseProgram &program) { + assert(!multi_look.empty()); + assert(multi_look.size() <= MAX_LOOKAROUND_PATHS); + vector> ordered_look; + set look_offset; + + assert(!multi_look[0].empty()); + s32 last_start = multi_look[0][0].offset; + + // build offset table. 
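+    // (Illustrative example: paths {-3,-2} and {-3,-1} give the offset table
+    // {-3,-2,-1}; when the paths are transposed into ordered_look below, the
+    // slots a path lacks are left as empty LookEntry values.)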
+ for (const auto &look : multi_look) { + assert(look.size() > 0); + last_start = max(last_start, (s32)look.begin()->offset); + + for (const auto &t : look) { + look_offset.insert(t.offset); + } + } + + array start_mask; + if (multi_look.size() < MAX_LOOKAROUND_PATHS) { + start_mask.fill((1 << multi_look.size()) - 1); + } else { + start_mask.fill(0xff); + } + + u32 path_idx = 0; + for (const auto &look : multi_look) { + for (const auto &t : look) { + assert(t.offset >= (int)*look_offset.begin()); + size_t update_offset = t.offset - *look_offset.begin() + 1; + if (update_offset < start_mask.size()) { + start_mask[update_offset] &= ~(1 << path_idx); + } + } + path_idx++; + } + + for (u32 i = 1; i < MULTIPATH_MAX_LEN; i++) { + start_mask[i] &= start_mask[i - 1]; + DEBUG_PRINTF("start_mask[%u] = %x\n", i, start_mask[i]); + } + + assert(look_offset.size() <= MULTIPATH_MAX_LEN); + + assert(last_start < 0); + + for (const auto &offset : look_offset) { + vector multi_entry; + multi_entry.resize(MAX_LOOKAROUND_PATHS); + + for (size_t i = 0; i < multi_look.size(); i++) { + for (const auto &t : multi_look[i]) { + if (t.offset == offset) { + multi_entry[i] = t; + } + } + } + ordered_look.emplace_back(multi_entry); + } + + u32 look_idx, reach_idx; + addLookaround(lookarounds, ordered_look, look_idx, reach_idx); + u32 look_count = verify_u32(ordered_look.size()); + + auto ri = make_unique(look_idx, reach_idx, + look_count, last_start, + start_mask, + program.end_instruction()); + program.add_before_end(move(ri)); +} + +static +void makeRoleLookaround(const RoseBuildImpl &build, + const map &leftfix_info, + lookaround_info &lookarounds, RoseVertex v, + RoseProgram &program) { + if (!build.cc.grey.roseLookaroundMasks) { + return; + } + + vector> looks; + + // Lookaround from leftfix (mandatory). + if (contains(leftfix_info, v) && leftfix_info.at(v).has_lookaround) { + DEBUG_PRINTF("using leftfix lookaround\n"); + looks = leftfix_info.at(v).lookaround; + } + + // We may be able to find more lookaround info (advisory) and merge it + // in. + if (looks.size() <= 1) { + vector look; + vector look_more; + if (!looks.empty()) { + look = move(looks.front()); + } + findLookaroundMasks(build, v, look_more); + mergeLookaround(look, look_more); + if (!look.empty()) { + makeLookaroundInstruction(lookarounds, look, program); + } + return; + } + + if (!makeRoleMultipathShufti(looks, program)) { + assert(looks.size() <= 8); + makeRoleMultipathLookaround(lookarounds, looks, program); + } +} + +static +void makeRoleSuffix(const RoseBuildImpl &build, + const map &suffixes, + const map &engine_info_by_queue, + RoseVertex v, RoseProgram &prog) { + const auto &g = build.g; + if (!g[v].suffix) { + return; + } + assert(contains(suffixes, g[v].suffix)); + u32 queue = suffixes.at(g[v].suffix); + u32 event; + assert(contains(engine_info_by_queue, queue)); + const auto eng_info = engine_info_by_queue.at(queue); + if (isContainerType(eng_info.type)) { + auto tamaProto = g[v].suffix.tamarama.get(); + assert(tamaProto); + event = (u32)MQE_TOP_FIRST + + tamaProto->top_remap.at(make_pair(g[v].index, + g[v].suffix.top)); + assert(event < MQE_INVALID); + } else if (isMultiTopType(eng_info.type)) { + assert(!g[v].suffix.haig); + event = (u32)MQE_TOP_FIRST + g[v].suffix.top; + assert(event < MQE_INVALID); + } else { + // DFAs/Puffs have no MQE_TOP_N support, so they get a classic TOP + // event. 
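+        // (For example, a multi-top suffix with tops {0, 2} would use events
+        // MQE_TOP_FIRST and MQE_TOP_FIRST + 2 above; engines on this branch
+        // only ever see MQE_TOP.)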
+ assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph)); + event = MQE_TOP; + } + + prog.add_before_end(make_unique(queue, event)); +} + +static +void addInfixTriggerInstructions(vector triggers, + RoseProgram &prog) { + // Order, de-dupe and add instructions to the end of program. + sort_and_unique(triggers, [](const TriggerInfo &a, const TriggerInfo &b) { + return tie(a.cancel, a.queue, a.event) < + tie(b.cancel, b.queue, b.event); + }); + for (const auto &ti : triggers) { + prog.add_before_end( + make_unique(ti.cancel, ti.queue, ti.event)); + } +} + +static +void makeRoleInfixTriggers(const RoseBuildImpl &build, + const map &leftfix_info, + const map &engine_info_by_queue, + RoseVertex u, RoseProgram &program) { + const auto &g = build.g; + + vector triggers; + + for (const auto &e : out_edges_range(u, g)) { + RoseVertex v = target(e, g); + if (!g[v].left) { + continue; + } + + assert(contains(leftfix_info, v)); + const left_build_info &lbi = leftfix_info.at(v); + if (lbi.has_lookaround) { + continue; + } + + assert(contains(engine_info_by_queue, lbi.queue)); + const auto &eng_info = engine_info_by_queue.at(lbi.queue); + + // DFAs have no TOP_N support, so they get a classic MQE_TOP event. + u32 top; + if (isContainerType(eng_info.type)) { + auto tamaProto = g[v].left.tamarama.get(); + assert(tamaProto); + top = MQE_TOP_FIRST + tamaProto->top_remap.at( + make_pair(g[v].index, g[e].rose_top)); + assert(top < MQE_INVALID); + } else if (!isMultiTopType(eng_info.type)) { + assert(num_tops(g[v].left) == 1); + top = MQE_TOP; + } else { + top = MQE_TOP_FIRST + g[e].rose_top; + assert(top < MQE_INVALID); + } + + triggers.emplace_back(g[e].rose_cancel_prev_top, lbi.queue, top); + } + + addInfixTriggerInstructions(move(triggers), program); +} + + +/** + * \brief True if the given vertex is a role that can only be switched on at + * EOD. + */ +static +bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) { + const RoseGraph &g = tbi.g; + + // All such roles have only (0,0) edges to vertices with the eod_accept + // property, and no other effects (suffixes, ordinary reports, etc, etc). 
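+    // (For instance, the role for 'foo' in a pattern like /foo$/ has only a
+    // (0,0) edge to an eod_accept vertex, so it can never fire before EOD.
+    // Hypothetical pattern, for illustration only.)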
+
+    if (isLeafNode(v, g) || !g[v].reports.empty() || g[v].suffix) {
+        return false;
+    }
+
+    for (const auto &e : out_edges_range(v, g)) {
+        RoseVertex w = target(e, g);
+        if (!g[w].eod_accept) {
+            return false;
+        }
+        assert(!g[w].reports.empty());
+        assert(g[w].literals.empty());
+
+        if (g[e].minBound || g[e].maxBound) {
+            return false;
+        }
+    }
+
+    /* There is no point enforcing this check at runtime if this role is only
+     * fired by the eod event literal */
+    if (tbi.eod_event_literal_id != MO_INVALID_IDX &&
+        g[v].literals.size() == 1 &&
+        *g[v].literals.begin() == tbi.eod_event_literal_id) {
+        return false;
+    }
+
+    return true;
+}
+
+static
+void addCheckOnlyEodInstruction(RoseProgram &prog) {
+    DEBUG_PRINTF("only at eod\n");
+    const auto *end_inst = prog.end_instruction();
+    prog.add_before_end(make_unique<RoseInstrCheckOnlyEod>(end_inst));
+}
+
+static
+void makeRoleEagerEodReports(const RoseBuildImpl &build,
+                             const map<RoseVertex, left_build_info> &leftfix_info,
+                             bool needs_catchup, RoseVertex v,
+                             RoseProgram &program) {
+    RoseProgram eod_program;
+
+    for (const auto &e : out_edges_range(v, build.g)) {
+        if (canEagerlyReportAtEod(build, e)) {
+            RoseProgram block;
+            makeRoleReports(build, leftfix_info, needs_catchup,
+                            target(e, build.g), block);
+            eod_program.add_block(move(block));
+        }
+    }
+
+    if (eod_program.empty()) {
+        return;
+    }
+
+    if (!onlyAtEod(build, v)) {
+        // The rest of our program wasn't EOD anchored, so we need to guard
+        // these reports with a check.
+        addCheckOnlyEodInstruction(program);
+    }
+
+    program.add_before_end(move(eod_program));
+}
+
+/* Makes a program for a role/vertex given a specific pred/in_edge. */
+static
+RoseProgram makeRoleProgram(const RoseBuildImpl &build,
+                            const map<RoseVertex, left_build_info> &leftfix_info,
+                            const map<suffix_id, u32> &suffixes,
+                            const map<u32, engine_info> &engine_info_by_queue,
+                            lookaround_info &lookarounds,
+                            unordered_map<RoseVertex, u32> roleStateIndices,
+                            ProgramBuild &prog_build, const RoseEdge &e) {
+    const RoseGraph &g = build.g;
+    auto v = target(e, g);
+
+    RoseProgram program;
+
+    // First, add program instructions that enforce preconditions without
+    // effects.
+
+    if (onlyAtEod(build, v)) {
+        addCheckOnlyEodInstruction(program);
+    }
+
+    if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
+        makeRoleCheckBounds(build, v, e, program);
+    }
+
+    // This role program may be triggered by different predecessors, with
+    // different offset bounds. We must ensure we put this check/set operation
+    // after the bounds check to deal with this case.
+    if (in_degree(v, g) > 1) {
+        assert(!build.isRootSuccessor(v));
+        makeRoleCheckNotHandled(prog_build, v, program);
+    }
+
+    makeRoleLookaround(build, leftfix_info, lookarounds, v, program);
+    makeRoleCheckLeftfix(build, leftfix_info, v, program);
+
+    // Next, we can add program instructions that have effects. This must be
+    // done as a series of blocks, as some of them (like reports) are
+    // escapable.
+
+    RoseProgram effects_block;
+
+    RoseProgram reports_block;
+    makeRoleReports(build, leftfix_info, prog_build.needs_catchup, v,
+                    reports_block);
+    effects_block.add_block(move(reports_block));
+
+    RoseProgram infix_block;
+    makeRoleInfixTriggers(build, leftfix_info, engine_info_by_queue, v,
+                          infix_block);
+    effects_block.add_block(move(infix_block));
+
+    // Note: SET_GROUPS instruction must be after infix triggers, as an infix
+    // going dead may switch off groups.
+    RoseProgram groups_block;
+    makeRoleGroups(build.g, prog_build, v, groups_block);
+    effects_block.add_block(move(groups_block));
+
+    RoseProgram suffix_block;
+    makeRoleSuffix(build, suffixes, engine_info_by_queue, v, suffix_block);
+    effects_block.add_block(move(suffix_block));
+
+    RoseProgram state_block;
+    makeRoleSetState(roleStateIndices, v, state_block);
+    effects_block.add_block(move(state_block));
+
+    // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if
+    // the program doesn't have one already).
+    RoseProgram eod_block;
+    makeRoleEagerEodReports(build, leftfix_info, prog_build.needs_catchup, v,
+                            eod_block);
+    effects_block.add_block(move(eod_block));
+
+    /* a 'ghost role' may do nothing if we know that its groups are already set
+     * - in this case we can avoid producing a program at all. */
+    if (effects_block.empty()) {
+        return {};
+    }
+
+    program.add_before_end(move(effects_block));
+    return program;
+}
+
+static
+void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 lit_id,
+                                RoseProgram &prog) {
+    const auto &info = build.literal_info.at(lit_id);
+    if (!info.squash_group) {
+        return;
+    }
+
+    DEBUG_PRINTF("squashes 0x%llx\n", info.group_mask);
+    assert(info.group_mask);
+    /* Note: group_mask is negated. */
+    prog.add_before_end(make_unique<RoseInstrSquashGroups>(~info.group_mask));
+}
+
+RoseProgram assembleProgramBlocks(vector<RoseProgram> &&blocks) {
+    DEBUG_PRINTF("%zu blocks before dedupe\n", blocks.size());
+
+    sort(blocks.begin(), blocks.end(),
+         [](const RoseProgram &a, const RoseProgram &b) {
+             RoseProgramHash hasher;
+             return hasher(a) < hasher(b);
+         });
+
+    blocks.erase(unique(blocks.begin(), blocks.end(), RoseProgramEquivalence()),
+                 blocks.end());
+
+    DEBUG_PRINTF("%zu blocks after dedupe\n", blocks.size());
+
+    RoseProgram prog;
+    for (auto &block : blocks) {
+        /* If we have multiple blocks from different literals and any of them
+         * squash groups, we will have to add a CLEAR_WORK_DONE instruction to
+         * each literal program block to clear the work_done flag, so that it
+         * is only set if a state has been set by the current block. */
+        if (!prog.empty() && reads_work_done_flag(block)) {
+            RoseProgram clear_block;
+            clear_block.add_before_end(make_unique<RoseInstrClearWorkDone>());
+            prog.add_block(move(clear_block));
+        }
+
+        prog.add_block(move(block));
+    }
+
+    return prog;
+}
+
+RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
+                               const map<RoseVertex, left_build_info> &leftfix_info,
+                               const map<suffix_id, u32> &suffixes,
+                               const map<u32, engine_info> &engine_info_by_queue,
+                               lookaround_info &lookarounds,
+                               unordered_map<RoseVertex, u32> roleStateIndices,
+                               ProgramBuild &prog_build, u32 lit_id,
+                               const vector<RoseEdge> &lit_edges,
+                               bool is_anchored_replay_program) {
+    const auto &g = build.g;
+
+    DEBUG_PRINTF("lit id=%u, %zu lit edges\n", lit_id, lit_edges.size());
+
+    // Construct initial program up front, as its early checks must be able
+    // to jump to end and terminate processing for this literal.
+    auto lit_program = makeLitInitialProgram(build, lookarounds, prog_build,
+                                             lit_id, lit_edges,
+                                             is_anchored_replay_program);
+
+    RoseProgram role_programs;
+
+    // Predecessor state id -> program block.
+    map<u32, RoseProgram> pred_blocks;
+
+    // Construct sparse iter sub-programs.
+    for (const auto &e : lit_edges) {
+        const auto &u = source(e, g);
+        if (build.isAnyStart(u)) {
+            continue; // Root roles are not handled with sparse iterator.
+        }
+        DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].index,
+                     g[target(e, g)].index);
+        assert(contains(roleStateIndices, u));
+        u32 pred_state = roleStateIndices.at(u);
+        auto role_prog = makeRoleProgram(build, leftfix_info, suffixes,
+                                         engine_info_by_queue, lookarounds,
+                                         roleStateIndices, prog_build, e);
+        if (!role_prog.empty()) {
+            pred_blocks[pred_state].add_block(move(role_prog));
+        }
+    }
+
+    // Add blocks to deal with non-root edges (triggered by sparse iterator or
+    // mmbit_isset checks).
+    addPredBlocks(pred_blocks, roleStateIndices.size(), role_programs);
+
+    // Add blocks to handle root roles.
+    for (const auto &e : lit_edges) {
+        const auto &u = source(e, g);
+        if (!build.isAnyStart(u)) {
+            continue;
+        }
+        DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index,
+                     g[target(e, g)].index);
+        auto role_prog = makeRoleProgram(build, leftfix_info, suffixes,
+                                         engine_info_by_queue, lookarounds,
+                                         roleStateIndices, prog_build, e);
+        role_programs.add_block(move(role_prog));
+    }
+
+    if (lit_id == build.eod_event_literal_id) {
+        /* Note: does not require the lit initial program */
+        assert(build.eod_event_literal_id != MO_INVALID_IDX);
+        return role_programs;
+    }
+
+    /* Instructions to run even if a role program bails out */
+    RoseProgram unconditional_block;
+
+    // Literal may squash groups.
+    makeGroupSquashInstruction(build, lit_id, unconditional_block);
+
+    role_programs.add_block(move(unconditional_block));
+    lit_program.add_before_end(move(role_programs));
+
+    return lit_program;
+}
+
+RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build,
+                                    lookaround_info &lookarounds,
+                                    ProgramBuild &prog_build,
+                                    const vector<u32> &lit_ids) {
+    assert(!lit_ids.empty());
+    assert(build.cc.streaming);
+
+    vector<RoseProgram> blocks;
+
+    for (const auto &lit_id : lit_ids) {
+        DEBUG_PRINTF("lit_id=%u\n", lit_id);
+        const auto &info = build.literal_info.at(lit_id);
+        if (info.delayed_ids.empty()) {
+            continue; // No delayed IDs, no work to do.
+        }
+
+        RoseProgram prog;
+        if (!build.isDelayed(lit_id)) {
+            makeCheckLiteralInstruction(build.literals.right.at(lit_id),
+                                        prog_build.longLitLengthThreshold, prog,
+                                        build.cc);
+        }
+
+        makeCheckLitMaskInstruction(build, lookarounds, lit_id, prog);
+        makePushDelayedInstructions(build.literals, prog_build,
+                                    build.literal_info.at(lit_id).delayed_ids,
+                                    prog);
+        blocks.push_back(move(prog));
+    }
+
+    return assembleProgramBlocks(move(blocks));
+}
+
+RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build,
+                                 ProgramBuild &prog_build, const RoseEdge &e,
+                                 const bool multiple_preds) {
+    const RoseGraph &g = build.g;
+    const RoseVertex v = target(e, g);
+
+    RoseProgram program;
+
+    if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
+        makeRoleCheckBounds(build, v, e, program);
+    }
+
+    if (multiple_preds) {
+        // Only necessary when there is more than one pred.
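+        // (With a single pred, the role can be reached only once per offset,
+        // so the handled-key bookkeeping would be pure overhead.)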
+ makeRoleCheckNotHandled(prog_build, v, program); + } + + makeCatchup(build.rm, prog_build.needs_catchup, g[v].reports, program); + + const bool has_som = false; + RoseProgram report_block; + for (const auto &id : g[v].reports) { + makeReport(build, id, has_som, report_block); + } + program.add_before_end(move(report_block)); + + return program; +} + +static +void makeCatchupMpv(const ReportManager &rm, bool needs_mpv_catchup, + ReportID id, RoseProgram &program) { + if (!needs_mpv_catchup) { + return; + } + + const Report &report = rm.getReport(id); + if (report.type == INTERNAL_ROSE_CHAIN) { + return; + } + + program.add_before_end(make_unique()); +} + +RoseProgram makeReportProgram(const RoseBuildImpl &build, + bool needs_mpv_catchup, ReportID id) { + RoseProgram prog; + + makeCatchupMpv(build.rm, needs_mpv_catchup, id, prog); + + const bool has_som = false; + makeReport(build, id, has_som, prog); + + return prog; +} + +RoseProgram makeBoundaryProgram(const RoseBuildImpl &build, + const set &reports) { + // Note: no CATCHUP instruction is necessary in the boundary case, as we + // should always be caught up (and may not even have the resources in + // scratch to support it). + + const bool has_som = false; + RoseProgram prog; + for (const auto &id : reports) { + makeReport(build, id, has_som, prog); + } + + return prog; +} + +static +void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block, + RoseProgram &program) { + // Prepend an instruction to check the pred state is on. + const auto *end_inst = pred_block.end_instruction(); + pred_block.insert(begin(pred_block), + make_unique(pred_state, end_inst)); + program.add_block(move(pred_block)); +} + +static +void addPredBlocksAny(map &pred_blocks, u32 num_states, + RoseProgram &program) { + RoseProgram sparse_program; + + vector keys; + for (const u32 &key : pred_blocks | map_keys) { + keys.push_back(key); + } + + const RoseInstruction *end_inst = sparse_program.end_instruction(); + auto ri = make_unique(num_states, keys, end_inst); + sparse_program.add_before_end(move(ri)); + + RoseProgram &block = pred_blocks.begin()->second; + + /* we no longer need the check handled instruction as all the pred-role + * blocks are being collapsed together */ + stripCheckHandledInstruction(block); + + sparse_program.add_before_end(move(block)); + program.add_block(move(sparse_program)); +} + +static +void addPredBlocksMulti(map &pred_blocks, + u32 num_states, RoseProgram &program) { + assert(!pred_blocks.empty()); + + RoseProgram sparse_program; + const RoseInstruction *end_inst = sparse_program.end_instruction(); + vector> jump_table; + + // BEGIN instruction. + auto ri_begin = make_unique(num_states, end_inst); + RoseInstrSparseIterBegin *begin_inst = ri_begin.get(); + sparse_program.add_before_end(move(ri_begin)); + + // NEXT instructions, one per pred program. + u32 prev_key = pred_blocks.begin()->first; + for (auto it = next(begin(pred_blocks)); it != end(pred_blocks); ++it) { + auto ri = make_unique(prev_key, begin_inst, + end_inst); + sparse_program.add_before_end(move(ri)); + prev_key = it->first; + } + + // Splice in each pred program after its BEGIN/NEXT. + auto out_it = begin(sparse_program); + for (auto &m : pred_blocks) { + u32 key = m.first; + RoseProgram &flat_prog = m.second; + assert(!flat_prog.empty()); + const size_t block_len = flat_prog.size() - 1; // without INSTR_END. 
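+        // (block_len excludes the spliced block's END instruction; out_it is
+        // advanced past the block below so the next NEXT/BEGIN is found.)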
+
+        assert(dynamic_cast<const RoseInstrSparseIterBegin *>(out_it->get()) ||
+               dynamic_cast<const RoseInstrSparseIterNext *>(out_it->get()));
+        out_it = sparse_program.insert(++out_it, move(flat_prog));
+
+        // Jump table target for this key is the beginning of the block we just
+        // spliced in.
+        jump_table.emplace_back(key, out_it->get());
+
+        assert(distance(begin(sparse_program), out_it) + block_len <=
+               sparse_program.size());
+        advance(out_it, block_len);
+    }
+
+    // Write the jump table back into the SPARSE_ITER_BEGIN instruction.
+    begin_inst->jump_table = move(jump_table);
+
+    program.add_block(move(sparse_program));
+}
+
+void addPredBlocks(map<u32, RoseProgram> &pred_blocks, u32 num_states,
+                   RoseProgram &program) {
+    // Trim empty blocks, if any exist.
+    for (auto it = pred_blocks.begin(); it != pred_blocks.end();) {
+        if (it->second.empty()) {
+            it = pred_blocks.erase(it);
+        } else {
+            ++it;
+        }
+    }
+
+    const size_t num_preds = pred_blocks.size();
+    if (num_preds == 0) {
+        return;
+    }
+
+    if (num_preds == 1) {
+        const auto head = pred_blocks.begin();
+        addPredBlockSingle(head->first, head->second, program);
+        return;
+    }
+
+    // First, see if all our blocks are equivalent, in which case we can
+    // collapse them down into one.
+    const auto &blocks = pred_blocks | map_values;
+    if (all_of(begin(blocks), end(blocks), [&](const RoseProgram &block) {
+            return RoseProgramEquivalence()(*begin(blocks), block);
+        })) {
+        DEBUG_PRINTF("all blocks equiv\n");
+        addPredBlocksAny(pred_blocks, num_states, program);
+        return;
+    }
+
+    addPredBlocksMulti(pred_blocks, num_states, program);
+}
+
+void applyFinalSpecialisation(RoseProgram &program) {
+    assert(!program.empty());
+    assert(program.back().code() == ROSE_INSTR_END);
+    if (program.size() < 2) {
+        return;
+    }
+
+    /* Replace the second-to-last instruction (before END) with a one-shot
+     * specialisation if available. 
*/ + auto it = next(program.rbegin()); + if (auto *ri = dynamic_cast(it->get())) { + DEBUG_PRINTF("replacing REPORT with FINAL_REPORT\n"); + program.replace(it, make_unique( + ri->onmatch, ri->offset_adjust)); + } +} + +void recordLongLiterals(vector &longLiterals, + const RoseProgram &program) { + for (const auto &ri : program) { + if (const auto *ri_check = + dynamic_cast(ri.get())) { + DEBUG_PRINTF("found CHECK_LONG_LIT for string '%s'\n", + escapeString(ri_check->literal).c_str()); + longLiterals.emplace_back(ri_check->literal, false); + continue; + } + if (const auto *ri_check = + dynamic_cast(ri.get())) { + DEBUG_PRINTF("found CHECK_LONG_LIT_NOCASE for string '%s'\n", + escapeString(ri_check->literal).c_str()); + longLiterals.emplace_back(ri_check->literal, true); + } + } +} + +void recordResources(RoseResources &resources, const RoseProgram &program) { + for (const auto &ri : program) { + switch (ri->code()) { + case ROSE_INSTR_TRIGGER_SUFFIX: + resources.has_suffixes = true; + break; + case ROSE_INSTR_TRIGGER_INFIX: + case ROSE_INSTR_CHECK_INFIX: + case ROSE_INSTR_CHECK_PREFIX: + case ROSE_INSTR_SOM_LEFTFIX: + resources.has_leftfixes = true; + break; + case ROSE_INSTR_SET_STATE: + case ROSE_INSTR_CHECK_STATE: + case ROSE_INSTR_SPARSE_ITER_BEGIN: + case ROSE_INSTR_SPARSE_ITER_NEXT: + resources.has_states = true; + break; + case ROSE_INSTR_CHECK_GROUPS: + resources.checks_groups = true; + break; + case ROSE_INSTR_PUSH_DELAYED: + resources.has_lit_delay = true; + break; + case ROSE_INSTR_CHECK_LONG_LIT: + case ROSE_INSTR_CHECK_LONG_LIT_NOCASE: + resources.has_lit_check = true; + break; + default: + break; + } + } +} + } // namespace ue2 diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index c25aab61..d6a9e218 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -42,8 +42,10 @@ namespace ue2 { +struct LookEntry; class RoseEngineBlob; class RoseInstruction; +struct RoseResources; /** * \brief Container for a list of program instructions. @@ -145,11 +147,161 @@ public: bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const; }; -/* Removes any CHECK_HANDLED instructions from the given program */ -void stripCheckHandledInstruction(RoseProgram &prog); +/** \brief Data only used during construction of various programs (literal, + * anchored, delay, etc). */ +struct ProgramBuild : noncopyable { + explicit ProgramBuild(u32 fMinLitOffset, size_t longLitThresh, + bool catchup) + : floatingMinLiteralMatchOffset(fMinLitOffset), + longLitLengthThreshold(longLitThresh), needs_catchup(catchup) { + } -/** Returns true if the program may read the the interpreter's work_done flag */ -bool reads_work_done_flag(const RoseProgram &prog); + /** \brief Minimum offset of a match from the floating table. */ + const u32 floatingMinLiteralMatchOffset; + + /** \brief Long literal length threshold, used in streaming mode. */ + const size_t longLitLengthThreshold; + + /** \brief True if reports need CATCH_UP instructions to catch up suffixes, + * outfixes etc. */ + const bool needs_catchup; + + /** \brief Mapping from vertex to key, for vertices with a + * CHECK_NOT_HANDLED instruction. */ + ue2::unordered_map handledKeys; + + /** \brief Mapping from Rose literal ID to anchored program index. */ + std::map anchored_programs; + + /** \brief Mapping from Rose literal ID to delayed program index. */ + std::map delay_programs; + + /** \brief Mapping from every vertex to the groups that must be on for that + * vertex to be reached. 
*/ + ue2::unordered_map vertex_group_map; + + /** \brief Global bitmap of groups that can be squashed. */ + rose_group squashable_groups = 0; +}; + +void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program); +void addSuffixesEodProgram(RoseProgram &program); +void addMatcherEodProgram(RoseProgram &program); + +static constexpr u32 INVALID_QUEUE = ~0U; + +struct left_build_info { + // Constructor for an engine implementation. + left_build_info(u32 q, u32 l, u32 t, rose_group sm, + const std::vector &stops, u32 max_ql, u8 cm_count, + const CharReach &cm_cr); + + // Constructor for a lookaround implementation. + explicit left_build_info(const std::vector> &looks); + + u32 queue = INVALID_QUEUE; /* uniquely idents the left_build_info */ + u32 lag = 0; + u32 transient = 0; + rose_group squash_mask = ~rose_group{0}; + std::vector stopAlphabet; + u32 max_queuelen = 0; + u8 countingMiracleCount = 0; + CharReach countingMiracleReach; + u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */ + bool has_lookaround = false; + + // alternative implementation to the NFA + std::vector> lookaround; +}; + +struct lookaround_info : noncopyable { + /** \brief LookEntry list cache, so that we can reuse the look index and + * reach index for the same lookaround. */ + ue2::unordered_map>, + std::pair> cache; + + /** \brief Lookaround table for Rose roles. */ + std::vector>> table; + + /** \brief Lookaround look table size. */ + size_t lookTableSize = 0; + + /** \brief Lookaround reach table size. + * since single path lookaround and multi-path lookaround have different + * bitvectors range (32 and 256), we need to maintain both look table size + * and reach table size. */ + size_t reachTableSize = 0; +}; + +/** + * \brief Provides a brief summary of properties of an NFA that has already been + * finalised and stored in the blob. + */ +struct engine_info { + engine_info(const NFA *nfa, bool trans); + + enum NFAEngineType type; + bool accepts_eod; + u32 stream_size; + u32 scratch_size; + u32 scratch_align; + bool transient; +}; + +/** + * \brief Consumes list of program blocks corresponding to different literals, + * checks them for duplicates and then concatenates them into one program. + * + * Note: if a block will squash groups, a CLEAR_WORK_DONE instruction is + * inserted to prevent the work_done flag being contaminated by early blocks. 
+ */ +RoseProgram assembleProgramBlocks(std::vector &&blocks); + +RoseProgram makeLiteralProgram(const RoseBuildImpl &build, + const std::map &leftfix_info, + const std::map &suffixes, + const std::map &engine_info_by_queue, + lookaround_info &lookarounds, + unordered_map roleStateIndices, + ProgramBuild &prog_build, u32 lit_id, + const std::vector &lit_edges, + bool is_anchored_replay_program); + +RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, + lookaround_info &lookarounds, + ProgramBuild &prog_build, + const std::vector &lit_ids); + +RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build, + ProgramBuild &prog_build, const RoseEdge &e, + const bool multiple_preds); + +RoseProgram makeReportProgram(const RoseBuildImpl &build, + bool needs_mpv_catchup, ReportID id); + +RoseProgram makeBoundaryProgram(const RoseBuildImpl &build, + const std::set &reports); + +struct TriggerInfo { + TriggerInfo(bool c, u32 q, u32 e) : cancel(c), queue(q), event(e) {} + bool cancel; + u32 queue; + u32 event; + + bool operator==(const TriggerInfo &b) const { + return cancel == b.cancel && queue == b.queue && event == b.event; + } +}; + +void addPredBlocks(std::map &pred_blocks, u32 num_states, + RoseProgram &program); + +void applyFinalSpecialisation(RoseProgram &program); + +void recordLongLiterals(std::vector &longLiterals, + const RoseProgram &program); + +void recordResources(RoseResources &resources, const RoseProgram &program); } // namespace ue2 diff --git a/src/rose/rose_build_resources.h b/src/rose/rose_build_resources.h new file mode 100644 index 00000000..3edb81b9 --- /dev/null +++ b/src/rose/rose_build_resources.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ROSE_BUILD_RESOURCES_H +#define ROSE_BUILD_RESOURCES_H + +namespace ue2 { + +/** + * \brief Structure tracking which resources are used by this Rose instance at + * runtime. + * + * We use this to control how much initialisation we need to do at the + * beginning of a stream/block at runtime. 
+ */ +struct RoseResources { + bool has_outfixes = false; + bool has_suffixes = false; + bool has_leftfixes = false; + bool has_literals = false; + bool has_states = false; + bool checks_groups = false; + bool has_lit_delay = false; + bool has_lit_check = false; // long literal support + bool has_anchored = false; + bool has_floating = false; + bool has_eod = false; +}; + +} + +#endif