diff --git a/CMakeLists.txt b/CMakeLists.txt index bdb60b74..0a236845 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -877,6 +877,7 @@ SET (hs_SRCS src/rose/rose_build_compile.cpp src/rose/rose_build_convert.cpp src/rose/rose_build_convert.h + src/rose/rose_build_engine_blob.h src/rose/rose_build_exclusive.cpp src/rose/rose_build_exclusive.h src/rose/rose_build_groups.cpp @@ -891,6 +892,8 @@ SET (hs_SRCS src/rose/rose_build_merge.cpp src/rose/rose_build_merge.h src/rose/rose_build_misc.cpp + src/rose/rose_build_program.cpp + src/rose/rose_build_program.h src/rose/rose_build_role_aliasing.cpp src/rose/rose_build_scatter.cpp src/rose/rose_build_scatter.h diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index b57aebe9..100d9140 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -1600,6 +1600,28 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SPARSE_ITER_ANY) { + DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset); + const struct mmbit_sparse_iter *it = + getByOffset(t, ri->iter_offset); + assert(ISALIGNED(it)); + + const u8 *roles = getRoleState(scratch->core_info.state); + + u32 idx = 0; + u32 i = mmbit_sparse_iter_begin(roles, t->rolesWithStateCount, + &idx, it, si_state); + if (i == MMB_INVALID) { + DEBUG_PRINTF("no states in sparse iter are on\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + DEBUG_PRINTF("state %u (idx=%u) is on\n", i, idx); + fatbit_clear(scratch->handled_roles); + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(ENGINES_EOD) { if (roseEnginesEod(t, scratch, end, ri->iter_offset) == HWLM_TERMINATE_MATCHING) { diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index ed9b5bbb..68812b44 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -33,11 +33,13 @@ #include "hs_compile.h" // for HS_MODE_* #include "rose_build_add_internal.h" #include 
"rose_build_anchored.h" +#include "rose_build_engine_blob.h" #include "rose_build_exclusive.h" #include "rose_build_groups.h" #include "rose_build_infix.h" #include "rose_build_lookaround.h" #include "rose_build_matchers.h" +#include "rose_build_program.h" #include "rose_build_scatter.h" #include "rose_build_util.h" #include "rose_build_width.h" @@ -147,218 +149,6 @@ struct left_build_info { vector lookaround; // alternative implementation to the NFA }; -/** - * \brief Possible jump targets for roles that perform checks. - * - * Fixed up into offsets before the program is written to bytecode. - */ -enum class JumpTarget { - NO_JUMP, //!< Instruction does not jump. - PROGRAM_END, //!< Jump to end of program. - NEXT_BLOCK, //!< Jump to start of next block (sparse iter check, etc). - FIXUP_DONE, //!< Target fixup already applied. -}; - -/** \brief Role instruction model used at compile time. */ -class RoseInstruction { -public: - RoseInstruction(enum RoseInstructionCode c, JumpTarget j) : target(j) { - memset(&u, 0, sizeof(u)); - u.end.code = c; - } - - explicit RoseInstruction(enum RoseInstructionCode c) - : RoseInstruction(c, JumpTarget::NO_JUMP) {} - - bool operator<(const RoseInstruction &a) const { - if (code() != a.code()) { - return code() < a.code(); - } - if (target != a.target) { - return target < a.target; - } - return memcmp(&u, &a.u, sizeof(u)) < 0; - } - - bool operator==(const RoseInstruction &a) const { - return code() == a.code() && target == a.target && - memcmp(&u, &a.u, sizeof(u)) == 0; - } - - enum RoseInstructionCode code() const { - // Note that this sort of type-punning (relying on identical initial - // layout) is explicitly allowed by the C++11 standard. 
- return (enum RoseInstructionCode)u.end.code; - } - - const void *get() const { - switch (code()) { - case ROSE_INSTR_CHECK_LIT_EARLY: return &u.checkLitEarly; - case ROSE_INSTR_CHECK_GROUPS: return &u.checkGroups; - case ROSE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod; - case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds; - case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled; - case ROSE_INSTR_CHECK_LOOKAROUND: return &u.checkLookaround; - case ROSE_INSTR_CHECK_MASK: return &u.checkMask; - case ROSE_INSTR_CHECK_MASK_32: return &u.checkMask32; - case ROSE_INSTR_CHECK_BYTE: return &u.checkByte; - case ROSE_INSTR_CHECK_INFIX: return &u.checkInfix; - case ROSE_INSTR_CHECK_PREFIX: return &u.checkPrefix; - case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; - case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed; - case ROSE_INSTR_RECORD_ANCHORED: return &u.recordAnchored; - case ROSE_INSTR_CATCH_UP: return &u.catchUp; - case ROSE_INSTR_CATCH_UP_MPV: return &u.catchUpMpv; - case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust; - case ROSE_INSTR_SOM_LEFTFIX: return &u.somLeftfix; - case ROSE_INSTR_SOM_FROM_REPORT: return &u.somFromReport; - case ROSE_INSTR_SOM_ZERO: return &u.somZero; - case ROSE_INSTR_TRIGGER_INFIX: return &u.triggerInfix; - case ROSE_INSTR_TRIGGER_SUFFIX: return &u.triggerSuffix; - case ROSE_INSTR_DEDUPE: return &u.dedupe; - case ROSE_INSTR_DEDUPE_SOM: return &u.dedupeSom; - case ROSE_INSTR_REPORT_CHAIN: return &u.reportChain; - case ROSE_INSTR_REPORT_SOM_INT: return &u.reportSomInt; - case ROSE_INSTR_REPORT_SOM_AWARE: return &u.reportSomAware; - case ROSE_INSTR_REPORT: return &u.report; - case ROSE_INSTR_REPORT_EXHAUST: return &u.reportExhaust; - case ROSE_INSTR_REPORT_SOM: return &u.reportSom; - case ROSE_INSTR_REPORT_SOM_EXHAUST: return &u.reportSomExhaust; - case ROSE_INSTR_DEDUPE_AND_REPORT: return &u.dedupeAndReport; - case ROSE_INSTR_FINAL_REPORT: return &u.finalReport; - case ROSE_INSTR_CHECK_EXHAUSTED: return &u.checkExhausted; - 
case ROSE_INSTR_CHECK_MIN_LENGTH: return &u.checkMinLength; - case ROSE_INSTR_SET_STATE: return &u.setState; - case ROSE_INSTR_SET_GROUPS: return &u.setGroups; - case ROSE_INSTR_SQUASH_GROUPS: return &u.squashGroups; - case ROSE_INSTR_CHECK_STATE: return &u.checkState; - case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin; - case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext; - case ROSE_INSTR_ENGINES_EOD: return &u.enginesEod; - case ROSE_INSTR_SUFFIXES_EOD: return &u.suffixesEod; - case ROSE_INSTR_MATCHER_EOD: return &u.matcherEod; - case ROSE_INSTR_END: return &u.end; - } - assert(0); - return &u.end; - } - - size_t length() const { - switch (code()) { - case ROSE_INSTR_CHECK_LIT_EARLY: return sizeof(u.checkLitEarly); - case ROSE_INSTR_CHECK_GROUPS: return sizeof(u.checkGroups); - case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod); - case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds); - case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled); - case ROSE_INSTR_CHECK_LOOKAROUND: return sizeof(u.checkLookaround); - case ROSE_INSTR_CHECK_MASK: return sizeof(u.checkMask); - case ROSE_INSTR_CHECK_MASK_32: return sizeof(u.checkMask32); - case ROSE_INSTR_CHECK_BYTE: return sizeof(u.checkByte); - case ROSE_INSTR_CHECK_INFIX: return sizeof(u.checkInfix); - case ROSE_INSTR_CHECK_PREFIX: return sizeof(u.checkPrefix); - case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); - case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed); - case ROSE_INSTR_RECORD_ANCHORED: return sizeof(u.recordAnchored); - case ROSE_INSTR_CATCH_UP: return sizeof(u.catchUp); - case ROSE_INSTR_CATCH_UP_MPV: return sizeof(u.catchUpMpv); - case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust); - case ROSE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix); - case ROSE_INSTR_SOM_FROM_REPORT: return sizeof(u.somFromReport); - case ROSE_INSTR_SOM_ZERO: return sizeof(u.somZero); - case ROSE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix); - case 
ROSE_INSTR_TRIGGER_SUFFIX: return sizeof(u.triggerSuffix); - case ROSE_INSTR_DEDUPE: return sizeof(u.dedupe); - case ROSE_INSTR_DEDUPE_SOM: return sizeof(u.dedupeSom); - case ROSE_INSTR_REPORT_CHAIN: return sizeof(u.reportChain); - case ROSE_INSTR_REPORT_SOM_INT: return sizeof(u.reportSomInt); - case ROSE_INSTR_REPORT_SOM_AWARE: return sizeof(u.reportSomAware); - case ROSE_INSTR_REPORT: return sizeof(u.report); - case ROSE_INSTR_REPORT_EXHAUST: return sizeof(u.reportExhaust); - case ROSE_INSTR_REPORT_SOM: return sizeof(u.reportSom); - case ROSE_INSTR_REPORT_SOM_EXHAUST: return sizeof(u.reportSomExhaust); - case ROSE_INSTR_DEDUPE_AND_REPORT: return sizeof(u.dedupeAndReport); - case ROSE_INSTR_FINAL_REPORT: return sizeof(u.finalReport); - case ROSE_INSTR_CHECK_EXHAUSTED: return sizeof(u.checkExhausted); - case ROSE_INSTR_CHECK_MIN_LENGTH: return sizeof(u.checkMinLength); - case ROSE_INSTR_SET_STATE: return sizeof(u.setState); - case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups); - case ROSE_INSTR_SQUASH_GROUPS: return sizeof(u.squashGroups); - case ROSE_INSTR_CHECK_STATE: return sizeof(u.checkState); - case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin); - case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext); - case ROSE_INSTR_ENGINES_EOD: return sizeof(u.enginesEod); - case ROSE_INSTR_SUFFIXES_EOD: return sizeof(u.suffixesEod); - case ROSE_INSTR_MATCHER_EOD: return sizeof(u.matcherEod); - case ROSE_INSTR_END: return sizeof(u.end); - } - assert(0); - return 0; - } - - union { - ROSE_STRUCT_CHECK_LIT_EARLY checkLitEarly; - ROSE_STRUCT_CHECK_GROUPS checkGroups; - ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod; - ROSE_STRUCT_CHECK_BOUNDS checkBounds; - ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled; - ROSE_STRUCT_CHECK_LOOKAROUND checkLookaround; - ROSE_STRUCT_CHECK_MASK checkMask; - ROSE_STRUCT_CHECK_MASK_32 checkMask32; - ROSE_STRUCT_CHECK_BYTE checkByte; - ROSE_STRUCT_CHECK_INFIX checkInfix; - ROSE_STRUCT_CHECK_PREFIX checkPrefix; - 
ROSE_STRUCT_ANCHORED_DELAY anchoredDelay; - ROSE_STRUCT_PUSH_DELAYED pushDelayed; - ROSE_STRUCT_RECORD_ANCHORED recordAnchored; - ROSE_STRUCT_CATCH_UP catchUp; - ROSE_STRUCT_CATCH_UP_MPV catchUpMpv; - ROSE_STRUCT_SOM_ADJUST somAdjust; - ROSE_STRUCT_SOM_LEFTFIX somLeftfix; - ROSE_STRUCT_SOM_FROM_REPORT somFromReport; - ROSE_STRUCT_SOM_ZERO somZero; - ROSE_STRUCT_TRIGGER_INFIX triggerInfix; - ROSE_STRUCT_TRIGGER_SUFFIX triggerSuffix; - ROSE_STRUCT_DEDUPE dedupe; - ROSE_STRUCT_DEDUPE_SOM dedupeSom; - ROSE_STRUCT_REPORT_CHAIN reportChain; - ROSE_STRUCT_REPORT_SOM_INT reportSomInt; - ROSE_STRUCT_REPORT_SOM_AWARE reportSomAware; - ROSE_STRUCT_REPORT report; - ROSE_STRUCT_REPORT_EXHAUST reportExhaust; - ROSE_STRUCT_REPORT_SOM reportSom; - ROSE_STRUCT_REPORT_SOM_EXHAUST reportSomExhaust; - ROSE_STRUCT_DEDUPE_AND_REPORT dedupeAndReport; - ROSE_STRUCT_FINAL_REPORT finalReport; - ROSE_STRUCT_CHECK_EXHAUSTED checkExhausted; - ROSE_STRUCT_CHECK_MIN_LENGTH checkMinLength; - ROSE_STRUCT_SET_STATE setState; - ROSE_STRUCT_SET_GROUPS setGroups; - ROSE_STRUCT_SQUASH_GROUPS squashGroups; - ROSE_STRUCT_CHECK_STATE checkState; - ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin; - ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext; - ROSE_STRUCT_ENGINES_EOD enginesEod; - ROSE_STRUCT_SUFFIXES_EOD suffixesEod; - ROSE_STRUCT_MATCHER_EOD matcherEod; - ROSE_STRUCT_END end; - } u; - - JumpTarget target; -}; - -static -size_t hash_value(const RoseInstruction &ri) { - size_t val = 0; - boost::hash_combine(val, ri.code()); - boost::hash_combine(val, ri.target); - const char *bytes = (const char *)ri.get(); - const size_t len = ri.length(); - for (size_t i = 0; i < len; i++) { - boost::hash_combine(val, bytes[i]); - } - return val; -} - /** * \brief Structure tracking which resources are used by this Rose instance at * runtime. @@ -402,7 +192,8 @@ struct build_context : boost::noncopyable { /** \brief Simple cache of programs written to engine blob, used for * deduplication. 
*/ - ue2::unordered_map, u32> program_cache; + ue2::unordered_map program_cache; /** \brief LookEntry list cache, so that we don't have to go scanning * through the full list to find cases we've used already. */ @@ -427,7 +218,7 @@ struct build_context : boost::noncopyable { /** \brief Contents of the Rose bytecode immediately following the * RoseEngine. */ - vector> engine_blob; + RoseEngineBlob engine_blob; /** \brief True if reports need CATCH_UP instructions, to catch up anchored * matches, suffixes, outfixes etc. */ @@ -445,81 +236,17 @@ struct build_context : boost::noncopyable { /** \brief Global bitmap of groups that can be squashed. */ rose_group squashable_groups = 0; - - /** \brief Base offset of engine_blob in the Rose engine bytecode. */ - static constexpr u32 engine_blob_base = ROUNDUP_CL(sizeof(RoseEngine)); }; } -static -void pad_engine_blob(build_context &bc, size_t align) { - assert(ISALIGNED_N(bc.engine_blob_base, align)); - size_t s = bc.engine_blob.size(); - - if (ISALIGNED_N(s, align)) { - return; - } - - bc.engine_blob.resize(s + align - s % align); -} - -static -u32 add_to_engine_blob(build_context &bc, const void *a, const size_t len, - const size_t align) { - pad_engine_blob(bc, align); - - size_t rv = bc.engine_blob_base + bc.engine_blob.size(); - assert(rv >= bc.engine_blob_base); - DEBUG_PRINTF("write %zu bytes at offset %zu\n", len, rv); - - assert(ISALIGNED_N(bc.engine_blob.size(), align)); - - bc.engine_blob.resize(bc.engine_blob.size() + len); - memcpy(&bc.engine_blob.back() - len + 1, a, len); - - return verify_u32(rv); -} - -template -static -u32 add_to_engine_blob(build_context &bc, const T &a) { - static_assert(is_pod::value, "should be pod"); - return add_to_engine_blob(bc, &a, sizeof(a), alignof(T)); -} - -template -static -u32 add_to_engine_blob(build_context &bc, const T &a, const size_t len) { - static_assert(is_pod::value, "should be pod"); - return add_to_engine_blob(bc, &a, len, alignof(T)); -} - -template -static -u32 
add_to_engine_blob(build_context &bc, Iter b, const Iter &e) { - using value_type = typename std::iterator_traits::value_type; - static_assert(is_pod::value, "should be pod"); - - if (b == e) { - return 0; - } - - u32 offset = add_to_engine_blob(bc, *b); - for (++b; b != e; ++b) { - add_to_engine_blob(bc, *b); - } - - return offset; -} - static const NFA *get_nfa_from_blob(const build_context &bc, u32 qi) { assert(contains(bc.engineOffsets, qi)); u32 nfa_offset = bc.engineOffsets.at(qi); - assert(nfa_offset >= bc.engine_blob_base); + assert(nfa_offset >= bc.engine_blob.base_offset); const NFA *n = (const NFA *)(bc.engine_blob.data() + nfa_offset - - bc.engine_blob_base); + bc.engine_blob.base_offset); assert(n->queueIndex == qi); return n; } @@ -527,7 +254,7 @@ const NFA *get_nfa_from_blob(const build_context &bc, u32 qi) { static const NFA *add_nfa_to_blob(build_context &bc, NFA &nfa) { u32 qi = nfa.queueIndex; - u32 nfa_offset = add_to_engine_blob(bc, nfa, nfa.length); + u32 nfa_offset = bc.engine_blob.add(nfa, nfa.length); DEBUG_PRINTF("added nfa qi=%u, type=%u, length=%u at offset=%u\n", qi, nfa.type, nfa.length, nfa_offset); @@ -2282,7 +2009,7 @@ void buildCountingMiracles(RoseBuildImpl &build, build_context &bc) { rcm.count = lbi.countingMiracleCount; - lbi.countingMiracleOffset = add_to_engine_blob(bc, rcm); + lbi.countingMiracleOffset = bc.engine_blob.add(rcm); pre_built[key] = lbi.countingMiracleOffset; DEBUG_PRINTF("built cm for count of %u @ %u\n", rcm.count, lbi.countingMiracleOffset); @@ -2462,7 +2189,7 @@ u32 addIteratorToTable(build_context &bc, return offset; } - u32 offset = add_to_engine_blob(bc, iter.begin(), iter.end()); + u32 offset = bc.engine_blob.add(iter.begin(), iter.end()); bc.iterCache.insert(make_pair(iter, offset)); @@ -2577,7 +2304,7 @@ void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc, for (auto &e : qi_to_ekeys) { assert(!e.second.empty()); e.second.push_back(INVALID_EKEY); /* terminator */ - (*out)[e.first] = 
add_to_engine_blob(bc, e.second.begin(), + (*out)[e.first] = bc.engine_blob.add(e.second.begin(), e.second.end()); } } @@ -2765,132 +2492,8 @@ getLiteralInfoByFinalId(const RoseBuildImpl &build, u32 final_id) { return out; } -/** - * \brief Flattens a list of role programs into one finalised program with its - * fail_jump/done_jump targets set correctly. - */ static -vector -flattenProgram(const vector> &programs) { - vector out; - - vector offsets; // offset of each instruction (bytes) - vector blocks; // track which block we're in - vector block_offsets; // start offsets for each block - - DEBUG_PRINTF("%zu program blocks\n", programs.size()); - - size_t curr_offset = 0; - for (const auto &program : programs) { - DEBUG_PRINTF("block with %zu instructions\n", program.size()); - block_offsets.push_back(curr_offset); - for (const auto &ri : program) { - assert(ri.code() != ROSE_INSTR_END); - out.push_back(ri); - offsets.push_back(curr_offset); - blocks.push_back(block_offsets.size() - 1); - curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); - } - } - - // Add a final END instruction, which is its own block. - out.emplace_back(ROSE_INSTR_END); - block_offsets.push_back(curr_offset); - offsets.push_back(curr_offset); - - assert(offsets.size() == out.size()); - - for (size_t i = 0; i < out.size(); i++) { - auto &ri = out[i]; - - u32 jump_target = 0; - switch (ri.target) { - case JumpTarget::NO_JUMP: - case JumpTarget::FIXUP_DONE: - continue; // Next instruction. - case JumpTarget::PROGRAM_END: - assert(i != out.size() - 1); - jump_target = offsets.back(); - break; - case JumpTarget::NEXT_BLOCK: - assert(blocks[i] + 1 < block_offsets.size()); - jump_target = block_offsets[blocks[i] + 1]; - break; - } - - // We currently always make progress and never jump backwards. 
- assert(jump_target > offsets[i]); - assert(jump_target <= offsets.back()); - u32 jump_val = jump_target - offsets[i]; - - switch (ri.code()) { - case ROSE_INSTR_ANCHORED_DELAY: - ri.u.anchoredDelay.done_jump = jump_val; - break; - case ROSE_INSTR_CHECK_ONLY_EOD: - ri.u.checkOnlyEod.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_BOUNDS: - ri.u.checkBounds.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_NOT_HANDLED: - ri.u.checkNotHandled.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_LOOKAROUND: - ri.u.checkLookaround.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_MASK: - ri.u.checkMask.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_MASK_32: - ri.u.checkMask32.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_BYTE: - ri.u.checkByte.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_INFIX: - ri.u.checkInfix.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_PREFIX: - ri.u.checkPrefix.fail_jump = jump_val; - break; - case ROSE_INSTR_DEDUPE: - ri.u.dedupe.fail_jump = jump_val; - break; - case ROSE_INSTR_DEDUPE_SOM: - ri.u.dedupeSom.fail_jump = jump_val; - break; - case ROSE_INSTR_DEDUPE_AND_REPORT: - ri.u.dedupeAndReport.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_EXHAUSTED: - ri.u.checkExhausted.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_MIN_LENGTH: - ri.u.checkMinLength.fail_jump = jump_val; - break; - case ROSE_INSTR_CHECK_STATE: - ri.u.checkState.fail_jump = jump_val; - break; - case ROSE_INSTR_SPARSE_ITER_BEGIN: - ri.u.sparseIterBegin.fail_jump = jump_val; - break; - case ROSE_INSTR_SPARSE_ITER_NEXT: - ri.u.sparseIterNext.fail_jump = jump_val; - break; - default: - assert(0); // Unhandled opcode? 
- break; - } - - ri.target = JumpTarget::FIXUP_DONE; - } - - return out; -} - -static -void applyFinalSpecialisation(vector &program) { +void applyFinalSpecialisation(RoseProgram &program) { assert(!program.empty()); assert(program.back().code() == ROSE_INSTR_END); if (program.size() < 2) { @@ -2899,26 +2502,18 @@ void applyFinalSpecialisation(vector &program) { /* Replace the second-to-last instruction (before END) with a one-shot * specialisation if available. */ - auto &ri = *(next(program.rbegin())); - switch (ri.code()) { - case ROSE_INSTR_REPORT: { + auto it = next(program.rbegin()); + if (auto *ri = dynamic_cast(it->get())) { DEBUG_PRINTF("replacing REPORT with FINAL_REPORT\n"); - auto ri2 = RoseInstruction(ROSE_INSTR_FINAL_REPORT); - ri2.u.finalReport.onmatch = ri.u.report.onmatch; - ri2.u.finalReport.offset_adjust = ri.u.report.offset_adjust; - ri = ri2; - break; - } - default: - break; + program.replace(it, make_unique( + ri->onmatch, ri->offset_adjust)); } } static -void recordResources(RoseResources &resources, - const vector &program) { +void recordResources(RoseResources &resources, const RoseProgram &program) { for (const auto &ri : program) { - switch (ri.code()) { + switch (ri->code()) { case ROSE_INSTR_TRIGGER_SUFFIX: resources.has_suffixes = true; break; @@ -2973,22 +2568,12 @@ void recordResources(RoseResources &resources, } static -u32 writeProgram(build_context &bc, const vector &program) { +u32 writeProgram(build_context &bc, RoseProgram &&program) { if (program.empty()) { DEBUG_PRINTF("no program\n"); return 0; } - assert(program.back().code() == ROSE_INSTR_END); - assert(program.size() >= 1); - - // This program must have been flattened; i.e. all check instructions must - // have their jump offsets set. 
- assert(all_of(begin(program), end(program), [](const RoseInstruction &ri) { - return ri.target == JumpTarget::NO_JUMP || - ri.target == JumpTarget::FIXUP_DONE; - })); - auto it = bc.program_cache.find(program); if (it != end(bc.program_cache)) { DEBUG_PRINTF("reusing cached program at %u\n", it->second); @@ -2997,20 +2582,13 @@ u32 writeProgram(build_context &bc, const vector &program) { recordResources(bc.resources, program); - DEBUG_PRINTF("writing %zu instructions\n", program.size()); - u32 programOffset = 0; - for (const auto &ri : program) { - u32 offset = - add_to_engine_blob(bc, ri.get(), ri.length(), ROSE_INSTR_MIN_ALIGN); - DEBUG_PRINTF("code %u len %zu written at offset %u\n", ri.code(), - ri.length(), offset); - if (!programOffset) { - programOffset = offset; - } - } - DEBUG_PRINTF("program begins at offset %u\n", programOffset); - bc.program_cache.emplace(program, programOffset); - return programOffset; + u32 len = 0; + auto prog_bytecode = writeProgram(bc.engine_blob, program, &len); + u32 offset = bc.engine_blob.add(prog_bytecode.get(), len, + ROSE_INSTR_MIN_ALIGN); + DEBUG_PRINTF("prog len %u written at offset %u\n", len, offset); + bc.program_cache.emplace(move(program), offset); + return offset; } static @@ -3228,8 +2806,7 @@ bool checkReachWithFlip(const CharReach &cr, u8 &andmask, } static -bool makeRoleByte(const vector &look, - vector &program) { +bool makeRoleByte(const vector &look, RoseProgram &program) { if (look.size() == 1) { const auto &entry = look[0]; u8 andmask_u8, cmpmask_u8; @@ -3239,21 +2816,17 @@ bool makeRoleByte(const vector &look, } s32 checkbyte_offset = verify_s32(entry.offset); DEBUG_PRINTF("CHECK BYTE offset=%d\n", checkbyte_offset); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_BYTE, - JumpTarget::NEXT_BLOCK); - ri.u.checkByte.and_mask = andmask_u8; - ri.u.checkByte.cmp_mask = cmpmask_u8; - ri.u.checkByte.negation = flip; - ri.u.checkByte.offset = checkbyte_offset; - program.push_back(ri); + const auto *end_inst = 
program.end_instruction(); + auto ri = make_unique(andmask_u8, cmpmask_u8, flip, + checkbyte_offset, end_inst); + program.add_before_end(move(ri)); return true; } return false; } static -bool makeRoleMask(const vector &look, - vector &program) { +bool makeRoleMask(const vector &look, RoseProgram &program) { if (look.back().offset < look.front().offset + 8) { s32 base_offset = verify_s32(look.front().offset); u64a and_mask = 0; @@ -3275,13 +2848,10 @@ bool makeRoleMask(const vector &look, } DEBUG_PRINTF("CHECK MASK and_mask=%llx cmp_mask=%llx\n", and_mask, cmp_mask); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_MASK, - JumpTarget::NEXT_BLOCK); - ri.u.checkMask.and_mask = and_mask; - ri.u.checkMask.cmp_mask = cmp_mask; - ri.u.checkMask.neg_mask = neg_mask; - ri.u.checkMask.offset = base_offset; - program.push_back(ri); + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(and_mask, cmp_mask, neg_mask, + base_offset, end_inst); + program.add_before_end(move(ri)); return true; } return false; @@ -3302,14 +2872,14 @@ string convertMaskstoString(u8 *p, int byte_len) { static bool makeRoleMask32(const vector &look, - vector &program) { + RoseProgram &program) { if (look.back().offset >= look.front().offset + 32) { return false; } s32 base_offset = verify_s32(look.front().offset); - u8 and_mask[32], cmp_mask[32]; - memset(and_mask, 0, sizeof(and_mask)); - memset(cmp_mask, 0, sizeof(cmp_mask)); + array and_mask, cmp_mask; + and_mask.fill(0); + cmp_mask.fill(0); u32 neg_mask = 0; for (const auto &entry : look) { u8 andmask_u8, cmpmask_u8, flip; @@ -3326,18 +2896,17 @@ bool makeRoleMask32(const vector &look, } } - DEBUG_PRINTF("and_mask %s\n", convertMaskstoString(and_mask, 32).c_str()); - DEBUG_PRINTF("cmp_mask %s\n", convertMaskstoString(cmp_mask, 32).c_str()); + DEBUG_PRINTF("and_mask %s\n", + convertMaskstoString(and_mask.data(), 32).c_str()); + DEBUG_PRINTF("cmp_mask %s\n", + convertMaskstoString(cmp_mask.data(), 32).c_str()); DEBUG_PRINTF("neg_mask 
%08x\n", neg_mask); DEBUG_PRINTF("base_offset %d\n", base_offset); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_MASK_32, - JumpTarget::NEXT_BLOCK); - memcpy(ri.u.checkMask32.and_mask, and_mask, sizeof(and_mask)); - memcpy(ri.u.checkMask32.cmp_mask, cmp_mask, sizeof(cmp_mask)); - ri.u.checkMask32.neg_mask = neg_mask; - ri.u.checkMask32.offset = base_offset; - program.push_back(ri); + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(and_mask, cmp_mask, neg_mask, + base_offset, end_inst); + program.add_before_end(move(ri)); return true; } @@ -3347,7 +2916,7 @@ bool makeRoleMask32(const vector &look, */ static void makeLookaroundInstruction(build_context &bc, const vector &look, - vector &program) { + RoseProgram &program) { assert(!look.empty()); if (makeRoleByte(look, program)) { @@ -3365,16 +2934,14 @@ void makeLookaroundInstruction(build_context &bc, const vector &look, u32 look_idx = addLookaround(bc, look); u32 look_count = verify_u32(look.size()); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_LOOKAROUND, - JumpTarget::NEXT_BLOCK); - ri.u.checkLookaround.index = look_idx; - ri.u.checkLookaround.count = look_count; - program.push_back(ri); + auto ri = make_unique(look_idx, look_count, + program.end_instruction()); + program.add_before_end(move(ri)); } static void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { if (!build.cc.grey.roseLookaroundMasks) { return; } @@ -3402,7 +2969,7 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, static void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { auto it = bc.leftfix_info.find(v); if (it == end(bc.leftfix_info)) { return; @@ -3416,26 +2983,24 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); bool is_prefix = build.isRootSuccessor(v); + const 
auto *end_inst = program.end_instruction(); + + unique_ptr ri; if (is_prefix) { - auto ri = - RoseInstruction(ROSE_INSTR_CHECK_PREFIX, JumpTarget::NEXT_BLOCK); - ri.u.checkPrefix.queue = lni.queue; - ri.u.checkPrefix.lag = build.g[v].left.lag; - ri.u.checkPrefix.report = build.g[v].left.leftfix_report; - program.push_back(move(ri)); + ri = make_unique(lni.queue, build.g[v].left.lag, + build.g[v].left.leftfix_report, + end_inst); } else { - auto ri = - RoseInstruction(ROSE_INSTR_CHECK_INFIX, JumpTarget::NEXT_BLOCK); - ri.u.checkInfix.queue = lni.queue; - ri.u.checkInfix.lag = build.g[v].left.lag; - ri.u.checkInfix.report = build.g[v].left.leftfix_report; - program.push_back(move(ri)); + ri = make_unique(lni.queue, build.g[v].left.lag, + build.g[v].left.leftfix_report, + end_inst); } + program.add_before_end(move(ri)); } static void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, - RoseVertex v, vector &program) { + RoseVertex v, RoseProgram &program) { // Only relevant for roles that can be triggered by the anchored table. 
if (!build.isAnchored(v)) { return; @@ -3447,36 +3012,34 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, return; } - auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY, - JumpTarget::NEXT_BLOCK); - ri.u.anchoredDelay.groups = build.g[v].groups; - program.push_back(ri); + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(build.g[v].groups, end_inst); + program.add_before_end(move(ri)); } static void makeDedupe(const RoseBuildImpl &build, const Report &report, - vector &report_block) { - auto ri = RoseInstruction(ROSE_INSTR_DEDUPE, JumpTarget::NEXT_BLOCK); - ri.u.dedupe.quash_som = report.quashSom; - ri.u.dedupe.dkey = build.rm.getDkey(report); - ri.u.dedupe.offset_adjust = report.offsetAdjust; - report_block.push_back(move(ri)); + RoseProgram &program) { + const auto *end_inst = program.end_instruction(); + auto ri = + make_unique(report.quashSom, build.rm.getDkey(report), + report.offsetAdjust, end_inst); + program.add_before_end(move(ri)); } static void makeDedupeSom(const RoseBuildImpl &build, const Report &report, - vector &report_block) { - auto ri = RoseInstruction(ROSE_INSTR_DEDUPE_SOM, JumpTarget::NEXT_BLOCK); - ri.u.dedupeSom.quash_som = report.quashSom; - ri.u.dedupeSom.dkey = build.rm.getDkey(report); - ri.u.dedupeSom.offset_adjust = report.offsetAdjust; - report_block.push_back(move(ri)); + RoseProgram &program) { + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(report.quashSom, + build.rm.getDkey(report), + report.offsetAdjust, end_inst); + program.add_before_end(move(ri)); } static void makeCatchup(RoseBuildImpl &build, build_context &bc, - const flat_set &reports, - vector &program) { + const flat_set &reports, RoseProgram &program) { if (!bc.needs_catchup) { return; } @@ -3494,12 +3057,12 @@ void makeCatchup(RoseBuildImpl &build, build_context &bc, return; } - program.emplace_back(ROSE_INSTR_CATCH_UP); + program.add_before_end(make_unique()); } static void 
makeCatchupMpv(RoseBuildImpl &build, build_context &bc, ReportID id, - vector &program) { + RoseProgram &program) { if (!bc.needs_mpv_catchup) { return; } @@ -3509,13 +3072,15 @@ void makeCatchupMpv(RoseBuildImpl &build, build_context &bc, ReportID id, return; } - program.emplace_back(ROSE_INSTR_CATCH_UP_MPV); + program.add_before_end(make_unique()); } static void writeSomOperation(const Report &report, som_operation *op) { assert(op); + memset(op, 0, sizeof(*op)); + switch (report.type) { case EXTERNAL_CALLBACK_SOM_REL: op->type = SOM_EXTERNAL_CALLBACK_REL; @@ -3585,51 +3150,46 @@ void writeSomOperation(const Report &report, som_operation *op) { static void makeReport(RoseBuildImpl &build, const ReportID id, - const bool has_som, vector &program) { + const bool has_som, RoseProgram &program) { assert(id < build.rm.numReports()); const Report &report = build.rm.getReport(id); - vector report_block; + RoseProgram report_block; + const RoseInstruction *end_inst = report_block.end_instruction(); // Handle min/max offset checks. if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) { - auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS, - JumpTarget::NEXT_BLOCK); - ri.u.checkBounds.min_bound = report.minOffset; - ri.u.checkBounds.max_bound = report.maxOffset; - report_block.push_back(move(ri)); + auto ri = make_unique(report.minOffset, + report.maxOffset, end_inst); + report_block.add_before_end(move(ri)); } // If this report has an exhaustion key, we can check it in the program // rather than waiting until we're in the callback adaptor. if (report.ekey != INVALID_EKEY) { - auto ri = RoseInstruction(ROSE_INSTR_CHECK_EXHAUSTED, - JumpTarget::NEXT_BLOCK); - ri.u.checkExhausted.ekey = report.ekey; - report_block.push_back(move(ri)); + auto ri = make_unique(report.ekey, end_inst); + report_block.add_before_end(move(ri)); } // External SOM reports that aren't passthrough need their SOM value // calculated. 
if (isExternalSomReport(report) && report.type != EXTERNAL_CALLBACK_SOM_PASS) { - auto ri = RoseInstruction(ROSE_INSTR_SOM_FROM_REPORT); - writeSomOperation(report, &ri.u.somFromReport.som); - report_block.push_back(move(ri)); + auto ri = make_unique(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); } // Min length constraint. if (report.minLength > 0) { assert(build.hasSom); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_MIN_LENGTH, - JumpTarget::NEXT_BLOCK); - ri.u.checkMinLength.end_adj = report.offsetAdjust; - ri.u.checkMinLength.min_length = report.minLength; - report_block.push_back(move(ri)); + auto ri = make_unique( + report.offsetAdjust, report.minLength, end_inst); + report_block.add_before_end(move(ri)); } if (report.quashSom) { - report_block.emplace_back(ROSE_INSTR_SOM_ZERO); + report_block.add_before_end(make_unique()); } switch (report.type) { @@ -3640,42 +3200,30 @@ void makeReport(RoseBuildImpl &build, const ReportID id, bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom; if (report.ekey == INVALID_EKEY) { if (needs_dedupe) { - report_block.emplace_back(ROSE_INSTR_DEDUPE_AND_REPORT, - JumpTarget::NEXT_BLOCK); - auto &ri = report_block.back(); - ri.u.dedupeAndReport.quash_som = report.quashSom; - ri.u.dedupeAndReport.dkey = build.rm.getDkey(report); - ri.u.dedupeAndReport.onmatch = report.onmatch; - ri.u.dedupeAndReport.offset_adjust = report.offsetAdjust; + report_block.add_before_end( + make_unique( + report.quashSom, build.rm.getDkey(report), + report.onmatch, report.offsetAdjust, end_inst)); } else { - report_block.emplace_back(ROSE_INSTR_REPORT); - auto &ri = report_block.back(); - ri.u.report.onmatch = report.onmatch; - ri.u.report.offset_adjust = report.offsetAdjust; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); } } else { if (needs_dedupe) { makeDedupe(build, report, report_block); } - report_block.emplace_back(ROSE_INSTR_REPORT_EXHAUST); - auto &ri = 
report_block.back(); - ri.u.reportExhaust.onmatch = report.onmatch; - ri.u.reportExhaust.offset_adjust = report.offsetAdjust; - ri.u.reportExhaust.ekey = report.ekey; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); } } else { // has_som makeDedupeSom(build, report, report_block); if (report.ekey == INVALID_EKEY) { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM); - auto &ri = report_block.back(); - ri.u.reportSom.onmatch = report.onmatch; - ri.u.reportSom.offset_adjust = report.offsetAdjust; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); } else { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST); - auto &ri = report_block.back(); - ri.u.reportSomExhaust.onmatch = report.onmatch; - ri.u.reportSomExhaust.offset_adjust = report.offsetAdjust; - ri.u.reportSomExhaust.ekey = report.ekey; + report_block.add_before_end( + make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); } } break; @@ -3691,20 +3239,18 @@ void makeReport(RoseBuildImpl &build, const ReportID id, case INTERNAL_SOM_LOC_SET_FROM: case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: if (has_som) { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM_AWARE); - auto &ri = report_block.back(); - writeSomOperation(report, &ri.u.reportSomAware.som); + auto ri = make_unique(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); } else { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM_INT); - auto &ri = report_block.back(); - writeSomOperation(report, &ri.u.reportSomInt.som); + auto ri = make_unique(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); } break; case INTERNAL_ROSE_CHAIN: { - report_block.emplace_back(ROSE_INSTR_REPORT_CHAIN); - auto &ri = report_block.back(); - ri.u.reportChain.event = report.onmatch; - ri.u.reportChain.top_squash_distance = report.topSquashDistance; + report_block.add_before_end(make_unique( + report.onmatch, 
report.topSquashDistance)); break; } case EXTERNAL_CALLBACK_SOM_REL: @@ -3713,31 +3259,21 @@ void makeReport(RoseBuildImpl &build, const ReportID id, case EXTERNAL_CALLBACK_SOM_REV_NFA: makeDedupeSom(build, report, report_block); if (report.ekey == INVALID_EKEY) { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM); - auto &ri = report_block.back(); - ri.u.reportSom.onmatch = report.onmatch; - ri.u.reportSom.offset_adjust = report.offsetAdjust; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); } else { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST); - auto &ri = report_block.back(); - ri.u.reportSomExhaust.onmatch = report.onmatch; - ri.u.reportSomExhaust.offset_adjust = report.offsetAdjust; - ri.u.reportSomExhaust.ekey = report.ekey; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); } break; case EXTERNAL_CALLBACK_SOM_PASS: makeDedupeSom(build, report, report_block); if (report.ekey == INVALID_EKEY) { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM); - auto &ri = report_block.back(); - ri.u.reportSom.onmatch = report.onmatch; - ri.u.reportSom.offset_adjust = report.offsetAdjust; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust)); } else { - report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST); - auto &ri = report_block.back(); - ri.u.reportSomExhaust.onmatch = report.onmatch; - ri.u.reportSomExhaust.offset_adjust = report.offsetAdjust; - ri.u.reportSomExhaust.ekey = report.ekey; + report_block.add_before_end(make_unique( + report.onmatch, report.offsetAdjust, report.ekey)); } break; @@ -3747,15 +3283,12 @@ void makeReport(RoseBuildImpl &build, const ReportID id, } assert(!report_block.empty()); - report_block = flattenProgram({report_block}); - assert(report_block.back().code() == ROSE_INSTR_END); - report_block.pop_back(); - insert(&program, program.end(), report_block); + program.add_block(move(report_block)); } static void 
makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { const auto &g = build.g; /* we are a suffaig - need to update role to provide som to the @@ -3764,29 +3297,28 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, if (g[v].left.tracksSom()) { assert(contains(bc.leftfix_info, v)); const left_build_info &lni = bc.leftfix_info.at(v); - auto ri = RoseInstruction(ROSE_INSTR_SOM_LEFTFIX); - ri.u.somLeftfix.queue = lni.queue; - ri.u.somLeftfix.lag = g[v].left.lag; - program.push_back(ri); + program.add_before_end( + make_unique(lni.queue, g[v].left.lag)); has_som = true; } else if (g[v].som_adjust) { - auto ri = RoseInstruction(ROSE_INSTR_SOM_ADJUST); - ri.u.somAdjust.distance = g[v].som_adjust; - program.push_back(ri); + program.add_before_end( + make_unique(g[v].som_adjust)); has_som = true; } const auto &reports = g[v].reports; makeCatchup(build, bc, reports, program); + RoseProgram report_block; for (ReportID id : reports) { - makeReport(build, id, has_som, program); + makeReport(build, id, has_som, report_block); } + program.add_before_end(move(report_block)); } static void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { const auto &g = build.g; if (!g[v].suffix) { return; @@ -3815,15 +3347,13 @@ void makeRoleSuffix(RoseBuildImpl &build, build_context &bc, RoseVertex v, assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph)); suffixEvent = MQE_TOP; } - auto ri = RoseInstruction(ROSE_INSTR_TRIGGER_SUFFIX); - ri.u.triggerSuffix.queue = qi; - ri.u.triggerSuffix.event = suffixEvent; - program.push_back(ri); + program.add_before_end( + make_unique(qi, suffixEvent)); } static void makeRoleGroups(RoseBuildImpl &build, build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { const auto &g = build.g; rose_group groups = g[v].groups; if (!groups) { @@ -3854,17 +3384,15 @@ void 
makeRoleGroups(RoseBuildImpl &build, build_context &bc, RoseVertex v, return; } - auto ri = RoseInstruction(ROSE_INSTR_SET_GROUPS); - ri.u.setGroups.groups = groups; - program.push_back(ri); + program.add_before_end(make_unique(groups)); } static void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, - RoseVertex u, vector &program) { + RoseVertex u, RoseProgram &program) { const auto &g = build.g; - vector infix_program; + vector infix_program; for (const auto &e : out_edges_range(u, g)) { RoseVertex v = target(e, g); @@ -3896,11 +3424,7 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, assert(top < MQE_INVALID); } - auto ri = RoseInstruction(ROSE_INSTR_TRIGGER_INFIX); - ri.u.triggerInfix.queue = lbi.queue; - ri.u.triggerInfix.event = top; - ri.u.triggerInfix.cancel = g[e].rose_cancel_prev_top; - infix_program.push_back(ri); + infix_program.emplace_back(g[e].rose_cancel_prev_top, lbi.queue, top); } if (infix_program.empty()) { @@ -3908,30 +3432,33 @@ void makeRoleInfixTriggers(RoseBuildImpl &build, build_context &bc, } // Order, de-dupe and add instructions to the end of program. - sort(begin(infix_program), end(infix_program)); - unique_copy(begin(infix_program), end(infix_program), - back_inserter(program)); + sort(begin(infix_program), end(infix_program), + [](const RoseInstrTriggerInfix &a, const RoseInstrTriggerInfix &b) { + return tie(a.cancel, a.queue, a.event) < + tie(b.cancel, b.queue, b.event); + }); + infix_program.erase(unique(begin(infix_program), end(infix_program)), + end(infix_program)); + for (const auto &ri : infix_program) { + program.add_before_end(make_unique(ri)); + } } static void makeRoleSetState(const build_context &bc, RoseVertex v, - vector &program) { + RoseProgram &program) { // We only need this instruction if a state index has been assigned to this // vertex. 
auto it = bc.roleStateIndices.find(v); if (it == end(bc.roleStateIndices)) { return; } - - u32 idx = it->second; - auto ri = RoseInstruction(ROSE_INSTR_SET_STATE); - ri.u.setState.index = idx; - program.push_back(ri); + program.add_before_end(make_unique(it->second)); } static void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, - const RoseEdge &e, vector &program) { + const RoseEdge &e, RoseProgram &program) { const RoseGraph &g = build.g; const RoseVertex u = source(e, g); @@ -3972,19 +3499,14 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, // than just {length, inf}. assert(min_bound > lit_length || max_bound < MAX_OFFSET); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS, JumpTarget::NEXT_BLOCK); - ri.u.checkBounds.min_bound = min_bound; - ri.u.checkBounds.max_bound = max_bound; - - program.push_back(move(ri)); + const auto *end_inst = program.end_instruction(); + program.add_before_end( + make_unique(min_bound, max_bound, end_inst)); } static void makeRoleCheckNotHandled(build_context &bc, RoseVertex v, - vector &program) { - auto ri = RoseInstruction(ROSE_INSTR_CHECK_NOT_HANDLED, - JumpTarget::NEXT_BLOCK); - + RoseProgram &program) { u32 handled_key; if (contains(bc.handledKeys, v)) { handled_key = bc.handledKeys.at(v); @@ -3993,19 +3515,21 @@ void makeRoleCheckNotHandled(build_context &bc, RoseVertex v, bc.handledKeys.emplace(v, handled_key); } - ri.u.checkNotHandled.key = handled_key; - - program.push_back(move(ri)); + const auto *end_inst = program.end_instruction(); + auto ri = make_unique(handled_key, end_inst); + program.add_before_end(move(ri)); } static void makeRoleEagerEodReports(RoseBuildImpl &build, build_context &bc, - RoseVertex v, vector &program) { - vector eod_program; + RoseVertex v, RoseProgram &program) { + RoseProgram eod_program; for (const auto &e : out_edges_range(v, build.g)) { if (canEagerlyReportAtEod(build, e)) { - makeRoleReports(build, bc, target(e, build.g), eod_program); + RoseProgram 
block; + makeRoleReports(build, bc, target(e, build.g), block); + eod_program.add_block(move(block)); } } @@ -4016,19 +3540,21 @@ void makeRoleEagerEodReports(RoseBuildImpl &build, build_context &bc, if (!onlyAtEod(build, v)) { // The rest of our program wasn't EOD anchored, so we need to guard // these reports with a check. - program.emplace_back(ROSE_INSTR_CHECK_ONLY_EOD, JumpTarget::NEXT_BLOCK); + const auto *end_inst = eod_program.end_instruction(); + eod_program.insert(begin(eod_program), + make_unique(end_inst)); } - program.insert(end(program), begin(eod_program), end(eod_program)); + program.add_before_end(move(eod_program)); } static -vector makeProgram(RoseBuildImpl &build, build_context &bc, - const RoseEdge &e) { +RoseProgram makeProgram(RoseBuildImpl &build, build_context &bc, + const RoseEdge &e) { const RoseGraph &g = build.g; auto v = target(e, g); - vector program; + RoseProgram program; // First, add program instructions that enforce preconditions without // effects. @@ -4037,8 +3563,8 @@ vector makeProgram(RoseBuildImpl &build, build_context &bc, if (onlyAtEod(build, v)) { DEBUG_PRINTF("only at eod\n"); - program.push_back(RoseInstruction(ROSE_INSTR_CHECK_ONLY_EOD, - JumpTarget::NEXT_BLOCK)); + const auto *end_inst = program.end_instruction(); + program.add_before_end(make_unique(end_inst)); } if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { @@ -4055,24 +3581,41 @@ vector makeProgram(RoseBuildImpl &build, build_context &bc, makeRoleLookaround(build, bc, v, program); makeRoleCheckLeftfix(build, bc, v, program); - // Next, we can add program instructions that have effects. + // Next, we can add program instructions that have effects. This must be + // done as a series of blocks, as some of them (like reports) are + // escapable. 
- makeRoleReports(build, bc, v, program); + RoseProgram effects_block; - makeRoleInfixTriggers(build, bc, v, program); + RoseProgram reports_block; + makeRoleReports(build, bc, v, reports_block); + effects_block.add_block(move(reports_block)); + + RoseProgram infix_block; + makeRoleInfixTriggers(build, bc, v, infix_block); + effects_block.add_block(move(infix_block)); // Note: SET_GROUPS instruction must be after infix triggers, as an infix // going dead may switch off groups. - makeRoleGroups(build, bc, v, program); + RoseProgram groups_block; + makeRoleGroups(build, bc, v, groups_block); + effects_block.add_block(move(groups_block)); - makeRoleSuffix(build, bc, v, program); + RoseProgram suffix_block; + makeRoleSuffix(build, bc, v, suffix_block); + effects_block.add_block(move(suffix_block)); - makeRoleSetState(bc, v, program); + RoseProgram state_block; + makeRoleSetState(bc, v, state_block); + effects_block.add_block(move(state_block)); // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if // the program doesn't have one already). - makeRoleEagerEodReports(build, bc, v, program); + RoseProgram eod_block; + makeRoleEagerEodReports(build, bc, v, eod_block); + effects_block.add_block(move(eod_block)); + program.add_before_end(move(effects_block)); return program; } @@ -4088,13 +3631,12 @@ u32 writeBoundaryProgram(RoseBuildImpl &build, build_context &bc, // scratch to support it). 
const bool has_som = false; - vector program; + RoseProgram program; for (const auto &id : reports) { makeReport(build, id, has_som, program); } - program = flattenProgram({program}); applyFinalSpecialisation(program); - return writeProgram(bc, program); + return writeProgram(bc, move(program)); } static @@ -4217,7 +3759,7 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, if (hasUsefulStops(lbi)) { assert(lbi.stopAlphabet.size() == N_CHARS); - left.stopTable = add_to_engine_blob(bc, lbi.stopAlphabet.begin(), + left.stopTable = bc.engine_blob.add(lbi.stopAlphabet.begin(), lbi.stopAlphabet.end()); } @@ -4258,178 +3800,125 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, } static -void addPredBlocksSingle( - map>> &predProgramLists, - vector &program) { - - vector> prog_blocks; - - for (const auto &m : predProgramLists) { - const u32 &pred_state = m.first; - assert(!m.second.empty()); - auto subprog = flattenProgram(m.second); - - // Check our pred state. - auto ri = RoseInstruction(ROSE_INSTR_CHECK_STATE, - JumpTarget::NEXT_BLOCK); - ri.u.checkState.index = pred_state; - subprog.insert(begin(subprog), ri); - assert(subprog.back().code() == ROSE_INSTR_END); - subprog.pop_back(); - prog_blocks.push_back(move(subprog)); - } - - auto prog = flattenProgram(prog_blocks); - program.insert(end(program), begin(prog), end(prog)); +void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block, + RoseProgram &program) { + // Prepend an instruction to check the pred state is on. 
+ const auto *end_inst = pred_block.end_instruction(); + pred_block.insert(begin(pred_block), + make_unique(pred_state, end_inst)); + program.add_block(move(pred_block)); } static -u32 programLength(const vector &program) { - u32 len = 0; - for (const auto &ri : program) { - len += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); - } - return len; -} +void addPredBlocksAny(build_context &bc, map &pred_blocks, + RoseProgram &program) { + RoseProgram sparse_program; -static -void addPredBlocksMulti(build_context &bc, - map>> &predProgramLists, - vector &program) { - assert(!predProgramLists.empty()); - - // First, add the iterator itself. vector keys; - for (const auto &elem : predProgramLists) { - keys.push_back(elem.first); - } - DEBUG_PRINTF("%zu keys: %s\n", keys.size(), as_string_list(keys).c_str()); - - vector iter; - mmbBuildSparseIterator(iter, keys, bc.numStates); - assert(!iter.empty()); - u32 iter_offset = addIteratorToTable(bc, iter); - - // Construct our program, starting with the SPARSE_ITER_BEGIN - // instruction, keeping track of the jump offset for each sub-program. - vector sparse_program; - vector jump_table; - - sparse_program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN, - JumpTarget::PROGRAM_END)); - u32 curr_offset = programLength(program) + programLength(sparse_program); - - for (const auto &e : predProgramLists) { - DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(), - curr_offset); - jump_table.push_back(curr_offset); - assert(!e.second.empty()); - auto subprog = flattenProgram(e.second); - - if (e.first != keys.back()) { - // For all but the last subprogram, replace the END instruction - // with a SPARSE_ITER_NEXT. 
- assert(!subprog.empty()); - assert(subprog.back().code() == ROSE_INSTR_END); - subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT, - JumpTarget::PROGRAM_END); - } - - curr_offset += programLength(subprog); - insert(&sparse_program, end(sparse_program), subprog); + for (const u32 &key : pred_blocks | map_keys) { + keys.push_back(key); } - // Strip the END instruction from the last block. - assert(sparse_program.back().code() == ROSE_INSTR_END); - sparse_program.pop_back(); + const RoseInstruction *end_inst = sparse_program.end_instruction(); + auto ri = make_unique(bc.numStates, keys, end_inst); + sparse_program.add_before_end(move(ri)); - sparse_program = flattenProgram({sparse_program}); - - // Write the jump table into the bytecode. - const u32 jump_table_offset = - add_to_engine_blob(bc, begin(jump_table), end(jump_table)); - - // Write jump table and iterator offset into sparse iter instructions. - auto keys_it = begin(keys); - for (auto &ri : sparse_program) { - switch (ri.code()) { - case ROSE_INSTR_SPARSE_ITER_BEGIN: - ri.u.sparseIterBegin.iter_offset = iter_offset; - ri.u.sparseIterBegin.jump_table = jump_table_offset; - break; - case ROSE_INSTR_SPARSE_ITER_NEXT: - ri.u.sparseIterNext.iter_offset = iter_offset; - ri.u.sparseIterNext.jump_table = jump_table_offset; - assert(keys_it != end(keys)); - ri.u.sparseIterNext.state = *keys_it++; - break; - default: - break; - } - } - - program.insert(end(program), begin(sparse_program), end(sparse_program)); + RoseProgram &block = pred_blocks.begin()->second; + sparse_program.add_before_end(move(block)); + program.add_block(move(sparse_program)); } static -void addPredBlocks(build_context &bc, - map>> &predProgramLists, - vector &program) { - const size_t num_preds = predProgramLists.size(); +void addPredBlocksMulti(build_context &bc, map &pred_blocks, + RoseProgram &program) { + assert(!pred_blocks.empty()); + + RoseProgram sparse_program; + const RoseInstruction *end_inst = 
sparse_program.end_instruction(); + vector> jump_table; + + // BEGIN instruction. + auto ri_begin = + make_unique(bc.numStates, end_inst); + RoseInstrSparseIterBegin *begin_inst = ri_begin.get(); + sparse_program.add_before_end(move(ri_begin)); + + // NEXT instructions, one per pred program. + u32 prev_key = pred_blocks.begin()->first; + for (auto it = next(begin(pred_blocks)); it != end(pred_blocks); ++it) { + auto ri = make_unique(prev_key, begin_inst, + end_inst); + sparse_program.add_before_end(move(ri)); + prev_key = it->first; + } + + // Splice in each pred program after its BEGIN/NEXT. + auto out_it = begin(sparse_program); + for (auto &m : pred_blocks) { + u32 key = m.first; + RoseProgram &flat_prog = m.second; + assert(!flat_prog.empty()); + const size_t block_len = flat_prog.size() - 1; // without INSTR_END. + + assert(dynamic_cast(out_it->get()) || + dynamic_cast(out_it->get())); + out_it = sparse_program.insert(++out_it, move(flat_prog)); + + // Jump table target for this key is the beginning of the block we just + // spliced in. + jump_table.emplace_back(key, out_it->get()); + + assert(distance(begin(sparse_program), out_it) + block_len <= + sparse_program.size()); + advance(out_it, block_len); + } + + // Write the jump table back into the SPARSE_ITER_BEGIN instruction. + begin_inst->jump_table = move(jump_table); + + program.add_block(move(sparse_program)); +} + +static +void addPredBlocks(build_context &bc, map &pred_blocks, + RoseProgram &program) { + // Trim empty blocks, if any exist. 
+ for (auto it = pred_blocks.begin(); it != pred_blocks.end();) { + if (it->second.empty()) { + it = pred_blocks.erase(it); + } else { + ++it; + } + } + + const size_t num_preds = pred_blocks.size(); if (num_preds == 0) { - program.emplace_back(ROSE_INSTR_END); return; } if (num_preds == 1) { - addPredBlocksSingle(predProgramLists, program); + const auto head = pred_blocks.begin(); + addPredBlockSingle(head->first, head->second, program); return; } - addPredBlocksMulti(bc, predProgramLists, program); -} - -/** - * Returns the pair (program offset, sparse iter offset). - */ -static -vector makeSparseIterProgram(build_context &bc, - map>> &predProgramLists, - const vector &root_program, - const vector &pre_program) { - vector program; - u32 curr_offset = 0; - - // Add pre-program first. - for (const auto &ri : pre_program) { - program.push_back(ri); - curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + // First, see if all our blocks are equivalent, in which case we can + // collapse them down into one. + const auto &blocks = pred_blocks | map_values; + if (all_of(begin(blocks), end(blocks), [&](const RoseProgram &block) { + return RoseProgramEquivalence()(*begin(blocks), block); + })) { + DEBUG_PRINTF("all blocks equiv\n"); + addPredBlocksAny(bc, pred_blocks, program); + return; } - // Add blocks to deal with non-root edges (triggered by sparse iterator or - // mmbit_isset checks). This operation will flatten the program up to this - // point. - addPredBlocks(bc, predProgramLists, program); - - // If we have a root program, replace the END instruction with it. Note - // that the root program has already been flattened. 
- assert(!program.empty()); - assert(program.back().code() == ROSE_INSTR_END); - if (!root_program.empty()) { - program.pop_back(); - program.insert(end(program), begin(root_program), end(root_program)); - } - - assert(!program.empty()); - assert(program.back().code() == ROSE_INSTR_END); - program.pop_back(); - program = flattenProgram({program}); - return program; + addPredBlocksMulti(bc, pred_blocks, program); } static void makePushDelayedInstructions(const RoseBuildImpl &build, u32 final_id, - vector &program) { + RoseProgram &program) { const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); const auto &arb_lit_info = **lit_infos.begin(); if (arb_lit_info.delayed_ids.empty()) { @@ -4444,10 +3933,9 @@ void makePushDelayedInstructions(const RoseBuildImpl &build, u32 final_id, DEBUG_PRINTF("final_id=%u delay=%u child_id=%u\n", final_id, child_literal.delay, child_id); - auto ri = RoseInstruction(ROSE_INSTR_PUSH_DELAYED); - ri.u.pushDelayed.delay = verify_u8(child_literal.delay); - ri.u.pushDelayed.index = delay_index; - program.push_back(move(ri)); + auto ri = make_unique( + verify_u8(child_literal.delay), delay_index); + program.add_before_end(move(ri)); } } @@ -4465,21 +3953,17 @@ rose_group getFinalIdGroupsUnion(const RoseBuildImpl &build, u32 final_id) { static void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 final_id, - vector &program) { + RoseProgram &program) { rose_group groups = getFinalIdGroupsUnion(build, final_id); if (!groups) { return; } - - auto ri = RoseInstruction(ROSE_INSTR_CHECK_GROUPS); - ri.u.checkGroups.groups = groups; - program.push_back(move(ri)); + program.add_before_end(make_unique(groups)); } static void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc, - u32 final_id, - vector &program) { + u32 final_id, RoseProgram &program) { assert(contains(build.final_id_to_literal, final_id)); const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); assert(!lit_infos.empty()); @@ -4510,7 
+3994,7 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build, build_context &bc, static void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id, - vector &program) { + RoseProgram &program) { assert(contains(build.final_id_to_literal, final_id)); const auto &lit_infos = getLiteralInfoByFinalId(build, final_id); @@ -4524,10 +4008,8 @@ void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 final_id, } DEBUG_PRINTF("final_id %u squashes 0x%llx\n", final_id, groups); - - auto ri = RoseInstruction(ROSE_INSTR_SQUASH_GROUPS); - ri.u.squashGroups.groups = ~groups; // Negated, so we can just AND it in. - program.push_back(move(ri)); + program.add_before_end( + make_unique(~groups)); // Note negated. } static @@ -4546,7 +4028,7 @@ u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) { static void makeRecordAnchoredInstruction(const RoseBuildImpl &build, build_context &bc, u32 final_id, - vector &program) { + RoseProgram &program) { assert(contains(build.final_id_to_literal, final_id)); const auto &lit_ids = build.final_id_to_literal.at(final_id); @@ -4568,9 +4050,7 @@ void makeRecordAnchoredInstruction(const RoseBuildImpl &build, return; } - auto ri = RoseInstruction(ROSE_INSTR_RECORD_ANCHORED); - ri.u.recordAnchored.id = final_id; - program.push_back(move(ri)); + program.add_before_end(make_unique(final_id)); } static @@ -4590,7 +4070,7 @@ static void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, u32 final_id, const vector &lit_edges, - vector &program) { + RoseProgram &program) { if (lit_edges.empty()) { return; } @@ -4636,9 +4116,7 @@ void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, build_context &bc, assert(min_offset < UINT32_MAX); DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_LIT_EARLY); - ri.u.checkLitEarly.min_offset = min_offset; - program.push_back(move(ri)); + program.add_before_end(make_unique(min_offset)); } 
static @@ -4656,47 +4134,49 @@ bool hasDelayedLiteral(RoseBuildImpl &build, } static -vector buildLitInitialProgram(RoseBuildImpl &build, - build_context &bc, u32 final_id, - const vector &lit_edges) { - vector pre_program; +RoseProgram buildLitInitialProgram(RoseBuildImpl &build, build_context &bc, + u32 final_id, + const vector &lit_edges) { + RoseProgram program; // No initial program for EOD. if (final_id == MO_INVALID_IDX) { - return pre_program; + return program; } DEBUG_PRINTF("final_id %u\n", final_id); // Check lit mask. - makeCheckLitMaskInstruction(build, bc, final_id, pre_program); + makeCheckLitMaskInstruction(build, bc, final_id, program); // Check literal groups. This is an optimisation that we only perform for // delayed literals, as their groups may be switched off; ordinarily, we // can trust the HWLM matcher. if (hasDelayedLiteral(build, lit_edges)) { - makeGroupCheckInstruction(build, final_id, pre_program); + makeGroupCheckInstruction(build, final_id, program); } // Add instructions for pushing delayed matches, if there are any. - makePushDelayedInstructions(build, final_id, pre_program); + makePushDelayedInstructions(build, final_id, program); // Add pre-check for early literals in the floating table. - makeCheckLitEarlyInstruction(build, bc, final_id, lit_edges, pre_program); + makeCheckLitEarlyInstruction(build, bc, final_id, lit_edges, program); - return pre_program; + return program; } static -vector buildLiteralProgram(RoseBuildImpl &build, - build_context &bc, u32 final_id, - const vector &lit_edges) { +RoseProgram buildLiteralProgram(RoseBuildImpl &build, build_context &bc, + u32 final_id, + const vector &lit_edges) { const auto &g = build.g; DEBUG_PRINTF("final id %u, %zu lit edges\n", final_id, lit_edges.size()); - // pred state id -> list of programs - map>> predProgramLists; + RoseProgram program; + + // Predecessor state id -> program block. + map pred_blocks; // Construct sparse iter sub-programs. 
for (const auto &e : lit_edges) { @@ -4708,64 +4188,51 @@ vector buildLiteralProgram(RoseBuildImpl &build, g[target(e, g)].idx); assert(contains(bc.roleStateIndices, u)); u32 pred_state = bc.roleStateIndices.at(u); - auto program = makeProgram(build, bc, e); - if (program.empty()) { - continue; - } - predProgramLists[pred_state].push_back(program); + pred_blocks[pred_state].add_block(makeProgram(build, bc, e)); } - // Construct sub-program for handling root roles. - vector> root_programs; + // Add blocks to deal with non-root edges (triggered by sparse iterator or + // mmbit_isset checks). + addPredBlocks(bc, pred_blocks, program); + + // Add blocks to handle root roles. for (const auto &e : lit_edges) { const auto &u = source(e, g); if (!build.isAnyStart(u)) { continue; } DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].idx, g[target(e, g)].idx); - auto role_prog = makeProgram(build, bc, e); - if (role_prog.empty()) { - continue; - } - root_programs.push_back(role_prog); + program.add_block(makeProgram(build, bc, e)); } if (final_id != MO_INVALID_IDX) { - vector prog; + RoseProgram root_block; // Literal may squash groups. - makeGroupSquashInstruction(build, final_id, prog); + makeGroupSquashInstruction(build, final_id, root_block); // Literal may be anchored and need to be recorded. - makeRecordAnchoredInstruction(build, bc, final_id, prog); + makeRecordAnchoredInstruction(build, bc, final_id, root_block); - if (!prog.empty()) { - root_programs.push_back(move(prog)); - } + program.add_block(move(root_block)); } - vector root_program; - if (!root_programs.empty()) { - root_program = flattenProgram(root_programs); - } - - auto pre_program = buildLitInitialProgram(build, bc, final_id, lit_edges); - - // Put it all together. - return makeSparseIterProgram(bc, predProgramLists, root_program, - pre_program); + // Construct initial program up front, as its early checks must be able to + // jump to end and terminate processing for this literal. 
+ auto lit_program = buildLitInitialProgram(build, bc, final_id, lit_edges); + lit_program.add_before_end(move(program)); + return lit_program; } static u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id, const vector &lit_edges) { - auto program = buildLiteralProgram(build, bc, final_id, lit_edges); + RoseProgram program = buildLiteralProgram(build, bc, final_id, lit_edges); if (program.empty()) { return 0; } - // Note: already flattened. applyFinalSpecialisation(program); - return writeProgram(bc, program); + return writeProgram(bc, move(program)); } static @@ -4777,13 +4244,12 @@ u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc, return 0; // No delayed IDs, no work to do. } - vector program; + RoseProgram program; makeCheckLitMaskInstruction(build, bc, final_id, program); makePushDelayedInstructions(build, final_id, program); assert(!program.empty()); - program = flattenProgram({program}); applyFinalSpecialisation(program); - return writeProgram(bc, program); + return writeProgram(bc, move(program)); } static @@ -4844,9 +4310,9 @@ pair buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { } u32 litProgramsOffset = - add_to_engine_blob(bc, begin(bc.litPrograms), end(bc.litPrograms)); - u32 delayRebuildProgramsOffset = add_to_engine_blob( - bc, begin(delayRebuildPrograms), end(delayRebuildPrograms)); + bc.engine_blob.add(begin(bc.litPrograms), end(bc.litPrograms)); + u32 delayRebuildProgramsOffset = bc.engine_blob.add( + begin(delayRebuildPrograms), end(delayRebuildPrograms)); return {litProgramsOffset, delayRebuildProgramsOffset}; } @@ -4884,35 +4350,31 @@ pair buildReportPrograms(RoseBuildImpl &build, build_context &bc) { vector programs; programs.reserve(reports.size()); - vector program; for (ReportID id : reports) { - program.clear(); + RoseProgram program; const bool has_som = false; makeCatchupMpv(build, bc, id, program); makeReport(build, id, has_som, program); - program = flattenProgram({program}); 
applyFinalSpecialisation(program); - u32 offset = writeProgram(bc, program); + u32 offset = writeProgram(bc, move(program)); programs.push_back(offset); build.rm.setProgramOffset(id, offset); DEBUG_PRINTF("program for report %u @ %u (%zu instructions)\n", id, programs.back(), program.size()); } - u32 offset = add_to_engine_blob(bc, begin(programs), end(programs)); + u32 offset = bc.engine_blob.add(begin(programs), end(programs)); u32 count = verify_u32(programs.size()); return {offset, count}; } static -vector makeEodAnchorProgram(RoseBuildImpl &build, - build_context &bc, - const RoseEdge &e, - const bool multiple_preds) { +RoseProgram makeEodAnchorProgram(RoseBuildImpl &build, build_context &bc, + const RoseEdge &e, const bool multiple_preds) { const RoseGraph &g = build.g; const RoseVertex v = target(e, g); - vector program; + RoseProgram program; if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { makeRoleCheckBounds(build, v, e, program); @@ -4927,9 +4389,11 @@ vector makeEodAnchorProgram(RoseBuildImpl &build, makeCatchup(build, bc, reports, program); const bool has_som = false; + RoseProgram report_block; for (const auto &id : reports) { - makeReport(build, id, has_som, program); + makeReport(build, id, has_som, report_block); } + program.add_before_end(move(report_block)); return program; } @@ -4961,11 +4425,11 @@ bool hasEodMatcher(const RoseBuildImpl &build) { static void addEodAnchorProgram(RoseBuildImpl &build, build_context &bc, - bool in_etable, vector &program) { + bool in_etable, RoseProgram &program) { const RoseGraph &g = build.g; - // pred state id -> list of programs - map>> predProgramLists; + // Predecessor state id -> program block. 
+ map pred_blocks; for (auto v : vertices_range(g)) { if (!g[v].eod_accept) { @@ -4994,29 +4458,18 @@ void addEodAnchorProgram(RoseBuildImpl &build, build_context &bc, for (const auto &e : edge_list) { RoseVertex u = source(e, g); assert(contains(bc.roleStateIndices, u)); - u32 predStateIdx = bc.roleStateIndices.at(u); - - auto prog = makeEodAnchorProgram(build, bc, e, multiple_preds); - if (prog.empty()) { - continue; - } - predProgramLists[predStateIdx].push_back(prog); + u32 pred_state = bc.roleStateIndices.at(u); + pred_blocks[pred_state].add_block( + makeEodAnchorProgram(build, bc, e, multiple_preds)); } } - if (predProgramLists.empty()) { - return; - } - if (!program.empty()) { - assert(program.back().code() == ROSE_INSTR_END); - program.pop_back(); - } - addPredBlocks(bc, predProgramLists, program); + addPredBlocks(bc, pred_blocks, program); } static void addEodEventProgram(RoseBuildImpl &build, build_context &bc, - vector &program) { + RoseProgram &program) { if (build.eod_event_literal_id == MO_INVALID_IDX) { return; } @@ -5042,61 +4495,47 @@ void addEodEventProgram(RoseBuildImpl &build, build_context &bc, tie(g[source(b, g)].idx, g[target(b, g)].idx); }); - auto prog = buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list); - program.insert(end(program), begin(prog), end(prog)); + program.add_block( + buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list)); } static -void addEnginesEodProgram(u32 eodNfaIterOffset, - vector &program) { +void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) { if (!eodNfaIterOffset) { return; } - auto ri = RoseInstruction(ROSE_INSTR_ENGINES_EOD); - ri.u.enginesEod.iter_offset = eodNfaIterOffset; - if (!program.empty()) { - assert(program.back().code() == ROSE_INSTR_END); - program.pop_back(); - } - program.push_back(move(ri)); - program.emplace_back(ROSE_INSTR_END); + RoseProgram block; + block.add_before_end(make_unique(eodNfaIterOffset)); + program.add_block(move(block)); } static -void 
addSuffixesEodProgram(const RoseBuildImpl &build, - vector &program) { +void addSuffixesEodProgram(const RoseBuildImpl &build, RoseProgram &program) { if (!hasEodAnchoredSuffix(build)) { return; } - if (!program.empty()) { - assert(program.back().code() == ROSE_INSTR_END); - program.pop_back(); - } - program.emplace_back(ROSE_INSTR_SUFFIXES_EOD); - program.emplace_back(ROSE_INSTR_END); + RoseProgram block; + block.add_before_end(make_unique()); + program.add_block(move(block)); } static -void addMatcherEodProgram(const RoseBuildImpl &build, - vector &program) { +void addMatcherEodProgram(const RoseBuildImpl &build, RoseProgram &program) { if (!hasEodMatcher(build)) { return; } - if (!program.empty()) { - assert(program.back().code() == ROSE_INSTR_END); - program.pop_back(); - } - program.emplace_back(ROSE_INSTR_MATCHER_EOD); - program.emplace_back(ROSE_INSTR_END); + RoseProgram block; + block.add_before_end(make_unique()); + program.add_block(move(block)); } static u32 writeEodProgram(RoseBuildImpl &build, build_context &bc, u32 eodNfaIterOffset) { - vector program; + RoseProgram program; addEodEventProgram(build, bc, program); addEnginesEodProgram(eodNfaIterOffset, program); @@ -5105,17 +4544,12 @@ u32 writeEodProgram(RoseBuildImpl &build, build_context &bc, addEodAnchorProgram(build, bc, true, program); addSuffixesEodProgram(build, program); - if (program.size() == 1) { - assert(program.back().code() == ROSE_INSTR_END); - return 0; - } - if (program.empty()) { return 0; } applyFinalSpecialisation(program); - return writeProgram(bc, program); + return writeProgram(bc, move(program)); } static @@ -5358,13 +4792,12 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 currOffset; /* relative to base of RoseEngine */ if (!bc.engine_blob.empty()) { - currOffset = bc.engine_blob_base + byte_length(bc.engine_blob); + currOffset = bc.engine_blob.base_offset + bc.engine_blob.size(); } else { currOffset = sizeof(RoseEngine); } - UNUSED const size_t 
engineBlobSize = - byte_length(bc.engine_blob); // test later + UNUSED const size_t engineBlobSize = bc.engine_blob.size(); // test later currOffset = ROUNDUP_CL(currOffset); DEBUG_PRINTF("currOffset %u\n", currOffset); @@ -5616,7 +5049,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { &engine->tStateSize); // Copy in other tables - copy_bytes(ptr + bc.engine_blob_base, bc.engine_blob); + bc.engine_blob.write_bytes(engine.get()); copy_bytes(ptr + engine->leftOffset, leftInfoTable); fillLookaroundTables(ptr + lookaroundTableOffset, @@ -5627,7 +5060,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Safety check: we shouldn't have written anything to the engine blob // after we copied it into the engine bytecode. - assert(byte_length(bc.engine_blob) == engineBlobSize); + assert(bc.engine_blob.size() == engineBlobSize); // Add a small write engine if appropriate. engine = addSmallWriteEngine(*this, move(engine)); diff --git a/src/rose/rose_build_engine_blob.h b/src/rose/rose_build_engine_blob.h new file mode 100644 index 00000000..0914502e --- /dev/null +++ b/src/rose/rose_build_engine_blob.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ROSE_BUILD_ENGINE_BLOB_H +#define ROSE_BUILD_ENGINE_BLOB_H + +#include "rose_internal.h" + +#include "ue2common.h" +#include "util/alloc.h" +#include "util/container.h" +#include "util/verify_types.h" + +#include +#include + +#include + +namespace ue2 { + +class RoseEngineBlob : boost::noncopyable { +public: + /** \brief Base offset of engine_blob in the Rose engine bytecode. 
*/ + static constexpr u32 base_offset = ROUNDUP_CL(sizeof(RoseEngine)); + + bool empty() const { + return blob.empty(); + } + + size_t size() const { + return blob.size(); + } + + const char *data() const { + return blob.data(); + } + + u32 add(const void *a, const size_t len, const size_t align) { + pad(align); + + size_t rv = base_offset + blob.size(); + assert(rv >= base_offset); + DEBUG_PRINTF("write %zu bytes at offset %zu\n", len, rv); + + assert(ISALIGNED_N(blob.size(), align)); + + blob.resize(blob.size() + len); + memcpy(&blob.back() - len + 1, a, len); + + return verify_u32(rv); + } + + template + u32 add(const T &a) { + static_assert(std::is_pod::value, "should be pod"); + return add(&a, sizeof(a), alignof(T)); + } + + template + u32 add(const T &a, const size_t len) { + static_assert(std::is_pod::value, "should be pod"); + return add(&a, len, alignof(T)); + } + + template + u32 add(Iter b, const Iter &e) { + using value_type = typename std::iterator_traits::value_type; + static_assert(std::is_pod::value, "should be pod"); + + if (b == e) { + return 0; + } + + u32 offset = add(*b); + for (++b; b != e; ++b) { + add(*b); + } + + return offset; + } + + void write_bytes(RoseEngine *engine) { + copy_bytes((char *)engine + base_offset, blob); + } + +private: + void pad(size_t align) { + assert(ISALIGNED_N(base_offset, align)); + size_t s = blob.size(); + + if (ISALIGNED_N(s, align)) { + return; + } + + blob.resize(s + align - s % align); + } + + /** + * \brief Contents of the Rose bytecode immediately following the + * RoseEngine. 
+ */ + std::vector> blob; +}; + +} // namespace ue2 + +#endif // ROSE_BUILD_ENGINE_BLOB_H diff --git a/src/rose/rose_build_program.cpp b/src/rose/rose_build_program.cpp new file mode 100644 index 00000000..73740976 --- /dev/null +++ b/src/rose/rose_build_program.cpp @@ -0,0 +1,491 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "rose_build_engine_blob.h" +#include "rose_build_program.h" +#include "util/container.h" +#include "util/multibit_build.h" +#include "util/verify_types.h" + +#include +#include + +using namespace std; + +namespace ue2 { + +/* Destructors to avoid weak vtables. */ + +RoseInstruction::~RoseInstruction() = default; +RoseInstrCatchUp::~RoseInstrCatchUp() = default; +RoseInstrCatchUpMpv::~RoseInstrCatchUpMpv() = default; +RoseInstrSomZero::~RoseInstrSomZero() = default; +RoseInstrSuffixesEod::~RoseInstrSuffixesEod() = default; +RoseInstrMatcherEod::~RoseInstrMatcherEod() = default; +RoseInstrEnd::~RoseInstrEnd() = default; + +using OffsetMap = RoseInstruction::OffsetMap; + +static +u32 calc_jump(const OffsetMap &offset_map, const RoseInstruction *from, + const RoseInstruction *to) { + DEBUG_PRINTF("computing relative jump from %p to %p\n", from, to); + assert(from && contains(offset_map, from)); + assert(to && contains(offset_map, to)); + + u32 from_offset = offset_map.at(from); + u32 to_offset = offset_map.at(to); + DEBUG_PRINTF("offsets: %u -> %u\n", from_offset, to_offset); + assert(from_offset <= to_offset); + + return to_offset - from_offset; +} + +void RoseInstrAnchoredDelay::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->groups = groups; + inst->done_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckLitEarly::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->min_offset = min_offset; +} + +void RoseInstrCheckGroups::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->groups = groups; +} + +void RoseInstrCheckOnlyEod::write(void *dest, RoseEngineBlob &blob, + const OffsetMap 
&offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckBounds::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->min_bound = min_bound; + inst->max_bound = max_bound; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckNotHandled::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->key = key; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->index = index; + inst->count = count; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMask::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->and_mask = and_mask; + inst->cmp_mask = cmp_mask; + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMask32::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + copy(begin(and_mask), end(and_mask), inst->and_mask); + copy(begin(cmp_mask), end(cmp_mask), inst->cmp_mask); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckByte::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = 
static_cast(dest); + inst->and_mask = and_mask; + inst->cmp_mask = cmp_mask; + inst->negation = negation; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckInfix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->queue = queue; + inst->lag = lag; + inst->report = report; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckPrefix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->queue = queue; + inst->lag = lag; + inst->report = report; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrPushDelayed::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->delay = delay; + inst->index = index; +} + +void RoseInstrRecordAnchored::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->id = id; +} + +void RoseInstrSomAdjust::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->distance = distance; +} + +void RoseInstrSomLeftfix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->queue = queue; + inst->lag = lag; +} + +void RoseInstrSomFromReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->som = som; +} + +void RoseInstrTriggerInfix::write(void *dest, 
RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->cancel = cancel; + inst->queue = queue; + inst->event = event; +} + +void RoseInstrTriggerSuffix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->queue = queue; + inst->event = event; +} + +void RoseInstrDedupe::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->quash_som = quash_som; + inst->dkey = dkey; + inst->offset_adjust = offset_adjust; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrDedupeSom::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->quash_som = quash_som; + inst->dkey = dkey; + inst->offset_adjust = offset_adjust; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrReportChain::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->event = event; + inst->top_squash_distance = top_squash_distance; +} + +void RoseInstrReportSomInt::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->som = som; +} + +void RoseInstrReportSomAware::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->som = som; +} + +void RoseInstrReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst 
= static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; +} + +void RoseInstrReportExhaust::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; + inst->ekey = ekey; +} + +void RoseInstrReportSom::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; +} + +void RoseInstrReportSomExhaust::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; + inst->ekey = ekey; +} + +void RoseInstrDedupeAndReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->quash_som = quash_som; + inst->dkey = dkey; + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrFinalReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; +} + +void RoseInstrCheckExhausted::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->ekey = ekey; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMinLength::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = 
static_cast(dest); + inst->end_adj = end_adj; + inst->min_length = min_length; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrSetState::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->index = index; +} + +void RoseInstrSetGroups::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->groups = groups; +} + +void RoseInstrSquashGroups::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->groups = groups; +} + +void RoseInstrCheckState::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->index = index; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrSparseIterBegin::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->fail_jump = calc_jump(offset_map, this, target); + + // Resolve and write the multibit sparse iterator and the jump table. + vector keys; + vector jump_offsets; + for (const auto &jump : jump_table) { + keys.push_back(jump.first); + assert(contains(offset_map, jump.second)); + jump_offsets.push_back(offset_map.at(jump.second)); + } + + vector iter; + mmbBuildSparseIterator(iter, keys, num_keys); + assert(!iter.empty()); + inst->iter_offset = blob.add(iter.begin(), iter.end()); + inst->jump_table = blob.add(jump_offsets.begin(), jump_offsets.end()); + + // Store offsets for corresponding SPARSE_ITER_NEXT operations. 
+ is_written = true; + iter_offset = inst->iter_offset; + jump_table_offset = inst->jump_table; +} + +void RoseInstrSparseIterNext::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->state = state; + inst->fail_jump = calc_jump(offset_map, this, target); + + // Use the same sparse iterator and jump table as the SPARSE_ITER_BEGIN + // instruction. + assert(begin); + assert(contains(offset_map, begin)); + assert(begin->is_written); + inst->iter_offset = begin->iter_offset; + inst->jump_table = begin->jump_table_offset; +} + +void RoseInstrSparseIterAny::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->fail_jump = calc_jump(offset_map, this, target); + + // Write the multibit sparse iterator. + vector iter; + mmbBuildSparseIterator(iter, keys, num_keys); + assert(!iter.empty()); + inst->iter_offset = blob.add(iter.begin(), iter.end()); +} + +void RoseInstrEnginesEod::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast(dest); + inst->iter_offset = iter_offset; +} + +static +OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) { + OffsetMap offset_map; + u32 offset = 0; + for (const auto &ri : program) { + offset = ROUNDUP_N(offset, ROSE_INSTR_MIN_ALIGN); + DEBUG_PRINTF("instr %p (opcode %d) -> offset %u\n", ri.get(), + ri->code(), offset); + assert(!contains(offset_map, ri.get())); + offset_map.emplace(ri.get(), offset); + offset += ri->byte_length(); + } + *total_len = offset; + return offset_map; +} + +aligned_unique_ptr +writeProgram(RoseEngineBlob &blob, const RoseProgram &program, u32 *total_len) { + const auto offset_map = makeOffsetMap(program, total_len); + DEBUG_PRINTF("%zu instructions, len %u\n", 
program.size(), *total_len); + + auto bytecode = aligned_zmalloc_unique(*total_len); + char *ptr = bytecode.get(); + + for (const auto &ri : program) { + assert(contains(offset_map, ri.get())); + const u32 offset = offset_map.at(ri.get()); + ri->write(ptr + offset, blob, offset_map); + } + + return bytecode; +} + +bool RoseProgramEquivalence::operator()(const RoseProgram &prog1, + const RoseProgram &prog2) const { + if (prog1.size() != prog2.size()) { + return false; + } + + u32 len_1 = 0, len_2 = 0; + const auto offset_map_1 = makeOffsetMap(prog1, &len_1); + const auto offset_map_2 = makeOffsetMap(prog2, &len_2); + + if (len_1 != len_2) { + return false; + } + + auto is_equiv = [&](const unique_ptr &a, + const unique_ptr &b) { + assert(a && b); + return a->equiv(*b, offset_map_1, offset_map_2); + }; + + return std::equal(prog1.begin(), prog1.end(), prog2.begin(), is_equiv); +} + +} // namespace ue2 diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h new file mode 100644 index 00000000..0853210b --- /dev/null +++ b/src/rose/rose_build_program.h @@ -0,0 +1,1802 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ROSE_BUILD_PROGRAM_H +#define ROSE_BUILD_PROGRAM_H + +#include "rose_build_impl.h" +#include "rose_program.h" +#include "som/som_operation.h" +#include "util/alloc.h" +#include "util/container.h" +#include "util/make_unique.h" +#include "util/ue2_containers.h" + +#include +#include +#include +#include + +namespace ue2 { + +class RoseEngineBlob; + +/** + * \brief Abstract base class representing a single Rose instruction. + */ +class RoseInstruction { +public: + virtual ~RoseInstruction(); + + /** \brief Opcode used for the instruction in the bytecode. */ + virtual RoseInstructionCode code() const = 0; + + /** + * \brief Simple hash used for program equivalence. + * + * Note that pointers (jumps, for example) should not be used when + * calculating the hash: they will be converted to instruction offsets when + * compared later. + */ + virtual size_t hash() const = 0; + + /** \brief Length of the bytecode instruction in bytes. */ + virtual size_t byte_length() const = 0; + + using OffsetMap = unordered_map; + + /** + * \brief Writes a concrete implementation of this instruction. 
+ * + * Other data that this instruction depends on is written directly into the + * blob, while the instruction structure itself (of size given by + * the byte_length() function) is written to dest. + */ + virtual void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const = 0; + + /** + * \brief Update a target pointer. + * + * If this instruction contains any reference to the old target, replace it + * with the new one. + */ + virtual void update_target(const RoseInstruction *old_target, + const RoseInstruction *new_target) = 0; + + /** + * \brief True if these instructions are equivalent within their own + * programs. + * + * Checks that any pointers to other instructions point to the same + * offsets. + */ + bool equiv(const RoseInstruction &other, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return equiv_impl(other, offsets, other_offsets); + } + +private: + virtual bool equiv_impl(const RoseInstruction &other, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const = 0; +}; + +/** + * \brief Templated implementation class to handle boring boilerplate code. + */ +template +class RoseInstrBase : public RoseInstruction { +protected: + static constexpr RoseInstructionCode opcode = Opcode; + using impl_type = ImplType; + +public: + RoseInstructionCode code() const override { return opcode; } + + size_t byte_length() const override { + return sizeof(impl_type); + } + + /** + * Note: this implementation simply zeroes the destination region and + * writes in the correct opcode. This is sufficient for trivial + * instructions, but instructions with data members will want to override + * it. 
+ */ + void write(void *dest, RoseEngineBlob &, + const RoseInstruction::OffsetMap &) const override { + assert(dest != nullptr); + assert(ISALIGNED_N(dest, ROSE_INSTR_MIN_ALIGN)); + + impl_type *inst = static_cast(dest); + memset(inst, 0, sizeof(impl_type)); + inst->code = verify_u8(opcode); + } + +private: + bool equiv_impl(const RoseInstruction &other, const OffsetMap &offsets, + const OffsetMap &other_offsets) const override { + const auto *ri_that = dynamic_cast(&other); + if (!ri_that) { + return false; + } + const auto *ri_this = dynamic_cast(this); + assert(ri_this); + return ri_this->equiv_to(*ri_that, offsets, other_offsets); + } +}; + +/** + * \brief Refinement of RoseInstrBase to use for instructions that have + * just a single target member, called "target". + */ +template +class RoseInstrBaseOneTarget + : public RoseInstrBase { +public: + void update_target(const RoseInstruction *old_target, + const RoseInstruction *new_target) override { + RoseInstrType *ri = dynamic_cast(this); + assert(ri); + if (ri->target == old_target) { + ri->target = new_target; + } + } +}; + +/** + * \brief Refinement of RoseInstrBase to use for instructions that have no + * targets. + */ +template +class RoseInstrBaseNoTargets + : public RoseInstrBase { +public: + void update_target(const RoseInstruction *, + const RoseInstruction *) override {} +}; + +/** + * \brief Refinement of RoseInstrBaseNoTargets to use for instructions that + * have no members at all, just an opcode. + */ +template +class RoseInstrBaseTrivial + : public RoseInstrBaseNoTargets { +public: + virtual bool operator==(const RoseInstrType &) const { return true; } + + size_t hash() const override { + return Opcode; + } + + bool equiv_to(const RoseInstrType &, const RoseInstruction::OffsetMap &, + const RoseInstruction::OffsetMap &) const { + return true; + } +}; + +//// +//// Concrete implementation classes start here. 
+//// + +class RoseInstrAnchoredDelay + : public RoseInstrBaseOneTarget { +public: + rose_group groups; + const RoseInstruction *target; + + RoseInstrAnchoredDelay(rose_group groups_in, + const RoseInstruction *target_in) + : groups(groups_in), target(target_in) {} + + bool operator==(const RoseInstrAnchoredDelay &ri) const { + return groups == ri.groups && target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, groups); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrAnchoredDelay &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return groups == ri.groups && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckLitEarly + : public RoseInstrBaseNoTargets { +public: + u32 min_offset; + + explicit RoseInstrCheckLitEarly(u32 min) : min_offset(min) {} + + bool operator==(const RoseInstrCheckLitEarly &ri) const { + return min_offset == ri.min_offset; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, min_offset); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLitEarly &ri, const OffsetMap &, + const OffsetMap &) const { + return min_offset == ri.min_offset; + } +}; + +class RoseInstrCheckGroups + : public RoseInstrBaseNoTargets { +public: + rose_group groups; + + explicit RoseInstrCheckGroups(rose_group groups_in) : groups(groups_in) {} + + bool operator==(const RoseInstrCheckGroups &ri) const { + return groups == ri.groups; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, groups); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckGroups &ri, const OffsetMap &, + const OffsetMap &) const { + return 
groups == ri.groups; + } +}; + +class RoseInstrCheckOnlyEod + : public RoseInstrBaseOneTarget { +public: + const RoseInstruction *target; + + explicit RoseInstrCheckOnlyEod(const RoseInstruction *target_in) + : target(target_in) {} + + bool operator==(const RoseInstrCheckOnlyEod &ri) const { + return target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckOnlyEod &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckBounds + : public RoseInstrBaseOneTarget { +public: + u64a min_bound; + u64a max_bound; + const RoseInstruction *target; + + RoseInstrCheckBounds(u64a min, u64a max, const RoseInstruction *target_in) + : min_bound(min), max_bound(max), target(target_in) {} + + bool operator==(const RoseInstrCheckBounds &ri) const { + return min_bound == ri.min_bound && max_bound == ri.max_bound && + target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, min_bound); + boost::hash_combine(v, max_bound); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckBounds &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return min_bound == ri.min_bound && max_bound == ri.max_bound && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckNotHandled + : public RoseInstrBaseOneTarget { +public: + u32 key; + const RoseInstruction *target; + + RoseInstrCheckNotHandled(u32 key_in, const RoseInstruction *target_in) + : key(key_in), target(target_in) {} + + bool operator==(const RoseInstrCheckNotHandled &ri) const { + return key == ri.key && target == ri.target; + } + + size_t hash() const override { + size_t 
v = opcode; + boost::hash_combine(v, key); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckNotHandled &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return key == ri.key && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckLookaround + : public RoseInstrBaseOneTarget { +public: + u32 index; + u32 count; + const RoseInstruction *target; + + RoseInstrCheckLookaround(u32 index_in, u32 count_in, + const RoseInstruction *target_in) + : index(index_in), count(count_in), target(target_in) {} + + bool operator==(const RoseInstrCheckLookaround &ri) const { + return index == ri.index && count == ri.count && target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, index); + boost::hash_combine(v, count); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLookaround &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return index == ri.index && count == ri.count && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMask + : public RoseInstrBaseOneTarget { +public: + u64a and_mask; + u64a cmp_mask; + u64a neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckMask(u64a and_mask_in, u64a cmp_mask_in, u64a neg_mask_in, + s32 offset_in, const RoseInstruction *target_in) + : and_mask(and_mask_in), cmp_mask(cmp_mask_in), neg_mask(neg_mask_in), + offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMask &ri) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, and_mask); + boost::hash_combine(v, 
cmp_mask); + boost::hash_combine(v, neg_mask); + boost::hash_combine(v, offset); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMask &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMask32 + : public RoseInstrBaseOneTarget { +public: + std::array and_mask; + std::array cmp_mask; + u32 neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckMask32(std::array and_mask_in, + std::array cmp_mask_in, u32 neg_mask_in, + s32 offset_in, const RoseInstruction *target_in) + : and_mask(move(and_mask_in)), cmp_mask(move(cmp_mask_in)), + neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMask32 &ri) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, and_mask); + boost::hash_combine(v, cmp_mask); + boost::hash_combine(v, neg_mask); + boost::hash_combine(v, offset); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMask32 &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckByte + : public RoseInstrBaseOneTarget { +public: + u8 and_mask; + u8 cmp_mask; + u8 negation; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckByte(u8 and_mask_in, u8 cmp_mask_in, u8 negation_in, + s32 offset_in, 
const RoseInstruction *target_in) + : and_mask(and_mask_in), cmp_mask(cmp_mask_in), negation(negation_in), + offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckByte &ri) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + negation == ri.negation && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, and_mask); + boost::hash_combine(v, cmp_mask); + boost::hash_combine(v, negation); + boost::hash_combine(v, offset); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckByte &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + negation == ri.negation && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckInfix + : public RoseInstrBaseOneTarget { +public: + u32 queue; + u32 lag; + ReportID report; + const RoseInstruction *target; + + RoseInstrCheckInfix(u32 queue_in, u32 lag_in, ReportID report_in, + const RoseInstruction *target_in) + : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {} + + bool operator==(const RoseInstrCheckInfix &ri) const { + return queue == ri.queue && lag == ri.lag && report == ri.report && + target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, queue); + boost::hash_combine(v, lag); + boost::hash_combine(v, report); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckInfix &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return queue == ri.queue && lag == ri.lag && report == ri.report && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckPrefix + : public 
RoseInstrBaseOneTarget { +public: + u32 queue; + u32 lag; + ReportID report; + const RoseInstruction *target; + + RoseInstrCheckPrefix(u32 queue_in, u32 lag_in, ReportID report_in, + const RoseInstruction *target_in) + : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {} + + bool operator==(const RoseInstrCheckPrefix &ri) const { + return queue == ri.queue && lag == ri.lag && report == ri.report && + target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, queue); + boost::hash_combine(v, lag); + boost::hash_combine(v, report); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckPrefix &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return queue == ri.queue && lag == ri.lag && report == ri.report && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrPushDelayed + : public RoseInstrBaseNoTargets { +public: + u8 delay; + u32 index; + + RoseInstrPushDelayed(u8 delay_in, u32 index_in) + : delay(delay_in), index(index_in) {} + + bool operator==(const RoseInstrPushDelayed &ri) const { + return delay == ri.delay && index == ri.index; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, delay); + boost::hash_combine(v, index); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrPushDelayed &ri, const OffsetMap &, + const OffsetMap &) const { + return delay == ri.delay && index == ri.index; + } +}; + +class RoseInstrRecordAnchored + : public RoseInstrBaseNoTargets { +public: + u32 id; + + explicit RoseInstrRecordAnchored(u32 id_in) : id(id_in) {} + + bool operator==(const RoseInstrRecordAnchored &ri) const { + return id == ri.id; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, id); + return v; + } 
+ + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrRecordAnchored &ri, const OffsetMap &, + const OffsetMap &) const { + return id == ri.id; + } +}; + +class RoseInstrCatchUp + : public RoseInstrBaseTrivial { +public: + ~RoseInstrCatchUp() override; +}; + +class RoseInstrCatchUpMpv + : public RoseInstrBaseTrivial { +public: + ~RoseInstrCatchUpMpv() override; +}; + +class RoseInstrSomAdjust + : public RoseInstrBaseNoTargets { +public: + u32 distance; + + explicit RoseInstrSomAdjust(u32 distance_in) : distance(distance_in) {} + + bool operator==(const RoseInstrSomAdjust &ri) const { + return distance == ri.distance; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, distance); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSomAdjust &ri, const OffsetMap &, + const OffsetMap &) const { + return distance == ri.distance; + } +}; + +class RoseInstrSomLeftfix + : public RoseInstrBaseNoTargets { +public: + u32 queue; + u32 lag; + + RoseInstrSomLeftfix(u32 queue_in, u32 lag_in) + : queue(queue_in), lag(lag_in) {} + + bool operator==(const RoseInstrSomLeftfix &ri) const { + return queue == ri.queue && lag == ri.lag; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, queue); + boost::hash_combine(v, lag); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSomLeftfix &ri, const OffsetMap &, + const OffsetMap &) const { + return queue == ri.queue && lag == ri.lag; + } +}; + +class RoseInstrSomFromReport + : public RoseInstrBaseNoTargets { +public: + som_operation som; + + RoseInstrSomFromReport() { + std::memset(&som, 0, sizeof(som)); + } + + bool operator==(const RoseInstrSomFromReport &ri) const { + return std::memcmp(&som, &ri.som, 
sizeof(som)) == 0; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, som.type); + boost::hash_combine(v, som.onmatch); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSomFromReport &ri, const OffsetMap &, + const OffsetMap &) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } +}; + +class RoseInstrSomZero + : public RoseInstrBaseTrivial { +public: + ~RoseInstrSomZero() override; +}; + +class RoseInstrTriggerInfix + : public RoseInstrBaseNoTargets { +public: + u8 cancel; + u32 queue; + u32 event; + + RoseInstrTriggerInfix(u8 cancel_in, u32 queue_in, u32 event_in) + : cancel(cancel_in), queue(queue_in), event(event_in) {} + + bool operator==(const RoseInstrTriggerInfix &ri) const { + return cancel == ri.cancel && queue == ri.queue && event == ri.event; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, cancel); + boost::hash_combine(v, queue); + boost::hash_combine(v, event); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrTriggerInfix &ri, const OffsetMap &, + const OffsetMap &) const { + return cancel == ri.cancel && queue == ri.queue && event == ri.event; + } +}; + +class RoseInstrTriggerSuffix + : public RoseInstrBaseNoTargets { +public: + u32 queue; + u32 event; + + RoseInstrTriggerSuffix(u32 queue_in, u32 event_in) + : queue(queue_in), event(event_in) {} + + bool operator==(const RoseInstrTriggerSuffix &ri) const { + return queue == ri.queue && event == ri.event; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, queue); + boost::hash_combine(v, event); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrTriggerSuffix &ri, const OffsetMap &, + const OffsetMap &) 
const { + return queue == ri.queue && event == ri.event; + } +}; + +class RoseInstrDedupe + : public RoseInstrBaseOneTarget { +public: + u8 quash_som; + u32 dkey; + s32 offset_adjust; + const RoseInstruction *target; + + RoseInstrDedupe(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in, + const RoseInstruction *target_in) + : quash_som(quash_som_in), dkey(dkey_in), + offset_adjust(offset_adjust_in), target(target_in) {} + + bool operator==(const RoseInstrDedupe &ri) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + offset_adjust == ri.offset_adjust && target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, quash_som); + boost::hash_combine(v, dkey); + boost::hash_combine(v, offset_adjust); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrDedupe &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + offset_adjust == ri.offset_adjust && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrDedupeSom + : public RoseInstrBaseOneTarget { +public: + u8 quash_som; + u32 dkey; + s32 offset_adjust; + const RoseInstruction *target; + + RoseInstrDedupeSom(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in, + const RoseInstruction *target_in) + : quash_som(quash_som_in), dkey(dkey_in), + offset_adjust(offset_adjust_in), target(target_in) {} + + bool operator==(const RoseInstrDedupeSom &ri) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + offset_adjust == ri.offset_adjust && target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, quash_som); + boost::hash_combine(v, dkey); + boost::hash_combine(v, offset_adjust); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const 
RoseInstrDedupeSom &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + offset_adjust == ri.offset_adjust && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrReportChain + : public RoseInstrBaseNoTargets { +public: + u32 event; + u64a top_squash_distance; + + RoseInstrReportChain(u32 event_in, u32 top_squash_distance_in) + : event(event_in), top_squash_distance(top_squash_distance_in) {} + + bool operator==(const RoseInstrReportChain &ri) const { + return event == ri.event && + top_squash_distance == ri.top_squash_distance; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, event); + boost::hash_combine(v, top_squash_distance); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportChain &ri, const OffsetMap &, + const OffsetMap &) const { + return event == ri.event && + top_squash_distance == ri.top_squash_distance; + } +}; + +class RoseInstrReportSomInt + : public RoseInstrBaseNoTargets { +public: + som_operation som; + + RoseInstrReportSomInt() { + std::memset(&som, 0, sizeof(som)); + } + + bool operator==(const RoseInstrReportSomInt &ri) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, som.type); + boost::hash_combine(v, som.onmatch); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportSomInt &ri, const OffsetMap &, + const OffsetMap &) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } +}; + +class RoseInstrReportSomAware + : public RoseInstrBaseNoTargets { +public: + som_operation som; + + RoseInstrReportSomAware() { + std::memset(&som, 0, sizeof(som)); + } + + bool operator==(const RoseInstrReportSomAware &ri) 
const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, som.type); + boost::hash_combine(v, som.onmatch); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportSomAware &ri, const OffsetMap &, + const OffsetMap &) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } +}; + +class RoseInstrReport + : public RoseInstrBaseNoTargets { +public: + ReportID onmatch; + s32 offset_adjust; + + RoseInstrReport(ReportID onmatch_in, s32 offset_adjust_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} + + bool operator==(const RoseInstrReport &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, onmatch); + boost::hash_combine(v, offset_adjust); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReport &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } +}; + +class RoseInstrReportExhaust + : public RoseInstrBaseNoTargets { +public: + ReportID onmatch; + s32 offset_adjust; + u32 ekey; + + RoseInstrReportExhaust(ReportID onmatch_in, s32 offset_adjust_in, + u32 ekey_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {} + + bool operator==(const RoseInstrReportExhaust &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + ekey == ri.ekey; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, onmatch); + boost::hash_combine(v, offset_adjust); + boost::hash_combine(v, ekey); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const 
RoseInstrReportExhaust &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + ekey == ri.ekey; + } +}; + +class RoseInstrReportSom + : public RoseInstrBaseNoTargets { +public: + ReportID onmatch; + s32 offset_adjust; + + RoseInstrReportSom(ReportID onmatch_in, s32 offset_adjust_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} + + bool operator==(const RoseInstrReportSom &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, onmatch); + boost::hash_combine(v, offset_adjust); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportSom &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } +}; + +class RoseInstrReportSomExhaust + : public RoseInstrBaseNoTargets { +public: + ReportID onmatch; + s32 offset_adjust; + u32 ekey; + + RoseInstrReportSomExhaust(ReportID onmatch_in, s32 offset_adjust_in, + u32 ekey_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {} + + bool operator==(const RoseInstrReportSomExhaust &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + ekey == ri.ekey; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, onmatch); + boost::hash_combine(v, offset_adjust); + boost::hash_combine(v, ekey); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportSomExhaust &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + ekey == ri.ekey; + } +}; + +class RoseInstrDedupeAndReport + : public RoseInstrBaseOneTarget { +public: + u8 quash_som; + u32 
dkey; + ReportID onmatch; + s32 offset_adjust; + const RoseInstruction *target; + + RoseInstrDedupeAndReport(u8 quash_som_in, u32 dkey_in, ReportID onmatch_in, + s32 offset_adjust_in, + const RoseInstruction *target_in) + : quash_som(quash_som_in), dkey(dkey_in), onmatch(onmatch_in), + offset_adjust(offset_adjust_in), target(target_in) {} + + bool operator==(const RoseInstrDedupeAndReport &ri) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, quash_som); + boost::hash_combine(v, dkey); + boost::hash_combine(v, onmatch); + boost::hash_combine(v, offset_adjust); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrDedupeAndReport &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrFinalReport + : public RoseInstrBaseNoTargets { +public: + ReportID onmatch; + s32 offset_adjust; + + RoseInstrFinalReport(ReportID onmatch_in, s32 offset_adjust_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} + + bool operator==(const RoseInstrFinalReport &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, onmatch); + boost::hash_combine(v, offset_adjust); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrFinalReport &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } +}; + +class RoseInstrCheckExhausted 
+ : public RoseInstrBaseOneTarget { +public: + u32 ekey; + const RoseInstruction *target; + + RoseInstrCheckExhausted(u32 ekey_in, const RoseInstruction *target_in) + : ekey(ekey_in), target(target_in) {} + + bool operator==(const RoseInstrCheckExhausted &ri) const { + return ekey == ri.ekey && target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, ekey); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckExhausted &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return ekey == ri.ekey && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMinLength + : public RoseInstrBaseOneTarget { +public: + s32 end_adj; + u64a min_length; + const RoseInstruction *target; + + RoseInstrCheckMinLength(s32 end_adj_in, u64a min_length_in, + const RoseInstruction *target_in) + : end_adj(end_adj_in), min_length(min_length_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMinLength &ri) const { + return end_adj == ri.end_adj && min_length == ri.min_length && + target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, end_adj); + boost::hash_combine(v, min_length); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMinLength &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return end_adj == ri.end_adj && min_length == ri.min_length && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrSetState + : public RoseInstrBaseNoTargets { +public: + u32 index; + + explicit RoseInstrSetState(u32 index_in) : index(index_in) {} + + bool operator==(const RoseInstrSetState &ri) const { + return index == ri.index; + } + + size_t hash() const override { + size_t v = opcode; + 
boost::hash_combine(v, index); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSetState &ri, const OffsetMap &, + const OffsetMap &) const { + return index == ri.index; + } +}; + +class RoseInstrSetGroups + : public RoseInstrBaseNoTargets { +public: + rose_group groups; + + explicit RoseInstrSetGroups(rose_group groups_in) : groups(groups_in) {} + + bool operator==(const RoseInstrSetGroups &ri) const { + return groups == ri.groups; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, groups); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSetGroups &ri, const OffsetMap &, + const OffsetMap &) const { + return groups == ri.groups; + } +}; + +class RoseInstrSquashGroups + : public RoseInstrBaseNoTargets { +public: + rose_group groups; + + explicit RoseInstrSquashGroups(rose_group groups_in) : groups(groups_in) {} + + bool operator==(const RoseInstrSquashGroups &ri) const { + return groups == ri.groups; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, groups); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSquashGroups &ri, const OffsetMap &, + const OffsetMap &) const { + return groups == ri.groups; + } +}; + +class RoseInstrCheckState + : public RoseInstrBaseOneTarget { +public: + u32 index; + const RoseInstruction *target; + + RoseInstrCheckState(u32 index_in, const RoseInstruction *target_in) + : index(index_in), target(target_in) {} + + bool operator==(const RoseInstrCheckState &ri) const { + return index == ri.index && target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, index); + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const 
OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckState &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return index == ri.index && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrSparseIterBegin + : public RoseInstrBase { +public: + u32 num_keys; // total number of multibit keys + std::vector> jump_table; + const RoseInstruction *target; + + RoseInstrSparseIterBegin(u32 num_keys_in, + const RoseInstruction *target_in) + : num_keys(num_keys_in), target(target_in) {} + + bool operator==(const RoseInstrSparseIterBegin &ri) const { + return num_keys == ri.num_keys && jump_table == ri.jump_table && + target == ri.target; + } + + size_t hash() const override { + size_t v = opcode; + boost::hash_combine(v, num_keys); + for (const auto &jump : jump_table) { + boost::hash_combine(v, jump.first); + } + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + void update_target(const RoseInstruction *old_target, + const RoseInstruction *new_target) override { + if (target == old_target) { + target = new_target; + } + for (auto &jump : jump_table) { + if (jump.second == old_target) { + jump.second = new_target; + } + } + } + + bool equiv_to(const RoseInstrSparseIterBegin &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + if (iter_offset != ri.iter_offset || + offsets.at(target) != other_offsets.at(ri.target)) { + return false; + } + if (jump_table.size() != ri.jump_table.size()) { + return false; + } + auto it1 = jump_table.begin(), it2 = ri.jump_table.begin(); + for (; it1 != jump_table.end(); ++it1, ++it2) { + if (it1->first != it2->first) { + return false; + } + if (offsets.at(it1->second) != other_offsets.at(it2->second)) { + return false; + } + } + return true; + } + +private: + friend class RoseInstrSparseIterNext; + + // These variables allow us to use the same multibit iterator and jump + // table in 
subsequent SPARSE_ITER_NEXT write() operations.
+    mutable bool is_written = false;
+    mutable u32 iter_offset = 0;
+    mutable u32 jump_table_offset = 0;
+};
+
+// NOTE(review): in this copy of the patch the base-class names, std::vector,
+// std::unique_ptr, static_cast, make_unique and "template" appear without
+// their angle-bracket argument lists (e.g. "std::vector> prog" below).
+// Presumably they were stripped during extraction -- confirm against the
+// upstream file before relying on this text.
+
+/**
+ * \brief Build-time instruction that is paired with a
+ * RoseInstrSparseIterBegin.
+ *
+ * Holds a state value, a pointer to its begin instruction and a pointer to a
+ * target instruction. Presumably models ROSE_INSTR_SPARSE_ITER_NEXT; the
+ * opcode arguments of the base class are not visible here -- TODO confirm.
+ */
+class RoseInstrSparseIterNext
+    : public RoseInstrBase {
+public:
+    u32 state;
+    const RoseInstrSparseIterBegin *begin; //!< Paired begin instruction.
+    const RoseInstruction *target; //!< Presumably the fail-jump destination.
+
+    RoseInstrSparseIterNext(u32 state_in,
+                            const RoseInstrSparseIterBegin *begin_in,
+                            const RoseInstruction *target_in)
+        : state(state_in), begin(begin_in), target(target_in) {}
+
+    /** \brief Exact equality: same state and same begin/target pointers. */
+    bool operator==(const RoseInstrSparseIterNext &ri) const {
+        return state == ri.state && begin == ri.begin && target == ri.target;
+    }
+
+    /** \brief Hash of opcode and state; begin and target are not mixed in. */
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, state);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    /**
+     * \brief Redirect references from old_target to new_target.
+     *
+     * Both target and begin are checked. When begin is redirected, the new
+     * instruction is asserted to have code ROSE_INSTR_SPARSE_ITER_BEGIN
+     * before it is downcast.
+     */
+    void update_target(const RoseInstruction *old_target,
+                       const RoseInstruction *new_target) override {
+        if (target == old_target) {
+            target = new_target;
+        }
+        if (begin == old_target) {
+            assert(new_target->code() == ROSE_INSTR_SPARSE_ITER_BEGIN);
+            begin = static_cast(new_target);
+        }
+    }
+
+    /**
+     * \brief Equivalence across two programs.
+     *
+     * True when the states match and begin/target map to equal values in
+     * their respective offset maps: offsets is consulted for this
+     * instruction's pointers, other_offsets for those of ri.
+     */
+    bool equiv_to(const RoseInstrSparseIterNext &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return state == ri.state &&
+               offsets.at(begin) == other_offsets.at(ri.begin) &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+/**
+ * \brief Build-time instruction carrying a key list and a single target.
+ *
+ * Presumably models ROSE_INSTR_SPARSE_ITER_ANY ("Test for any bit in the
+ * sparse iterator"), which this patch adds to rose_program.h -- the opcode
+ * arguments of the base class are not visible here, TODO confirm.
+ */
+class RoseInstrSparseIterAny
+    : public RoseInstrBaseOneTarget {
+public:
+    u32 num_keys; // total number of multibit keys
+    std::vector keys; // presumably the keys to test -- TODO confirm
+    const RoseInstruction *target; // presumably the fail-jump destination
+
+    RoseInstrSparseIterAny(u32 num_keys_in, std::vector keys_in,
+                           const RoseInstruction *target_in)
+        : num_keys(num_keys_in), keys(std::move(keys_in)), target(target_in) {}
+
+    /** \brief Exact equality: same num_keys, keys and target pointer. */
+    bool operator==(const RoseInstrSparseIterAny &ri) const {
+        return num_keys == ri.num_keys && keys == ri.keys &&
+               target == ri.target;
+    }
+
+    /** \brief Hash of opcode, num_keys and keys; target is not mixed in. */
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, num_keys);
+        boost::hash_combine(v, keys);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    /**
+     * \brief Equivalence across two programs: same num_keys and keys, and
+     * the targets map to equal values in their respective offset maps.
+     */
+    bool equiv_to(const RoseInstrSparseIterAny &ri, const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return num_keys == ri.num_keys && keys == ri.keys &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
+/**
+ * \brief Build-time instruction carrying a single iter_offset value.
+ *
+ * It holds no instruction pointers, so equiv_to() ignores both offset maps.
+ */
+class RoseInstrEnginesEod
+    : public RoseInstrBaseNoTargets {
+public:
+    u32 iter_offset;
+
+    explicit RoseInstrEnginesEod(u32 iter_in) : iter_offset(iter_in) {}
+
+    bool operator==(const RoseInstrEnginesEod &ri) const {
+        return iter_offset == ri.iter_offset;
+    }
+
+    /** \brief Hash of opcode and iter_offset. */
+    size_t hash() const override {
+        size_t v = opcode;
+        boost::hash_combine(v, iter_offset);
+        return v;
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrEnginesEod &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return iter_offset == ri.iter_offset;
+    }
+};
+
+/** \brief Declares only a destructor override; no data members of its own. */
+class RoseInstrSuffixesEod
+    : public RoseInstrBaseTrivial {
+public:
+    ~RoseInstrSuffixesEod() override;
+};
+
+/** \brief Declares only a destructor override; no data members of its own. */
+class RoseInstrMatcherEod : public RoseInstrBaseTrivial {
+public:
+    ~RoseInstrMatcherEod() override;
+};
+
+/**
+ * \brief Declares only a destructor override; no data members of its own.
+ *
+ * Presumably the instruction whose code is ROSE_INSTR_END, i.e. the
+ * terminator that RoseProgram keeps at the back of its list -- TODO confirm.
+ */
+class RoseInstrEnd
+    : public RoseInstrBaseTrivial {
+public:
+    ~RoseInstrEnd() override;
+};
+
+/**
+ * \brief Container for a list of program instructions.
+ *
+ * The constructor pushes one instruction, and the accessors and mutators
+ * below assert that the last instruction has code ROSE_INSTR_END. Mutators
+ * that move or drop an instruction also call update_target() on the affected
+ * instructions so that pointers to the old instruction are redirected.
+ */
+class RoseProgram {
+private:
+    std::vector> prog;
+
+public:
+    RoseProgram() {
+        prog.push_back(make_unique());
+    }
+
+    bool empty() const {
+        assert(!prog.empty());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+        // Empty if we only have one element, the END instruction.
+        return std::next(prog.begin()) == prog.end();
+    }
+
+    /** \brief Number of stored instructions, the terminating END included. */
+    size_t size() const { return prog.size(); }
+
+    const RoseInstruction &back() const { return *prog.back(); }
+    const RoseInstruction &front() const { return *prog.front(); }
+
+    using iterator = decltype(prog)::iterator;
+    iterator begin() { return prog.begin(); }
+    iterator end() { return prog.end(); }
+
+    using const_iterator = decltype(prog)::const_iterator;
+    const_iterator begin() const { return prog.begin(); }
+    const_iterator end() const { return prog.end(); }
+
+    using reverse_iterator = decltype(prog)::reverse_iterator;
+    reverse_iterator rbegin() { return prog.rbegin(); }
+    reverse_iterator rend() { return prog.rend(); }
+
+    using const_reverse_iterator = decltype(prog)::const_reverse_iterator;
+    const_reverse_iterator rbegin() const { return prog.rbegin(); }
+    const_reverse_iterator rend() const { return prog.rend(); }
+
+    /** \brief Retrieve a pointer to the terminating ROSE_INSTR_END. */
+    const RoseInstruction *end_instruction() const {
+        assert(!prog.empty());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+
+        return prog.back().get();
+    }
+
+private:
+    /**
+     * \brief Call update_target(old_target, new_target) on every instruction
+     * in the range [it, it_end).
+     *
+     * Both pointers must be non-null and distinct (asserted).
+     */
+    static void update_targets(iterator it, iterator it_end,
+                               const RoseInstruction *old_target,
+                               const RoseInstruction *new_target) {
+        assert(old_target && new_target && old_target != new_target);
+        for (; it != it_end; ++it) {
+            std::unique_ptr &ri = *it;
+            assert(ri);
+            ri->update_target(old_target, new_target);
+        }
+    }
+
+public:
+    /**
+     * \brief Insert a single instruction before \p it.
+     *
+     * \p it must not be end() (asserted), so nothing can be placed after the
+     * terminating END. Returns the iterator produced by the container's
+     * insert().
+     */
+    iterator insert(iterator it, std::unique_ptr ri) {
+        assert(!prog.empty());
+        assert(it != end());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+
+        return prog.insert(it, std::move(ri));
+    }
+
+    /**
+     * \brief Splice the instructions of \p block in before \p it.
+     *
+     * The block's own END is dropped, and references to it from inside the
+     * block are redirected to the instruction currently at \p it. Returns
+     * \p it unchanged when the block is empty; otherwise returns an iterator
+     * to the first inserted instruction. The block is left without its END
+     * and holding moved-from pointers.
+     */
+    iterator insert(iterator it, RoseProgram &&block) {
+        assert(!prog.empty());
+        assert(it != end());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+
+        if (block.empty()) {
+            return it;
+        }
+
+        const RoseInstruction *end_ptr = block.end_instruction();
+        assert(end_ptr->code() == ROSE_INSTR_END);
+        // NOTE(review): pop_back() destroys the END instruction end_ptr
+        // refers to; below, end_ptr is only handed on as the old_target value
+        // to match against.
+        block.prog.pop_back();
+
+        const RoseInstruction *new_target = it->get();
+        update_targets(block.prog.begin(), block.prog.end(), end_ptr,
+                       new_target);
+
+        // Workaround: container insert() for ranges doesn't return an iterator
+        // in the version of the STL distributed with gcc 4.8.
+        // NOTE(review): distance() is called unqualified here while
+        // std::advance below is qualified -- presumably std::distance.
+        auto dist = distance(prog.begin(), it);
+        prog.insert(it, std::make_move_iterator(block.prog.begin()),
+                    std::make_move_iterator(block.prog.end()));
+        it = prog.begin();
+        std::advance(it, dist);
+        return it;
+    }
+
+    /**
+     * \brief Adds this instruction to the program just before the terminating
+     * ROSE_INSTR_END.
+     *
+     * Implemented as insert() at the position of the last element.
+     */
+    void add_before_end(std::unique_ptr ri) {
+        assert(!prog.empty());
+        insert(std::prev(prog.end()), std::move(ri));
+    }
+
+    /**
+     * \brief Adds this block to the program just before the terminating
+     * ROSE_INSTR_END.
+     *
+     * Does nothing when the block is empty; otherwise implemented as the
+     * block form of insert() at the position of the last element.
+     */
+    void add_before_end(RoseProgram &&block) {
+        assert(!prog.empty());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+
+        if (block.empty()) {
+            return;
+        }
+
+        insert(std::prev(prog.end()), std::move(block));
+    }
+
+    /**
+     * \brief Append this program block, replacing our current ROSE_INSTR_END.
+     *
+     * Does nothing when the block is empty. Otherwise every instruction of
+     * the block is appended, including its END, which becomes the new
+     * terminator.
+     */
+    void add_block(RoseProgram &&block) {
+        assert(!prog.empty());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+
+        if (block.empty()) {
+            return;
+        }
+
+        // Replace pointers to the current END with pointers to the first
+        // instruction in the new sequence.
+        // NOTE(review): pop_back() destroys the instruction end_ptr refers
+        // to; afterwards end_ptr is only handed on as the old_target value to
+        // match against.
+        const RoseInstruction *end_ptr = end_instruction();
+        prog.pop_back();
+        update_targets(prog.begin(), prog.end(), end_ptr,
+                       block.prog.front().get());
+        prog.insert(prog.end(), std::make_move_iterator(block.prog.begin()),
+                    std::make_move_iterator(block.prog.end()));
+    }
+
+    /**
+     * \brief Replace the instruction pointed to by the given iterator.
+     *
+     * After the swap, update_target() is run over the whole program so that
+     * references to the old instruction point at the new one.
+     */
+    template
+    void replace(Iter it, std::unique_ptr ri) {
+        assert(!prog.empty());
+        assert(prog.back()->code() == ROSE_INSTR_END);
+
+        // NOTE(review): the assignment destroys the old instruction, so
+        // old_ptr is used below purely as the value to match against. move()
+        // is unqualified here -- presumably std::move.
+        const RoseInstruction *old_ptr = it->get();
+        *it = move(ri);
+        update_targets(prog.begin(), prog.end(), old_ptr, it->get());
+
+        assert(prog.back()->code() == ROSE_INSTR_END);
+    }
+};
+
+aligned_unique_ptr
+writeProgram(RoseEngineBlob &blob, const RoseProgram &program, u32 *total_len);
+
+/**
+ * \brief Hash functor for RoseProgram.
+ *
+ * Starts from 0 and combines the hash() of each instruction in program
+ * order.
+ */
+class RoseProgramHash {
+public:
+    size_t operator()(const RoseProgram &program) const {
+        size_t v = 0;
+        for (const auto &ri : program) {
+            assert(ri);
+            boost::hash_combine(v, ri->hash());
+        }
+        return v;
+    }
+};
+
+/**
+ * \brief Equality functor for RoseProgram; operator() is only declared here,
+ * its definition is not part of this header.
+ */
+class RoseProgramEquivalence {
+public:
+    bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const;
+};
+
+} // namespace ue2
+
+#endif // ROSE_BUILD_PROGRAM_H
diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp
index 95f43d6c..40057d78 100644
--- a/src/rose/rose_dump.cpp
+++ b/src/rose/rose_dump.cpp
@@ -511,6 +511,12 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(SPARSE_ITER_ANY) {
+                os << " iter_offset " << ri->iter_offset << endl;
+                os << " fail_jump " << offset + ri->fail_jump << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(ENGINES_EOD) {
                 os << " iter_offset " << ri->iter_offset << endl;
             }
@@ -577,9 +583,8 @@ void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) {
     ofstream os(filename);
     const char *base = (const char *)t;
 
-    os << "EOD Program:" << endl;
-
     if (t->eodProgramOffset) {
+        os << "EOD Program @ " << t->eodProgramOffset << ":" << endl;
         dumpProgram(os, t, base + t->eodProgramOffset);
         os << endl;
     } else {
diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h
index 007eb70d..ba3e586b 100644
--- a/src/rose/rose_program.h
+++ b/src/rose/rose_program.h
@@ -99,6 +99,7 @@ enum RoseInstructionCode {
     ROSE_INSTR_CHECK_STATE, //!< Test a single bit in the state multibit.
     ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states.
     ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states.
+    ROSE_INSTR_SPARSE_ITER_ANY, //!< Test for any bit in the sparse iterator.
 
     /** \brief Check outfixes and suffixes for EOD and fire reports if so. */
     ROSE_INSTR_ENGINES_EOD,
@@ -386,6 +387,12 @@ struct ROSE_STRUCT_SPARSE_ITER_NEXT {
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
 
+struct ROSE_STRUCT_SPARSE_ITER_ANY {
+    u8 code; //!< From enum RoseInstructionCode.
+    u32 iter_offset; //!< Offset of mmbit_sparse_iter structure.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
 struct ROSE_STRUCT_ENGINES_EOD {
     u8 code; //!< From enum RoseInstructionCode.
     u32 iter_offset; //!< Offset of mmbit_sparse_iter structure.