diff --git a/src/rose/eod.c b/src/rose/eod.c index 48b330d4..771c77fe 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -55,80 +55,6 @@ void initContext(const struct RoseEngine *t, u64a offset, fatbit_clear(scratch->aqa); } -static rose_inline -hwlmcb_rv_t roseEodRunMatcher(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch, - const char is_streaming) { - assert(t->ematcherOffset); - - size_t eod_len; - const u8 *eod_data; - if (!is_streaming) { /* Block */ - eod_data = scratch->core_info.buf; - eod_len = scratch->core_info.len; - } else { /* Streaming */ - eod_len = scratch->core_info.hlen; - eod_data = scratch->core_info.hbuf; - } - - assert(eod_data); - assert(eod_len); - - // If we don't have enough bytes to produce a match from an EOD table scan, - // there's no point scanning. - if (eod_len < t->eodmatcherMinWidth) { - DEBUG_PRINTF("len=%zu < eodmatcherMinWidth=%u\n", eod_len, - t->eodmatcherMinWidth); - return HWLM_CONTINUE_MATCHING; - } - - // Ensure that we only need scan the last N bytes, where N is the length of - // the eod-anchored matcher region. - size_t adj = eod_len - MIN(eod_len, t->ematcherRegionSize); - - DEBUG_PRINTF("eod offset=%llu, eod length=%zu\n", offset, eod_len); - - struct RoseContext *tctxt = &scratch->tctxt; - const struct HWLM *etable = getELiteralMatcher(t); - - hwlmExec(etable, eod_data, eod_len, adj, roseCallback, scratch, - tctxt->groups); - - // We may need to fire delayed matches - return cleanUpDelayed(t, scratch, 0, offset); -} - -static rose_inline -int roseEodRunIterator(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch) { - if (!t->eodIterProgramOffset) { - return MO_CONTINUE_MATCHING; - } - - DEBUG_PRINTF("running eod program at offset %u\n", t->eodIterProgramOffset); - - const u64a som = 0; - const size_t match_len = 0; - const char in_anchored = 0; - const char in_catchup = 0; - const char from_mpv = 0; - const char skip_mpv_catchup = 1; - if (roseRunProgram(t, scratch, t->eodIterProgramOffset, som, offset, - match_len, in_anchored, in_catchup, - from_mpv, skip_mpv_catchup) == HWLM_TERMINATE_MATCHING) { - return MO_HALT_MATCHING; - } - - return MO_CONTINUE_MATCHING; -} - -static rose_inline -void cleanupAfterEodMatcher(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch) { - // Flush history to make sure it's consistent. - roseFlushLastByteHistory(t, scratch, offset); -} - static rose_inline int roseRunEodProgram(const struct RoseEngine *t, u64a offset, struct hs_scratch *scratch) { @@ -158,37 +84,14 @@ int roseRunEodProgram(const struct RoseEngine *t, u64a offset, static really_inline void roseEodExec_i(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch, const char is_streaming) { + struct hs_scratch *scratch, UNUSED const char is_streaming) { assert(t); assert(scratch->core_info.buf || scratch->core_info.hbuf); assert(!scratch->core_info.buf || !scratch->core_info.hbuf); assert(!can_stop_matching(scratch)); // Run the unconditional EOD program. - if (roseRunEodProgram(t, offset, scratch) == MO_HALT_MATCHING) { - return; - } - - // Run the EOD anchored matcher if there is one. - if (t->ematcherOffset) { - assert(t->ematcherRegionSize); - // Unset the reports we just fired so we don't fire them again below. - char *state = scratch->core_info.state; - mmbit_clear(getRoleState(state), t->rolesWithStateCount); - mmbit_clear(getActiveLeafArray(t, state), t->activeArrayCount); - - if (roseEodRunMatcher(t, offset, scratch, is_streaming) == - HWLM_TERMINATE_MATCHING) { - return; - } - - cleanupAfterEodMatcher(t, offset, scratch); - - // Fire any new EOD reports. - if (roseEodRunIterator(t, offset, scratch) == MO_HALT_MATCHING) { - return; - } - } + roseRunEodProgram(t, offset, scratch); } void roseEodExec(const struct RoseEngine *t, u64a offset, diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index 5387f59f..5d255cf1 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -46,6 +46,13 @@ #include "util/fatbit.h" #include "util/multibit.h" +static rose_inline +hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 programOffset, + u64a som, u64a end, size_t match_len, + char in_anchored, char in_catchup, char from_mpv, + char skip_mpv_catchup); + static rose_inline int roseCheckBenefits(const struct core_info *ci, u64a end, u32 mask_rewind, const u8 *and_mask, const u8 *exp_mask) { @@ -893,6 +900,93 @@ hwlmcb_rv_t roseSuffixesEod(const struct RoseEngine *rose, return HWLM_CONTINUE_MATCHING; } +static rose_inline +int roseEodRunIterator(const struct RoseEngine *t, u64a offset, + struct hs_scratch *scratch) { + if (!t->eodIterProgramOffset) { + return MO_CONTINUE_MATCHING; + } + + DEBUG_PRINTF("running eod program at offset %u\n", t->eodIterProgramOffset); + + const u64a som = 0; + const size_t match_len = 0; + const char in_anchored = 0; + const char in_catchup = 0; + const char from_mpv = 0; + const char skip_mpv_catchup = 1; + if (roseRunProgram(t, scratch, t->eodIterProgramOffset, som, offset, + match_len, in_anchored, in_catchup, + from_mpv, skip_mpv_catchup) == HWLM_TERMINATE_MATCHING) { + return MO_HALT_MATCHING; + } + + return MO_CONTINUE_MATCHING; +} + +static +hwlmcb_rv_t roseMatcherEod(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a offset) { + assert(rose->ematcherOffset); + assert(rose->ematcherRegionSize); + + // Clear role state and active engines, since we have already handled all + // outstanding work there. + DEBUG_PRINTF("clear role state and active leaf array\n"); + char *state = scratch->core_info.state; + mmbit_clear(getRoleState(state), rose->rolesWithStateCount); + mmbit_clear(getActiveLeafArray(rose, state), rose->activeArrayCount); + + const char is_streaming = rose->mode != HS_MODE_BLOCK; + + size_t eod_len; + const u8 *eod_data; + if (!is_streaming) { /* Block */ + eod_data = scratch->core_info.buf; + eod_len = scratch->core_info.len; + } else { /* Streaming */ + eod_len = scratch->core_info.hlen; + eod_data = scratch->core_info.hbuf; + } + + assert(eod_data); + assert(eod_len); + + DEBUG_PRINTF("%zu bytes of eod data to scan at offset %llu\n", eod_len, + offset); + + // If we don't have enough bytes to produce a match from an EOD table scan, + // there's no point scanning. + if (eod_len < rose->eodmatcherMinWidth) { + DEBUG_PRINTF("too short for min width %u\n", rose->eodmatcherMinWidth); + return HWLM_CONTINUE_MATCHING; + } + + // Ensure that we only need scan the last N bytes, where N is the length of + // the eod-anchored matcher region. + size_t adj = eod_len - MIN(eod_len, rose->ematcherRegionSize); + + const struct HWLM *etable = getELiteralMatcher(rose); + hwlmExec(etable, eod_data, eod_len, adj, roseCallback, scratch, + scratch->tctxt.groups); + + // We may need to fire delayed matches. + if (cleanUpDelayed(rose, scratch, 0, offset) == HWLM_TERMINATE_MATCHING) { + DEBUG_PRINTF("user instructed us to stop\n"); + return HWLM_TERMINATE_MATCHING; + } + + roseFlushLastByteHistory(rose, scratch, offset); + + // Fire any new EOD reports. + if (roseEodRunIterator(rose, offset, scratch) == MO_HALT_MATCHING) { + DEBUG_PRINTF("user instructed us to stop\n"); + return HWLM_TERMINATE_MATCHING; + } + + return HWLM_CONTINUE_MATCHING; +} + static void updateSeqPoint(struct RoseContext *tctxt, u64a offset, const char from_mpv) { @@ -1410,6 +1504,14 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(MATCHER_EOD) { + if (roseMatcherEod(t, scratch, end) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(END) { DEBUG_PRINTF("finished\n"); return HWLM_CONTINUE_MATCHING; diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 6abd77db..c472337d 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -225,6 +225,7 @@ public: case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext; case ROSE_INSTR_ENGINES_EOD: return &u.enginesEod; case ROSE_INSTR_SUFFIXES_EOD: return &u.suffixesEod; + case ROSE_INSTR_MATCHER_EOD: return &u.matcherEod; case ROSE_INSTR_END: return &u.end; } assert(0); @@ -273,6 +274,7 @@ public: case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext); case ROSE_INSTR_ENGINES_EOD: return sizeof(u.enginesEod); case ROSE_INSTR_SUFFIXES_EOD: return sizeof(u.suffixesEod); + case ROSE_INSTR_MATCHER_EOD: return sizeof(u.matcherEod); case ROSE_INSTR_END: return sizeof(u.end); } assert(0); @@ -320,6 +322,7 @@ public: ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext; ROSE_STRUCT_ENGINES_EOD enginesEod; ROSE_STRUCT_SUFFIXES_EOD suffixesEod; + ROSE_STRUCT_MATCHER_EOD matcherEod; ROSE_STRUCT_END end; } u; @@ -4000,6 +4003,18 @@ bool hasEodAnchoredSuffix(const RoseBuildImpl &build) { return false; } +static +bool hasEodMatcher(const RoseBuildImpl &build) { + const RoseGraph &g = build.g; + for (auto v : vertices_range(g)) { + if (build.isInETable(v)) { + DEBUG_PRINTF("vertex %zu is in eod table\n", g[v].idx); + return true; + } + } + return false; +} + /** * Returns the pair (program offset, sparse iter offset). */ @@ -4108,16 +4123,22 @@ void addGeneralEodAnchorProgram(RoseBuildImpl &build, build_context &bc, } } - if (predProgramLists.empty()) { - DEBUG_PRINTF("no eod anchored roles\n"); - return; + if (!predProgramLists.empty()) { + if (!program.empty()) { + assert(program.back().code() == ROSE_INSTR_END); + program.pop_back(); + } + addPredBlocks(bc, predProgramLists, program); } - if (!program.empty()) { - assert(program.back().code() == ROSE_INSTR_END); - program.pop_back(); + if (hasEodMatcher(build)) { + if (!program.empty()) { + assert(program.back().code() == ROSE_INSTR_END); + program.pop_back(); + } + program.emplace_back(ROSE_INSTR_MATCHER_EOD); + program.emplace_back(ROSE_INSTR_END); } - addPredBlocks(bc, predProgramLists, program); } static diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index be43e559..5bcff4fc 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -484,6 +484,9 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_CASE(SUFFIXES_EOD) {} PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(MATCHER_EOD) {} + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(END) { return; } PROGRAM_NEXT_INSTRUCTION diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 4a5521ef..cc3d07b0 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -104,6 +104,9 @@ enum RoseInstructionCode { * so. */ ROSE_INSTR_SUFFIXES_EOD, + /** \brief Run the EOD-anchored HWLM literal matcher. */ + ROSE_INSTR_MATCHER_EOD, + ROSE_INSTR_END //!< End of program. }; @@ -369,6 +372,10 @@ struct ROSE_STRUCT_SUFFIXES_EOD { u8 code; //!< From enum RoseInstructionCode. }; +struct ROSE_STRUCT_MATCHER_EOD { + u8 code; //!< From enum RoseInstructionCode. +}; + struct ROSE_STRUCT_END { u8 code; //!< From enum RoseInstructionCode. };