From 7a6a47672345f7ccb62c160760fe119fcc209e98 Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Thu, 9 Jun 2016 14:41:15 +1000 Subject: [PATCH] eod: move engine checks into ENGINES_EOD instr --- src/rose/eod.c | 63 ------------------- src/rose/program_runtime.h | 59 ++++++++++++++++++ src/rose/rose_build_bytecode.cpp | 102 ++++++++++++++++++++----------- src/rose/rose_dump.cpp | 6 +- src/rose/rose_internal.h | 3 - src/rose/rose_program.h | 9 +++ 6 files changed, 141 insertions(+), 101 deletions(-) diff --git a/src/rose/eod.c b/src/rose/eod.c index 32702bed..4961a728 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -122,65 +122,6 @@ int roseEodRunIterator(const struct RoseEngine *t, u64a offset, return MO_CONTINUE_MATCHING; } -/** - * \brief Check for (and deliver) reports from active output-exposed (suffix - * or outfix) NFAs. - * - * \return MO_HALT_MATCHING if the user instructs us to stop. - */ -static rose_inline -int roseCheckNfaEod(const struct RoseEngine *t, struct hs_scratch *scratch, - u64a offset, const char is_streaming) { - if (!t->eodNfaIterOffset) { - DEBUG_PRINTF("no engines that report at EOD\n"); - return MO_CONTINUE_MATCHING; - } - - /* data, len is used for state decompress, should be full available data */ - u8 key = 0; - if (is_streaming) { - const u8 *eod_data = scratch->core_info.hbuf; - size_t eod_len = scratch->core_info.hlen; - key = eod_len ? eod_data[eod_len - 1] : 0; - } - - const u8 *aa = getActiveLeafArray(t, scratch->core_info.state); - const u32 aaCount = t->activeArrayCount; - - const struct mmbit_sparse_iter *it = getByOffset(t, t->eodNfaIterOffset); - assert(ISALIGNED(it)); - - u32 idx = 0; - struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; - - for (u32 qi = mmbit_sparse_iter_begin(aa, aaCount, &idx, it, si_state); - qi != MMB_INVALID; - qi = mmbit_sparse_iter_next(aa, aaCount, qi, &idx, it, si_state)) { - const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - const struct NFA *nfa = getNfaByInfo(t, info); - - DEBUG_PRINTF("checking nfa %u\n", qi); - assert(nfaAcceptsEod(nfa)); - - char *fstate = scratch->fullState + info->fullStateOffset; - const char *sstate = scratch->core_info.state + info->stateOffset; - - if (is_streaming) { - // Decompress stream state. - nfaExpandState(nfa, fstate, sstate, offset, key); - } - - if (nfaCheckFinalState(nfa, fstate, sstate, offset, roseReportAdaptor, - roseReportSomAdaptor, - scratch) == MO_HALT_MATCHING) { - DEBUG_PRINTF("user instructed us to stop\n"); - return MO_HALT_MATCHING; - } - } - - return MO_CONTINUE_MATCHING; -} - static rose_inline void cleanupAfterEodMatcher(const struct RoseEngine *t, u64a offset, struct hs_scratch *scratch) { @@ -269,10 +210,6 @@ void roseEodExec_i(const struct RoseEngine *t, u64a offset, return; } - if (roseCheckNfaEod(t, scratch, offset, is_streaming) == MO_HALT_MATCHING) { - return; - } - if (!t->eodIterProgramOffset && !t->ematcherOffset) { DEBUG_PRINTF("no eod accepts\n"); return; diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index a913ae27..a656c715 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -800,6 +800,57 @@ char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) { return end >= min_bound && end <= max_bound; } +static rose_inline +hwlmcb_rv_t roseEnginesEod(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a offset, + u32 iter_offset) { + const char is_streaming = rose->mode != HS_MODE_BLOCK; + + /* data, len is used for state decompress, should be full available data */ + u8 key = 0; + if (is_streaming) { + const u8 *eod_data = scratch->core_info.hbuf; + size_t eod_len = scratch->core_info.hlen; + key = eod_len ? eod_data[eod_len - 1] : 0; + } + + const u8 *aa = getActiveLeafArray(rose, scratch->core_info.state); + const u32 aaCount = rose->activeArrayCount; + + const struct mmbit_sparse_iter *it = getByOffset(rose, iter_offset); + assert(ISALIGNED(it)); + + u32 idx = 0; + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + + for (u32 qi = mmbit_sparse_iter_begin(aa, aaCount, &idx, it, si_state); + qi != MMB_INVALID; + qi = mmbit_sparse_iter_next(aa, aaCount, qi, &idx, it, si_state)) { + const struct NfaInfo *info = getNfaInfoByQueue(rose, qi); + const struct NFA *nfa = getNfaByInfo(rose, info); + + DEBUG_PRINTF("checking nfa %u\n", qi); + assert(nfaAcceptsEod(nfa)); + + char *fstate = scratch->fullState + info->fullStateOffset; + const char *sstate = scratch->core_info.state + info->stateOffset; + + if (is_streaming) { + // Decompress stream state. + nfaExpandState(nfa, fstate, sstate, offset, key); + } + + if (nfaCheckFinalState(nfa, fstate, sstate, offset, roseReportAdaptor, + roseReportSomAdaptor, + scratch) == MO_HALT_MATCHING) { + DEBUG_PRINTF("user instructed us to stop\n"); + return HWLM_TERMINATE_MATCHING; + } + } + + return HWLM_CONTINUE_MATCHING; +} + static void updateSeqPoint(struct RoseContext *tctxt, u64a offset, const char from_mpv) { @@ -1301,6 +1352,14 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(ENGINES_EOD) { + if (roseEnginesEod(t, scratch, end, ri->iter_offset) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(END) { DEBUG_PRINTF("finished\n"); return HWLM_CONTINUE_MATCHING; diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index ea602017..904f8df9 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -223,6 +223,7 @@ public: case ROSE_INSTR_CHECK_STATE: return &u.checkState; case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin; case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext; + case ROSE_INSTR_ENGINES_EOD: return &u.enginesEod; case ROSE_INSTR_END: return &u.end; } assert(0); @@ -269,6 +270,7 @@ public: case ROSE_INSTR_CHECK_STATE: return sizeof(u.checkState); case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin); case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext); + case ROSE_INSTR_ENGINES_EOD: return sizeof(u.enginesEod); case ROSE_INSTR_END: return sizeof(u.end); } assert(0); @@ -314,6 +316,7 @@ public: ROSE_STRUCT_CHECK_STATE checkState; ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin; ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext; + ROSE_STRUCT_ENGINES_EOD enginesEod; ROSE_STRUCT_END end; } u; @@ -3532,7 +3535,7 @@ u32 addPredBlocks(build_context &bc, * Returns the pair (program offset, sparse iter offset). */ static -pair makeSparseIterProgram(build_context &bc, +vector makeSparseIterProgram(build_context &bc, map>> &predProgramLists, const vector &root_program, const vector &pre_program) { @@ -3548,7 +3551,7 @@ pair makeSparseIterProgram(build_context &bc, // Add blocks to deal with non-root edges (triggered by sparse iterator or // mmbit_isset checks). This operation will flatten the program up to this // point. - u32 iter_offset = addPredBlocks(bc, predProgramLists, program, false); + addPredBlocks(bc, predProgramLists, program, false); // If we have a root program, replace the END instruction with it. Note // that the root program has already been flattened. @@ -3559,8 +3562,7 @@ pair makeSparseIterProgram(build_context &bc, program.insert(end(program), begin(root_program), end(root_program)); } - applyFinalSpecialisation(program); - return {writeProgram(bc, program), iter_offset}; + return program; } static @@ -3778,8 +3780,9 @@ vector buildLitInitialProgram(RoseBuildImpl &build, } static -u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id, - const vector &lit_edges) { +vector buildLiteralProgram(RoseBuildImpl &build, + build_context &bc, u32 final_id, + const vector &lit_edges) { const auto &g = build.g; DEBUG_PRINTF("final id %u, %zu lit edges\n", final_id, lit_edges.size()); @@ -3831,7 +3834,19 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id, // Put it all together. return makeSparseIterProgram(bc, predProgramLists, root_program, - pre_program).first; + pre_program); +} + +static +u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id, + const vector &lit_edges) { + auto program = buildLiteralProgram(build, bc, final_id, lit_edges); + if (program.empty()) { + return 0; + } + // Note: already flattened. + applyFinalSpecialisation(program); + return writeProgram(bc, program); } static @@ -3904,7 +3919,7 @@ pair buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { const auto &lit_edges = lit_edge_map[finalId]; litPrograms[finalId] = - buildLiteralProgram(build, bc, finalId, lit_edges); + writeLiteralProgram(build, bc, finalId, lit_edges); delayRebuildPrograms[finalId] = buildDelayRebuildProgram(build, bc, finalId); } @@ -4020,33 +4035,53 @@ pair buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) { } static -u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) { - if (build.eod_event_literal_id == MO_INVALID_IDX) { +u32 writeEodProgram(RoseBuildImpl &build, build_context &bc, + u32 eodNfaIterOffset) { + vector program; + + if (build.eod_event_literal_id != MO_INVALID_IDX) { + const RoseGraph &g = build.g; + const auto &lit_info = + build.literal_info.at(build.eod_event_literal_id); + assert(lit_info.delayed_ids.empty()); + assert(!lit_info.squash_group); + assert(!lit_info.requires_benefits); + + // Collect all edges leading into EOD event literal vertices. + vector edge_list; + for (const auto &v : lit_info.vertices) { + for (const auto &e : in_edges_range(v, g)) { + edge_list.push_back(e); + } + } + + // Sort edge list for determinism, prettiness. + sort(begin(edge_list), end(edge_list), + [&g](const RoseEdge &a, const RoseEdge &b) { + return tie(g[source(a, g)].idx, g[target(a, g)].idx) < + tie(g[source(b, g)].idx, g[target(b, g)].idx); + }); + + program = buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list); + } + + if (eodNfaIterOffset) { + auto ri = RoseInstruction(ROSE_INSTR_ENGINES_EOD); + ri.u.enginesEod.iter_offset = eodNfaIterOffset; + if (!program.empty()) { + assert(program.back().code() == ROSE_INSTR_END); + program.pop_back(); + } + program.push_back(move(ri)); + program = flattenProgram({program}); + } + + if (program.empty()) { return 0; } - const RoseGraph &g = build.g; - const auto &lit_info = build.literal_info.at(build.eod_event_literal_id); - assert(lit_info.delayed_ids.empty()); - assert(!lit_info.squash_group); - assert(!lit_info.requires_benefits); - - // Collect all edges leading into EOD event literal vertices. - vector edge_list; - for (const auto &v : lit_info.vertices) { - for (const auto &e : in_edges_range(v, g)) { - edge_list.push_back(e); - } - } - - // Sort edge list for determinism, prettiness. - sort(begin(edge_list), end(edge_list), - [&g](const RoseEdge &a, const RoseEdge &b) { - return tie(g[source(a, g)].idx, g[target(a, g)].idx) < - tie(g[source(b, g)].idx, g[target(b, g)].idx); - }); - - return buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list); + applyFinalSpecialisation(program); + return writeProgram(bc, program); } static @@ -4210,7 +4245,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { tie(litProgramOffset, litDelayRebuildProgramOffset) = buildLiteralPrograms(*this, bc); - u32 eodProgramOffset = writeEodProgram(*this, bc); + u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset); u32 eodIterProgramOffset; u32 eodIterOffset; tie(eodIterProgramOffset, eodIterOffset) = buildEodAnchorProgram(*this, bc); @@ -4412,7 +4447,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->eodProgramOffset = eodProgramOffset; engine->eodIterProgramOffset = eodIterProgramOffset; engine->eodIterOffset = eodIterOffset; - engine->eodNfaIterOffset = eodNfaIterOffset; engine->lastByteHistoryIterOffset = lastByteOffset; diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index ad776780..59f7f751 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -476,6 +476,11 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(ENGINES_EOD) { + os << " iter_offset " << ri->iter_offset << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(END) { return; } PROGRAM_NEXT_INSTRUCTION @@ -1022,7 +1027,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, eodProgramOffset); DUMP_U32(t, eodIterProgramOffset); DUMP_U32(t, eodIterOffset); - DUMP_U32(t, eodNfaIterOffset); DUMP_U32(t, lastByteHistoryIterOffset); DUMP_U32(t, minWidth); DUMP_U32(t, minWidthExcludingBoundaries); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index bbe0b1b6..2e921542 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -380,9 +380,6 @@ struct RoseEngine { u32 eodIterProgramOffset; // or 0 if no eod iterator program u32 eodIterOffset; // offset to EOD sparse iter or 0 if none - /** \brief Offset to sparse iter over outfix/suffix NFAs that accept EOD. */ - u32 eodNfaIterOffset; - u32 lastByteHistoryIterOffset; // if non-zero /** \brief Minimum number of bytes required to match. */ diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 5c57bf54..b8961117 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -96,6 +96,10 @@ enum RoseInstructionCode { ROSE_INSTR_CHECK_STATE, //!< Test a single bit in the state multibit. ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states. ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states. + + /** \brief Check outfixes and suffixes for EOD and fire reports if so. */ + ROSE_INSTR_ENGINES_EOD, + ROSE_INSTR_END //!< End of program. }; @@ -352,6 +356,11 @@ struct ROSE_STRUCT_SPARSE_ITER_NEXT { u32 fail_jump; //!< Jump forward this many bytes on failure. }; +struct ROSE_STRUCT_ENGINES_EOD { + u8 code; //!< From enum RoseInstructionCode. + u32 iter_offset; //!< Offset of mmbit_sparse_iter structure. +}; + struct ROSE_STRUCT_END { u8 code; //!< From enum RoseInstructionCode. };