diff --git a/src/rose/match.c b/src/rose/match.c index b2b0238d..9163cba3 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -573,3 +573,39 @@ hwlmcb_rv_t rosePureLiteralCallback(size_t start, size_t end, u32 id, assert(id < rose->literalCount); return roseRunProgram(rose, scratch, programs[id], real_end, match_len, 0); } + +/** + * \brief Execute a boundary report program. + * + * Returns MO_HALT_MATCHING if the stream is exhausted or the user has + * instructed us to halt, or MO_CONTINUE_MATCHING otherwise. + */ +int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, + u64a stream_offset, struct hs_scratch *scratch) { + DEBUG_PRINTF("running boundary program at offset %u\n", program); + + if (can_stop_matching(scratch)) { + DEBUG_PRINTF("can stop matching\n"); + return MO_HALT_MATCHING; + } + + if (rose->hasSom && scratch->deduper.current_report_offset == ~0ULL) { + /* we cannot delay the initialization of the som deduper logs any longer + * as we are reporting matches. This is done explicitly as we are + * shortcutting the som handling in the vacuous repeats as we know they + * all come from non-som patterns. */ + fatbit_clear(scratch->deduper.som_log[0]); + fatbit_clear(scratch->deduper.som_log[1]); + scratch->deduper.som_log_dirty = 0; + } + + const size_t match_len = 0; + const char in_anchored = 0; + hwlmcb_rv_t rv = roseRunProgram(rose, scratch, program, stream_offset, + match_len, in_anchored); + if (rv == HWLM_TERMINATE_MATCHING) { + return MO_HALT_MATCHING; + } + + return MO_CONTINUE_MATCHING; +} diff --git a/src/rose/rose.h b/src/rose/rose.h index 5dc57bc2..a9058379 100644 --- a/src/rose/rose.h +++ b/src/rose/rose.h @@ -131,4 +131,7 @@ void roseEodExec(const struct RoseEngine *t, u64a offset, hwlmcb_rv_t rosePureLiteralCallback(size_t start, size_t end, u32 id, void *context); +int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, + u64a stream_offset, struct hs_scratch *scratch); + #endif // ROSE_H diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index c2d9bcce..bb11b1a7 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -1941,70 +1941,6 @@ struct DerivedBoundaryReports { set report_at_0_eod_full; }; -static -void reserveBoundaryReports(const BoundaryReports &boundary, - const DerivedBoundaryReports &dboundary, - RoseBoundaryReports *out, u32 *currOffset) { - u32 curr = *currOffset; - curr = ROUNDUP_N(curr, alignof(ReportID)); - memset(out, 0, sizeof(*out)); - - /* report lists are + 1 in size due to terminator */ - if (!boundary.report_at_eod.empty()) { - out->reportEodOffset = curr; - curr += sizeof(ReportID) * (boundary.report_at_eod.size() + 1); - } - if (!boundary.report_at_0.empty()) { - out->reportZeroOffset = curr; - curr += sizeof(ReportID) * (boundary.report_at_0.size() + 1); - } - if (!dboundary.report_at_0_eod_full.empty()) { - out->reportZeroEodOffset = curr; - curr += sizeof(ReportID) * (dboundary.report_at_0_eod_full.size() + 1); - } - - DEBUG_PRINTF("report ^: %zu\n", boundary.report_at_0.size()); - DEBUG_PRINTF("report $: %zu\n", boundary.report_at_eod.size()); - DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size()); - - *currOffset = curr; -} - -static -void fillInBoundaryReports(RoseEngine *engine, u32 offset, - const set &rl) { - if (rl.empty()) { - return; - } - - u32 *out = (u32 *)((char *)engine + offset); - assert(ISALIGNED(out)); - - for (ReportID r : rl) { - *out = r; - ++out; - } - - *out = MO_INVALID_IDX; -} - -static -void populateBoundaryReports(RoseEngine *engine, - const BoundaryReports &boundary, - const DerivedBoundaryReports &dboundary, - const RoseBoundaryReports &offsets) { - engine->boundary.reportEodOffset = offsets.reportEodOffset; - engine->boundary.reportZeroOffset = offsets.reportZeroOffset; - engine->boundary.reportZeroEodOffset = offsets.reportZeroEodOffset; - - fillInBoundaryReports(engine, offsets.reportEodOffset, - boundary.report_at_eod); - fillInBoundaryReports(engine, offsets.reportZeroOffset, - boundary.report_at_0); - fillInBoundaryReports(engine, offsets.reportZeroEodOffset, - dboundary.report_at_0_eod_full); -} - static void fillInReportInfo(RoseEngine *engine, u32 reportOffset, const ReportManager &rm, const vector &reports) { @@ -2917,6 +2853,43 @@ vector makeProgram(RoseBuildImpl &build, build_context &bc, return program; } +static +u32 writeBoundaryProgram(RoseBuildImpl &build, build_context &bc, + const set &reports) { + if (reports.empty()) { + return 0; + } + + const bool has_som = false; + vector program; + for (const auto &id : reports) { + makeReport(build, bc, id, has_som, program); + } + return writeProgram(bc, flattenProgram({program})); +} + +static +RoseBoundaryReports +makeBoundaryPrograms(RoseBuildImpl &build, build_context &bc, + const BoundaryReports &boundary, + const DerivedBoundaryReports &dboundary) { + RoseBoundaryReports out; + memset(&out, 0, sizeof(out)); + + DEBUG_PRINTF("report ^: %zu\n", boundary.report_at_0.size()); + DEBUG_PRINTF("report $: %zu\n", boundary.report_at_eod.size()); + DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size()); + + out.reportEodOffset = + writeBoundaryProgram(build, bc, boundary.report_at_eod); + out.reportZeroOffset = + writeBoundaryProgram(build, bc, boundary.report_at_0); + out.reportZeroEodOffset = + writeBoundaryProgram(build, bc, dboundary.report_at_0_eod_full); + + return out; +} + static void assignStateIndices(const RoseBuildImpl &build, build_context &bc) { const auto &g = build.g; @@ -3788,6 +3761,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { bc.resources.has_anchored = true; } + auto boundary_out = makeBoundaryPrograms(*this, bc, boundary, dboundary); + // Build NFAs set no_retrigger_queues; bool mpv_as_outfix; @@ -3928,9 +3903,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 activeLeftCount = leftInfoTable.size(); u32 rosePrefixCount = countRosePrefixes(leftInfoTable); - RoseBoundaryReports boundary_out; - reserveBoundaryReports(boundary, dboundary, &boundary_out, &currOffset); - u32 rev_nfa_table_offset; vector rev_nfa_offsets; prepSomRevNfas(ssm, &rev_nfa_table_offset, &rev_nfa_offsets, &currOffset); @@ -4098,7 +4070,10 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->asize = verify_u32(asize); engine->ematcherRegionSize = ematcher_region_size; engine->floatingStreamState = verify_u32(floatingStreamStateRequired); - populateBoundaryReports(engine.get(), boundary, dboundary, boundary_out); + + engine->boundary.reportEodOffset = boundary_out.reportEodOffset; + engine->boundary.reportZeroOffset = boundary_out.reportZeroOffset; + engine->boundary.reportZeroEodOffset = boundary_out.reportZeroEodOffset; write_out(&engine->state_init, (char *)engine.get(), state_scatter, state_scatter_aux_offset); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index e9edbc15..37ff9168 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -235,14 +235,17 @@ struct RoseStateOffsets { }; struct RoseBoundaryReports { - u32 reportEodOffset; /**< 0 if no reports list, otherwise offset of - * MO_INVALID_IDX terminated list to report at EOD */ - u32 reportZeroOffset; /**< 0 if no reports list, otherwise offset of - * MO_INVALID_IDX terminated list to report at offset - * 0 */ - u32 reportZeroEodOffset; /**< 0 if no reports list, otherwise offset of - * MO_INVALID_IDX terminated list to report if eod - * is at offset 0. Superset of other lists. */ + /** \brief 0 if no reports list, otherwise offset of program to run to + * deliver reports at EOD. */ + u32 reportEodOffset; + + /** \brief 0 if no reports list, otherwise offset of program to run to + * deliver reports at offset 0. */ + u32 reportZeroOffset; + + /** \brief 0 if no reports list, otherwise offset of program to run to + * deliver reports if EOD is at offset 0. Superset of other programs. */ + u32 reportZeroEodOffset; }; /* NFA Queue Assignment diff --git a/src/runtime.c b/src/runtime.c index 5f357918..8a0cb9f9 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -260,57 +260,6 @@ SomNfaCallback selectOutfixSomAdaptor(const struct RoseEngine *rose) { return is_simple ? outfixSimpleSomSomAdaptor : outfixSomSomAdaptor; } -/** - * \brief Fire callbacks for a boundary report list. - * - * Returns MO_HALT_MATCHING if the user has instructed us to halt, and - * MO_CONTINUE_MATCHING otherwise. - */ - -static never_inline -int processReportList(const struct RoseEngine *rose, u32 base_offset, - u64a stream_offset, hs_scratch_t *scratch) { - DEBUG_PRINTF("running report list at offset %u\n", base_offset); - - if (told_to_stop_matching(scratch)) { - DEBUG_PRINTF("matching has been terminated\n"); - return MO_HALT_MATCHING; - } - - if (rose->hasSom && scratch->deduper.current_report_offset == ~0ULL) { - /* we cannot delay the initialization of the som deduper logs any longer - * as we are reporting matches. This is done explicitly as we are - * shortcutting the som handling in the vacuous repeats as we know they - * all come from non-som patterns. */ - - fatbit_clear(scratch->deduper.som_log[0]); - fatbit_clear(scratch->deduper.som_log[1]); - scratch->deduper.som_log_dirty = 0; - } - - const ReportID *report = getByOffset(rose, base_offset); - - /* never required to do som as vacuous reports are always external */ - - if (rose->simpleCallback) { - for (; *report != MO_INVALID_IDX; report++) { - int rv = roseSimpleAdaptor(stream_offset, *report, scratch); - if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } else { - for (; *report != MO_INVALID_IDX; report++) { - int rv = roseAdaptor(stream_offset, *report, scratch); - if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - - return MO_CONTINUE_MATCHING; -} - /** \brief Initialise SOM state. Used in both block and streaming mode. */ static really_inline void initSomState(const struct RoseEngine *rose, char *state) { @@ -488,15 +437,15 @@ hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length, if (!length) { if (rose->boundary.reportZeroEodOffset) { - processReportList(rose, rose->boundary.reportZeroEodOffset, 0, - scratch); + roseRunBoundaryProgram(rose, rose->boundary.reportZeroEodOffset, 0, + scratch); } goto set_retval; } if (rose->boundary.reportZeroOffset) { - int rv = processReportList(rose, rose->boundary.reportZeroOffset, 0, - scratch); + int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset, + 0, scratch); if (rv == MO_HALT_MATCHING) { goto set_retval; } @@ -559,8 +508,8 @@ done_scan: } if (rose->boundary.reportEodOffset) { - processReportList(rose, rose->boundary.reportEodOffset, length, - scratch); + roseRunBoundaryProgram(rose, rose->boundary.reportEodOffset, length, + scratch); } set_retval: @@ -727,25 +676,28 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch, getHistory(state, rose, id->offset), getHistoryAmount(rose, id->offset), id->offset, status, 0); + // Rose program execution (used for some report paths) depends on these + // values being initialised. + scratch->tctxt.lastMatchOffset = 0; + scratch->tctxt.minMatchOffset = id->offset; + if (rose->somLocationCount) { loadSomFromStream(scratch, id->offset); } if (!id->offset) { if (rose->boundary.reportZeroEodOffset) { - int rv = processReportList(rose, rose->boundary.reportZeroEodOffset, - 0, scratch); + int rv = roseRunBoundaryProgram( + rose, rose->boundary.reportZeroEodOffset, 0, scratch); if (rv == MO_HALT_MATCHING) { - scratch->core_info.status |= STATUS_TERMINATED; return; } } } else { if (rose->boundary.reportEodOffset) { - int rv = processReportList(rose, rose->boundary.reportEodOffset, - id->offset, scratch); + int rv = roseRunBoundaryProgram( + rose, rose->boundary.reportEodOffset, id->offset, scratch); if (rv == MO_HALT_MATCHING) { - scratch->core_info.status |= STATUS_TERMINATED; return; } } @@ -978,9 +930,10 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, if (!id->offset && rose->boundary.reportZeroOffset) { DEBUG_PRINTF("zero reports\n"); - processReportList(rose, rose->boundary.reportZeroOffset, 0, scratch); - if (unlikely(can_stop_matching(scratch))) { - DEBUG_PRINTF("stream is broken, halting scan\n"); + int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset, + 0, scratch); + if (rv == MO_HALT_MATCHING) { + DEBUG_PRINTF("halting scan\n"); setStreamStatus(state, scratch->core_info.status); if (told_to_stop_matching(scratch)) { return HS_SCAN_TERMINATED;