diff --git a/src/nfa/callback.h b/src/nfa/callback.h index 0284f1d5..9bdaa8d1 100644 --- a/src/nfa/callback.h +++ b/src/nfa/callback.h @@ -37,24 +37,26 @@ /** \brief The type for an NFA callback. * - * This is a function that takes as arguments the current offset where the - * match occurs, the id of the match and the context pointer that was passed - * into the NFA API function that executed the NFA. + * This is a function that takes as arguments the current start and end offsets + * where the match occurs, the id of the match and the context pointer that was + * passed into the NFA API function that executed the NFA. * - * The offset where the match occurs will be the offset after the character - * that caused the match. Thus, if we have a buffer containing 'abc', then a - * pattern that matches an empty string will have an offset of 0, a pattern - * that matches 'a' will have an offset of 1, and a pattern that matches 'abc' - * will have an offset of 3, which will be a value that is 'beyond' the size of - * the buffer. That is, if we have n characters in the buffer, there are n+1 - * different potential offsets for matches. + * The start offset is the "start of match" (SOM) offset for the match. It is + * only provided by engines that natively support SOM tracking (e.g. Gough). + * + * The end offset will be the offset after the character that caused the match. + * Thus, if we have a buffer containing 'abc', then a pattern that matches an + * empty string will have an offset of 0, a pattern that matches 'a' will have + * an offset of 1, and a pattern that matches 'abc' will have an offset of 3, + * which will be a value that is 'beyond' the size of the buffer. That is, if + * we have n characters in the buffer, there are n+1 different potential + * offsets for matches. * * This function should return an int - currently the possible return values * are 0, which means 'stop running the engine' or non-zero, which means * 'continue matching'. */ -typedef int (*NfaCallback)(u64a from_offset, u64a to_offset, ReportID id, - void *context); +typedef int (*NfaCallback)(u64a start, u64a end, ReportID id, void *context); /** * standard \ref NfaCallback return value indicating that engine execution diff --git a/src/rose/catchup.c b/src/rose/catchup.c index 9a075d17..017a6bf0 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -281,15 +281,14 @@ restart: /* for use by mpv (chained) only */ static -int roseNfaFinalBlastAdaptor(u64a som, u64a offset, ReportID id, - void *context) { +int roseNfaFinalBlastAdaptor(u64a start, u64a end, ReportID id, void *context) { struct hs_scratch *scratch = context; + assert(scratch && scratch->magic == SCRATCH_MAGIC); const struct RoseEngine *t = scratch->core_info.rose; - DEBUG_PRINTF("masky got himself a blasted match @%llu id %u !woot!\n", - offset, id); + DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end); - int cb_rv = roseNfaRunProgram(t, scratch, som, offset, id, 1); + int cb_rv = roseNfaRunProgram(t, scratch, start, end, id, 1); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { @@ -449,35 +448,35 @@ char in_mpv(const struct RoseEngine *rose, const struct hs_scratch *scratch) { } static -int roseNfaBlastAdaptor(u64a som, u64a offset, ReportID id, void *context) { +int roseNfaBlastAdaptor(u64a start, u64a end, ReportID id, void *context) { struct hs_scratch *scratch = context; - struct RoseContext *tctxt = &scratch->tctxt; + assert(scratch && scratch->magic == SCRATCH_MAGIC); const struct RoseEngine *t = scratch->core_info.rose; - DEBUG_PRINTF("masky got himself a blasted match @%llu id %u !woot!\n", - offset, id); + DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end); const char from_mpv = in_mpv(t, scratch); - int cb_rv = roseNfaRunProgram(t, scratch, som, offset, id, from_mpv); + int cb_rv = roseNfaRunProgram(t, scratch, start, end, id, from_mpv); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { return MO_CONTINUE_MATCHING; } else { assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(t, tctxt->curr_qi, + return !roseSuffixIsExhausted(t, scratch->tctxt.curr_qi, scratch->core_info.exhaustionVector); } } -int roseNfaAdaptor(u64a from_offset, u64a offset, ReportID id, - void *context) { +int roseNfaAdaptor(u64a start, u64a end, ReportID id, void *context) { struct hs_scratch *scratch = context; - DEBUG_PRINTF("masky got himself a match @%llu id %u !woot!\n", offset, id); + assert(scratch && scratch->magic == SCRATCH_MAGIC); + + DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end); /* must be a external report as haig cannot directly participate in chain */ - return roseNfaRunProgram(scratch->core_info.rose, scratch, from_offset, - offset, id, 0); + return roseNfaRunProgram(scratch->core_info.rose, scratch, start, end, id, + 0); } static really_inline diff --git a/src/rose/match.c b/src/rose/match.c index eb8def9b..2b05fd76 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -211,8 +211,9 @@ event_enqueued: return HWLM_CONTINUE_MATCHING; } -int roseAnchoredCallback(u64a som, u64a end, u32 id, void *ctx) { +int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) { struct hs_scratch *scratch = ctx; + assert(scratch && scratch->magic == SCRATCH_MAGIC); struct RoseContext *tctxt = &scratch->tctxt; struct core_info *ci = &scratch->core_info; const struct RoseEngine *t = ci->rose; @@ -244,7 +245,7 @@ int roseAnchoredCallback(u64a som, u64a end, u32 id, void *ctx) { const u32 *programs = getByOffset(t, t->litProgramOffset); assert(id < t->literalCount); const u8 flags = ROSE_PROG_FLAG_IN_ANCHORED; - if (roseRunProgram(t, scratch, programs[id], som, real_end, match_len, + if (roseRunProgram(t, scratch, programs[id], start, real_end, match_len, flags) == HWLM_TERMINATE_MATCHING) { assert(can_stop_matching(scratch)); DEBUG_PRINTF("caller requested termination\n"); @@ -647,11 +648,12 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, return MO_CONTINUE_MATCHING; } -int roseReportAdaptor(u64a som, u64a offset, ReportID id, void *context) { - DEBUG_PRINTF("som=%llu, offset=%llu, id=%u\n", som, offset, id); +int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) { struct hs_scratch *scratch = context; assert(scratch && scratch->magic == SCRATCH_MAGIC); + DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end); + const struct RoseEngine *rose = scratch->core_info.rose; // Our match ID is the program offset. @@ -659,7 +661,7 @@ int roseReportAdaptor(u64a som, u64a offset, ReportID id, void *context) { const size_t match_len = 0; // Unused in this path. const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; hwlmcb_rv_t rv = - roseRunProgram(rose, scratch, program, som, offset, match_len, flags); + roseRunProgram(rose, scratch, program, start, end, match_len, flags); if (rv == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } diff --git a/src/rose/match.h b/src/rose/match.h index 49afa588..b69ff158 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -48,7 +48,7 @@ /* Callbacks, defined in catchup.c */ -int roseNfaAdaptor(u64a from_offset, u64a offset, ReportID id, void *context); +int roseNfaAdaptor(u64a start, u64a end, ReportID id, void *context); /* Callbacks, defined in match.c */ @@ -56,7 +56,7 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctx); hwlmcb_rv_t roseFloatingCallback(size_t start, size_t end, u32 id, void *ctx); hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, void *ctx); -int roseAnchoredCallback(u64a som, u64a end, u32 id, void *ctx); +int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx); /* Common code, used all over Rose runtime */ diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 7669103f..23532d40 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -33,10 +33,11 @@ #include "program_runtime.h" -int roseNfaEarliestSom(u64a from_offset, UNUSED u64a offset, UNUSED ReportID id, +int roseNfaEarliestSom(u64a start, UNUSED u64a end, UNUSED ReportID id, void *context) { + assert(context); u64a *som = context; - *som = MIN(*som, from_offset); + *som = MIN(*som, start); return MO_CONTINUE_MATCHING; } diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index e90395fb..fe71772e 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -702,8 +702,8 @@ int roseCheckLookaround(const struct RoseEngine *t, return 1; } -int roseNfaEarliestSom(u64a from_offset, u64a offset, ReportID id, - void *context); +int roseNfaEarliestSom(u64a start, u64a end, ReportID id, void *context); + static rose_inline u64a roseGetHaigSom(const struct RoseEngine *t, struct hs_scratch *scratch, const u32 qi, UNUSED const u32 leftfixLag) { diff --git a/src/rose/rose.h b/src/rose/rose.h index ecf16854..280e3bd5 100644 --- a/src/rose/rose.h +++ b/src/rose/rose.h @@ -49,7 +49,7 @@ void roseStreamEodExec(const struct RoseEngine *t, u64a offset, hwlmcb_rv_t rosePureLiteralCallback(size_t start, size_t end, u32 id, void *context); -int roseReportAdaptor(u64a som, u64a offset, ReportID id, void *context); +int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context); int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, u64a stream_offset, struct hs_scratch *scratch); diff --git a/src/som/som_runtime.c b/src/som/som_runtime.c index b9972b2c..1a868efc 100644 --- a/src/som/som_runtime.c +++ b/src/som/som_runtime.c @@ -87,14 +87,14 @@ char ok_and_mark_if_unset(u8 *som_store_valid, struct fatbit *som_set_now, } static -int somRevCallback(UNUSED u64a som, u64a offset, ReportID id, void *ctx) { - DEBUG_PRINTF("offset=%llu, id=%u\n", offset, id); +int somRevCallback(UNUSED u64a start, u64a end, ReportID id, void *ctx) { + DEBUG_PRINTF("offset=%llu, id=%u\n", end, id); // We use the id to store the offset adjustment (for assertions like a // leading \b or multiline mode). assert(id <= 1); u64a *from_offset = ctx; - LIMIT_TO_AT_MOST(from_offset, offset + id); + LIMIT_TO_AT_MOST(from_offset, end + id); return 1; // continue matching. }