From b2ebdac642d4d1b16fcbdea7ce9d300d19488f67 Mon Sep 17 00:00:00 2001
From: Justin Viiret
Date: Thu, 10 Dec 2015 11:41:47 +1100
Subject: [PATCH] rose: Extend program to handle literals, iterators

- cleanups
- add sparse iter instructions
- merge "root" and "sparse iter" programs together
- move program execution to new file program_runtime.h
- simplify EOD execution
---
 CMakeLists.txt                   |    1 +
 src/rose/eod.c                   |   84 +--
 src/rose/match.c                 | 1173 +-----------------------------
 src/rose/match.h                 |    5 -
 src/rose/program_runtime.h       | 1081 +++++++++++++++++++++++++++
 src/rose/rose.h                  |   48 +-
 src/rose/rose_build_bytecode.cpp |  417 ++++++-----
 src/rose/rose_build_compile.cpp  |    7 +
 src/rose/rose_build_dump.cpp     |    1 -
 src/rose/rose_dump.cpp           |  116 ++-
 src/rose/rose_internal.h         |   34 +-
 src/rose/rose_program.h          |   70 +-
 12 files changed, 1534 insertions(+), 1503 deletions(-)
 create mode 100644 src/rose/program_runtime.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f10e5cb5..4034b14b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -467,6 +467,7 @@ set (hs_exec_SRCS
     src/rose/match.h
     src/rose/match.c
     src/rose/miracle.h
+    src/rose/program_runtime.h
     src/rose/runtime.h
     src/rose/rose.h
     src/rose/rose_internal.h
diff --git a/src/rose/eod.c b/src/rose/eod.c
index dec07b54..014b51ca 100644
--- a/src/rose/eod.c
+++ b/src/rose/eod.c
@@ -28,6 +28,7 @@
 
 #include "catchup.h"
 #include "match.h"
+#include "program_runtime.h"
 #include "rose.h"
 #include "util/fatbit.h"
 
@@ -107,43 +108,18 @@ hwlmcb_rv_t roseEodRunMatcher(const struct RoseEngine *t, u64a offset,
 }
 
 static rose_inline
-int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset,
+int roseEodRunIterator(const struct RoseEngine *t, u64a offset,
                        struct hs_scratch *scratch) {
-    if (!t->eodIterOffset) {
+    if (!t->eodIterProgramOffset) {
        return MO_CONTINUE_MATCHING;
    }
 
-    DEBUG_PRINTF("running eod iterator at offset %u\n", t->eodIterOffset);
+    DEBUG_PRINTF("running eod program at offset %u\n", t->eodIterProgramOffset);
 
-    const u32 *programTable = getByOffset(t, t->eodProgramTableOffset);
-    const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset);
-    assert(ISALIGNED(programTable));
-    assert(ISALIGNED(it));
-
-    // Sparse iterator state was allocated earlier
-    struct mmbit_sparse_state *s = scratch->sparse_iter_state;
-    struct fatbit *handled_roles = scratch->handled_roles;
-
-    const u32 numStates = t->rolesWithStateCount;
-
-    void *role_state = getRoleState(state);
-    u32 idx = 0;
-    u32 i = mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s);
-
-    fatbit_clear(handled_roles);
-
-    int work_done = 0; // not read from in this path.
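The code being deleted above and below is an instance of the multibit sparse-iterator idiom that the new sparse iter instructions fold into the program. A minimal sketch of that idiom, reusing the names from the deleted block (role_state, numStates, it and s are as declared above):

    u32 idx = 0;
    for (u32 i = mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s);
         i != MMB_INVALID;
         i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) {
        // i is the index of a set bit in role_state; idx is that bit's rank
        // within the iterator, used to index side tables like programTable.
    }
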
- - for (; i != MMB_INVALID; - i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) { - DEBUG_PRINTF("pred state %u (iter idx=%u) is on\n", i, idx); - u32 programOffset = programTable[idx]; - u64a som = 0; - if (roseRunRoleProgram(t, programOffset, offset, &som, - &(scratch->tctxt), - &work_done) == HWLM_TERMINATE_MATCHING) { - return MO_HALT_MATCHING; - } + int work_done = 0; + if (roseRunProgram(t, t->eodIterProgramOffset, offset, &(scratch->tctxt), 0, + &work_done) == HWLM_TERMINATE_MATCHING) { + return MO_HALT_MATCHING; } return MO_CONTINUE_MATCHING; @@ -236,6 +212,27 @@ void roseCheckEodSuffixes(const struct RoseEngine *t, u8 *state, u64a offset, } } +static rose_inline +int roseRunEodProgram(const struct RoseEngine *t, u64a offset, + struct hs_scratch *scratch) { + if (!t->eodProgramOffset) { + return MO_CONTINUE_MATCHING; + } + + DEBUG_PRINTF("running eod program at %u\n", t->eodProgramOffset); + + // There should be no pending delayed literals. + assert(!scratch->tctxt.filledDelayedSlots); + + int work_done = 0; + if (roseRunProgram(t, t->eodProgramOffset, offset, &scratch->tctxt, 0, + &work_done) == HWLM_TERMINATE_MATCHING) { + return MO_HALT_MATCHING; + } + + return MO_CONTINUE_MATCHING; +} + static really_inline void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset, struct hs_scratch *scratch, const char is_streaming) { @@ -244,31 +241,20 @@ void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset, assert(!scratch->core_info.buf || !scratch->core_info.hbuf); assert(!can_stop_matching(scratch)); - // Fire the special EOD event literal. - if (t->hasEodEventLiteral) { - DEBUG_PRINTF("firing eod event id %u at offset %llu\n", - t->eodLiteralId, offset); - const struct core_info *ci = &scratch->core_info; - size_t len = ci->buf ? ci->len : ci->hlen; - assert(len || !ci->buf); /* len may be 0 if no history is required - * (bounds checks only can lead to this) */ - - roseRunEvent(len, t->eodLiteralId, &scratch->tctxt); - if (can_stop_matching(scratch)) { - DEBUG_PRINTF("user told us to stop\n"); - return; - } + // Run the unconditional EOD program. + if (roseRunEodProgram(t, offset, scratch) == MO_HALT_MATCHING) { + return; } roseCheckNfaEod(t, state, scratch, offset, is_streaming); - if (!t->eodIterOffset && !t->ematcherOffset) { + if (!t->eodIterProgramOffset && !t->ematcherOffset) { DEBUG_PRINTF("no eod accepts\n"); return; } // Handle pending EOD reports. - int itrv = roseEodRunIterator(t, state, offset, scratch); + int itrv = roseEodRunIterator(t, offset, scratch); if (itrv == MO_HALT_MATCHING) { return; } @@ -288,7 +274,7 @@ void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset, cleanupAfterEodMatcher(t, state, offset, scratch); // Fire any new EOD reports. 
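Both EOD entry points above now reduce to one pattern: a program offset of zero is the "no program" sentinel, and anything else is handed to the interpreter, with HWLM_TERMINATE_MATCHING translated to MO_HALT_MATCHING for the caller. A hypothetical caller showing the shape (runEodProgramIfPresent is illustrative, not part of this patch):

    static rose_inline
    int runEodProgramIfPresent(const struct RoseEngine *t, u32 progOffset,
                               u64a offset, struct hs_scratch *scratch) {
        if (!progOffset) {
            return MO_CONTINUE_MATCHING; // no program compiled for this point
        }
        int work_done = 0; // set if the program changes any state
        if (roseRunProgram(t, progOffset, offset, &scratch->tctxt, 0,
                           &work_done) == HWLM_TERMINATE_MATCHING) {
            return MO_HALT_MATCHING; // stop: user callback or exhaustion
        }
        return MO_CONTINUE_MATCHING;
    }
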
- roseEodRunIterator(t, state, offset, scratch); + roseEodRunIterator(t, offset, scratch); roseCheckEodSuffixes(t, state, offset, scratch); } diff --git a/src/rose/match.c b/src/rose/match.c index 591abcfb..1c688aab 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -31,6 +31,7 @@ #include "infix.h" #include "match.h" #include "miracle.h" +#include "program_runtime.h" #include "rose_program.h" #include "rose.h" #include "som/som_runtime.h" @@ -219,8 +220,7 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, assert(id < t->literalCount); const struct RoseLiteral *tl = &getLiteralTable(t)[id]; - DEBUG_PRINTF("literal id=%u, minDepth=%u, groups=0x%016llx\n", - id, tl->minDepth, tl->groups); + DEBUG_PRINTF("literal id=%u, groups=0x%016llx\n", id, tl->groups); pushDelayedMatches(tl, real_end, tctx); @@ -230,86 +230,6 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, return tctx->groups; } -static really_inline -hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 qi, s64a loc, - char is_mpv, char in_anchored, - char in_catchup) { - struct RoseContext *tctxt = &scratch->tctxt; - u8 *aa = getActiveLeafArray(t, tctxt->state); - struct fatbit *activeQueues = scratch->aqa; - u32 aaCount = t->activeArrayCount; - u32 qCount = t->queueCount; - - struct mq *q = &scratch->queues[qi]; - DEBUG_PRINTF("qcl %lld, loc: %lld, min (non mpv) match offset: %llu\n", - q_cur_loc(q), loc, tctxt->minNonMpvMatchOffset); - if (q_cur_loc(q) == loc) { - /* too many tops enqueued at the one spot; need to flatten this queue. - * We can use the full catchups as it will short circuit as we are - * already at this location. It also saves waking everybody up */ - pushQueueNoMerge(q, MQE_END, loc); - nfaQueueExec(q->nfa, q, loc); - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } else if (!in_catchup) { - if (is_mpv) { - tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ - if (loc + scratch->core_info.buf_offset - <= tctxt->minNonMpvMatchOffset) { - DEBUG_PRINTF("flushing chained\n"); - if (roseCatchUpMPV(t, tctxt->state, loc, scratch) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - goto done_queue_empty; - } - } - - if (roseCatchUpTo(t, tctxt->state, loc + scratch->core_info.buf_offset, - scratch, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } else { - /* we must be a chained nfa */ - assert(is_mpv); - DEBUG_PRINTF("flushing chained\n"); - tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ - if (roseCatchUpMPV(t, tctxt->state, loc, scratch) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } -done_queue_empty: - if (!mmbit_set(aa, aaCount, qi)) { - initQueue(q, qi, t, tctxt); - nfaQueueInitState(q->nfa, q); - pushQueueAt(q, 0, MQE_START, loc); - fatbit_set(activeQueues, qCount, qi); - } - - assert(!isQueueFull(q)); - - if (isAllExhausted(t, scratch->core_info.exhaustionVector)) { - if (!scratch->core_info.broken) { - scratch->core_info.broken = BROKEN_EXHAUSTED; - } - tctxt->groups = 0; - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - return HWLM_CONTINUE_MATCHING; -} - -static really_inline -hwlmcb_rv_t ensureQueueFlushed(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 qi, s64a loc, - char in_anchored) { - return ensureQueueFlushed_i(t, scratch, qi, loc, 0, in_anchored, 0); -} - static really_inline hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t, struct 
hs_scratch *scratch, u32 qi, s64a loc, @@ -318,73 +238,6 @@ hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t, in_chained); } -static rose_inline -hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t, - u32 qi, u32 top, u64a som, - u64a end, struct RoseContext *tctxt, - char in_anchored) { - DEBUG_PRINTF("suffix qi=%u, top event=%u\n", qi, top); - - u8 *aa = getActiveLeafArray(t, tctxt->state); - struct hs_scratch *scratch = tctxtToScratch(tctxt); - const u32 aaCount = t->activeArrayCount; - const u32 qCount = t->queueCount; - struct mq *q = &scratch->queues[qi]; - const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - const struct NFA *nfa = getNfaByInfo(t, info); - - struct core_info *ci = &scratch->core_info; - s64a loc = (s64a)end - ci->buf_offset; - assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen); - - if (!mmbit_set(aa, aaCount, qi)) { - initQueue(q, qi, t, tctxt); - nfaQueueInitState(nfa, q); - pushQueueAt(q, 0, MQE_START, loc); - fatbit_set(scratch->aqa, qCount, qi); - } else if (info->no_retrigger) { - DEBUG_PRINTF("yawn\n"); - /* nfa only needs one top; we can go home now */ - return HWLM_CONTINUE_MATCHING; - } else if (!fatbit_set(scratch->aqa, qCount, qi)) { - initQueue(q, qi, t, tctxt); - loadStreamState(nfa, q, 0); - pushQueueAt(q, 0, MQE_START, 0); - } else if (isQueueFull(q)) { - DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi); - if (info->eod) { - /* can catch up suffix independently no pq */ - q->context = NULL; - pushQueueNoMerge(q, MQE_END, loc); - nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } else if (ensureQueueFlushed(t, scratch, qi, loc, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - - assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID)); - pushQueueSom(q, top, loc, som); - - if (q_cur_loc(q) == (s64a)ci->len && !info->eod) { - /* we may not run the nfa; need to ensure state is fine */ - DEBUG_PRINTF("empty run\n"); - pushQueueNoMerge(q, MQE_END, loc); - char alive = nfaQueueExec(nfa, q, loc); - if (alive) { - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } else { - mmbit_unset(aa, aaCount, qi); - fatbit_unset(scratch->aqa, qCount, qi); - } - } - - return HWLM_CONTINUE_MATCHING; -} - static rose_inline void recordAnchoredMatch(struct RoseContext *tctxt, ReportID reportId, u64a end) { @@ -432,44 +285,6 @@ void recordAnchoredLiteralMatch(struct RoseContext *tctxt, u32 literal_id, mmbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx); } -/* handles the firing of external matches */ -static rose_inline -hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, u8 *state, ReportID id, - u64a end, struct RoseContext *tctxt, - char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - - if (roseCatchUpTo(t, state, end, scratch, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - assert(end == tctxt->minMatchOffset); - DEBUG_PRINTF("firing callback reportId=%u, end=%llu\n", id, end); - updateLastMatchOffset(tctxt, end); - - int cb_rv = tctxt->cb(end, id, tctxt->userCtx); - if (cb_rv == MO_HALT_MATCHING) { - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return HWLM_CONTINUE_MATCHING; - } - - if (isAllExhausted(t, scratch->core_info.exhaustionVector)) { - if (!scratch->core_info.broken) { - scratch->core_info.broken = BROKEN_EXHAUSTED; - } - tctxt->groups 
= 0; - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - return HWLM_CONTINUE_MATCHING; -} - hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r, u64a end, struct RoseContext *tctxt, char in_anchored, char in_catchup) { @@ -556,906 +371,6 @@ event_enqueued: return HWLM_CONTINUE_MATCHING; } -/* catches up engines enough to ensure any earlier mpv triggers are enqueued - * and then adds the trigger to the mpv queue. Must not be called during catch - * up */ -static rose_inline -hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, - u8 *state, ReportID r, u64a end, - struct RoseContext *tctxt, - char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - - if (roseCatchUpMpvFeeders(t, state, end, scratch, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - return roseHandleChainMatch(t, r, end, tctxt, in_anchored, 0); -} - -static rose_inline -hwlmcb_rv_t roseSomCatchup(const struct RoseEngine *t, u8 *state, u64a end, - struct RoseContext *tctxt, char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - - // In SOM processing, we may be able to limit or entirely avoid catchup. - - DEBUG_PRINTF("entry\n"); - - if (end == tctxt->minMatchOffset) { - DEBUG_PRINTF("already caught up\n"); - return HWLM_CONTINUE_MATCHING; - } - - DEBUG_PRINTF("catching up all NFAs\n"); - if (roseCatchUpTo(t, state, end, scratch, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - updateMinMatchOffset(tctxt, end); - return HWLM_CONTINUE_MATCHING; -} - -static really_inline -hwlmcb_rv_t roseHandleSom(const struct RoseEngine *t, u8 *state, ReportID id, - u64a end, struct RoseContext *tctxt, - char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - - DEBUG_PRINTF("id=%u, end=%llu, minMatchOffset=%llu\n", id, end, - tctxt->minMatchOffset); - - // Reach into reports and handle internal reports that just manipulate SOM - // slots ourselves, rather than going through the callback. 
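The exhaustion-check tail that closes roseHandleMatch above recurs in several of these handlers. Its shared shape, sketched as a hypothetical helper (the patch itself keeps the checks inline):

    static really_inline
    hwlmcb_rv_t terminateIfExhausted(const struct RoseEngine *t,
                                     struct hs_scratch *scratch) {
        if (!isAllExhausted(t, scratch->core_info.exhaustionVector)) {
            return HWLM_CONTINUE_MATCHING;
        }
        if (!scratch->core_info.broken) {
            scratch->core_info.broken = BROKEN_EXHAUSTED; // record the cause
        }
        scratch->tctxt.groups = 0; // no group can produce further matches
        DEBUG_PRINTF("termination requested\n");
        return HWLM_TERMINATE_MATCHING;
    }
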
- - if (roseSomCatchup(t, state, end, tctxt, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - const struct internal_report *ri = getInternalReport(t, id); - handleSomInternal(scratch, ri, end); - - return HWLM_CONTINUE_MATCHING; -} - -static rose_inline -hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, u8 *state, - ReportID id, u64a start, u64a end, - struct RoseContext *tctxt, char in_anchored) { - if (roseCatchUpTo(t, state, end, tctxtToScratch(tctxt), in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - DEBUG_PRINTF("firing som callback reportId=%u, start=%llu end=%llu\n", id, - start, end); - DEBUG_PRINTF(" last match %llu\n", tctxt->lastMatchOffset); - assert(end == tctxt->minMatchOffset); - - updateLastMatchOffset(tctxt, end); - int cb_rv = tctxt->cb_som(start, end, id, tctxt->userCtx); - if (cb_rv == MO_HALT_MATCHING) { - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { - return HWLM_CONTINUE_MATCHING; - } - - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - if (isAllExhausted(t, ci->exhaustionVector)) { - if (!ci->broken) { - ci->broken = BROKEN_EXHAUSTED; - } - tctxt->groups = 0; - DEBUG_PRINTF("termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - return HWLM_CONTINUE_MATCHING; -} - -static rose_inline -hwlmcb_rv_t roseHandleSomSom(const struct RoseEngine *t, u8 *state, ReportID id, - u64a start, u64a end, struct RoseContext *tctxt, - char in_anchored) { - DEBUG_PRINTF("id=%u, start=%llu, end=%llu, minMatchOffset=%llu\n", - id, start, end, tctxt->minMatchOffset); - - // Reach into reports and handle internal reports that just manipulate SOM - // slots ourselves, rather than going through the callback. - - if (roseSomCatchup(t, state, end, tctxt, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - const struct internal_report *ri = getInternalReport(t, id); - setSomFromSomAware(tctxtToScratch(tctxt), ri, start, end); - return HWLM_CONTINUE_MATCHING; -} - -static rose_inline -char rosePrefixCheckMiracles(const struct RoseEngine *t, - const struct LeftNfaInfo *left, - struct core_info *ci, struct mq *q, u64a end) { - if (left->transient) { - // Miracles won't help us with transient leftfix engines; they only - // scan for a limited time anyway. - return 1; - } - - if (!left->stopTable) { - return 1; - } - - DEBUG_PRINTF("looking for miracle on queue %u\n", q->nfa->queueIndex); - - const s64a begin_loc = q_cur_loc(q); - const s64a end_loc = end - ci->buf_offset; - - s64a miracle_loc; - if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) { - goto found_miracle; - } - - if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc, - &miracle_loc)) { - goto found_miracle; - } - - return 1; - -found_miracle: - DEBUG_PRINTF("miracle at %lld\n", miracle_loc); - assert(miracle_loc >= begin_loc); - - // If we're a prefix, then a miracle effectively results in us needing to - // re-init our state and start fresh. - if (!left->infix) { - if (miracle_loc != begin_loc) { - DEBUG_PRINTF("re-init prefix state\n"); - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, miracle_loc); - pushQueueAt(q, 1, MQE_TOP, miracle_loc); - nfaQueueInitState(q->nfa, q); - } - return 1; - } - - // Otherwise, we're an infix. Remove tops before the miracle from the queue - // and re-init at that location. 
- - q_skip_forward_to(q, miracle_loc); - - if (q_last_type(q) == MQE_START) { - DEBUG_PRINTF("miracle caused infix to die\n"); - return 0; - } - - DEBUG_PRINTF("re-init infix state\n"); - assert(q->items[q->cur].type == MQE_START); - q->items[q->cur].location = miracle_loc; - nfaQueueInitState(q->nfa, q); - - return 1; -} - -static really_inline -char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, - ReportID leftfixReport, u64a end, - struct RoseContext *tctxt) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - struct core_info *ci = &scratch->core_info; - - u32 ri = queueToLeftIndex(t, qi); - const struct LeftNfaInfo *left = getLeftTable(t) + ri; - - DEBUG_PRINTF("testing %s %s %u/%u with lag %u (maxLag=%u)\n", - (left->transient ? "transient" : "active"), - (left->infix ? "infix" : "prefix"), - ri, qi, leftfixLag, left->maxLag); - - assert(leftfixLag <= left->maxLag); - - struct mq *q = scratch->queues + qi; - u32 qCount = t->queueCount; - u32 arCount = t->activeLeftCount; - - if (!mmbit_isset(getActiveLeftArray(t, tctxt->state), arCount, ri)) { - DEBUG_PRINTF("engine is dead nothing to see here\n"); - return 0; - } - - if (unlikely(end < leftfixLag)) { - assert(0); /* lag is the literal length */ - return 0; - } - - if (nfaSupportsZombie(getNfaByQueue(t, qi)) && ci->buf_offset - && !fatbit_isset(scratch->aqa, qCount, qi) - && isZombie(t, tctxt->state, left)) { - DEBUG_PRINTF("zombie\n"); - return 1; - } - - if (!fatbit_set(scratch->aqa, qCount, qi)) { - DEBUG_PRINTF("initing q %u\n", qi); - initRoseQueue(t, qi, left, tctxt); - if (ci->buf_offset) { // there have been writes before us! - s32 sp; - if (left->transient) { - sp = -(s32)ci->hlen; - } else { - sp = -(s32)loadRoseDelay(t, tctxt->state, left); - } - - /* transient nfas are always started fresh -> state not maintained - * at stream boundary */ - - pushQueueAt(q, 0, MQE_START, sp); - if (left->infix || (ci->buf_offset + sp > 0 && !left->transient)) { - loadStreamState(q->nfa, q, sp); - } else { - pushQueueAt(q, 1, MQE_TOP, sp); - nfaQueueInitState(q->nfa, q); - } - } else { // first write ever - pushQueueAt(q, 0, MQE_START, 0); - pushQueueAt(q, 1, MQE_TOP, 0); - nfaQueueInitState(q->nfa, q); - } - } - - s64a loc = (s64a)end - ci->buf_offset - leftfixLag; - assert(loc >= q_cur_loc(q)); - assert(leftfixReport != MO_INVALID_IDX); - - if (left->transient) { - s64a start_loc = loc - left->transient; - if (q_cur_loc(q) < start_loc) { - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, start_loc); - pushQueueAt(q, 1, MQE_TOP, start_loc); - nfaQueueInitState(q->nfa, q); - } - } - - if (q_cur_loc(q) < loc || q_last_type(q) != MQE_START) { - if (left->infix) { - if (infixTooOld(q, loc)) { - DEBUG_PRINTF("infix %u died of old age\n", ri); - scratch->tctxt.groups &= left->squash_mask; - mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); - return 0; - } - - reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); - } - - if (!rosePrefixCheckMiracles(t, left, ci, q, end)) { - DEBUG_PRINTF("leftfix %u died due to miracle\n", ri); - scratch->tctxt.groups &= left->squash_mask; - mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); - return 0; - } - -#ifdef DEBUG - debugQueue(q); -#endif - - pushQueueNoMerge(q, MQE_END, loc); - - char rv = nfaQueueExecRose(q->nfa, q, leftfixReport); - if (!rv) { /* nfa is dead */ - DEBUG_PRINTF("leftfix %u died while trying to catch up\n", ri); - mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); - assert(!mmbit_isset(getActiveLeftArray(t, 
tctxt->state), arCount, - ri)); - tctxt->groups &= left->squash_mask; - return 0; - } - - // Queue must have next start loc before we call nfaInAcceptState. - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - - DEBUG_PRINTF("checking for report %u\n", leftfixReport); - DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); - return rv == MO_MATCHES_PENDING; - } else { - DEBUG_PRINTF("checking for report %u\n", leftfixReport); - char rv = nfaInAcceptState(q->nfa, leftfixReport, q); - DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); - return rv; - } -} - -static rose_inline -void roseSetRole(const struct RoseEngine *t, u8 *state, - struct RoseContext *tctxt, u32 stateIndex, u8 depth) { - DEBUG_PRINTF("state idx=%u, depth=%u\n", stateIndex, depth); - mmbit_set(getRoleState(state), t->rolesWithStateCount, stateIndex); - update_depth(tctxt, depth); -} - -static rose_inline -void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi, - u32 topEvent, u8 cancel, struct RoseContext *tctxt) { - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - s64a loc = (s64a)end - ci->buf_offset; - - u32 ri = queueToLeftIndex(t, qi); - assert(topEvent < MQE_INVALID); - - const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi); - assert(!left->transient); - - DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent); - - struct mq *q = tctxtToScratch(tctxt)->queues + qi; - const struct NfaInfo *info = getNfaInfoByQueue(t, qi); - - u8 *activeLeftArray = getActiveLeftArray(t, tctxt->state); - const u32 arCount = t->activeLeftCount; - char alive = mmbit_set(activeLeftArray, arCount, ri); - - if (alive && info->no_retrigger) { - DEBUG_PRINTF("yawn\n"); - return; - } - - struct fatbit *aqa = tctxtToScratch(tctxt)->aqa; - const u32 qCount = t->queueCount; - - if (alive && nfaSupportsZombie(getNfaByInfo(t, info)) && ci->buf_offset && - !fatbit_isset(aqa, qCount, qi) && isZombie(t, tctxt->state, left)) { - DEBUG_PRINTF("yawn - zombie\n"); - return; - } - - if (cancel) { - DEBUG_PRINTF("dominating top: (re)init\n"); - fatbit_set(aqa, qCount, qi); - initRoseQueue(t, qi, left, tctxt); - pushQueueAt(q, 0, MQE_START, loc); - nfaQueueInitState(q->nfa, q); - } else if (!fatbit_set(aqa, qCount, qi)) { - DEBUG_PRINTF("initing %u\n", qi); - initRoseQueue(t, qi, left, tctxt); - if (alive) { - s32 sp = -(s32)loadRoseDelay(t, tctxt->state, left); - pushQueueAt(q, 0, MQE_START, sp); - loadStreamState(q->nfa, q, sp); - } else { - pushQueueAt(q, 0, MQE_START, loc); - nfaQueueInitState(q->nfa, q); - } - } else if (!alive) { - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - nfaQueueInitState(q->nfa, q); - } else if (isQueueFull(q)) { - reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); - - if (isQueueFull(q)) { - /* still full - reduceQueue did nothing */ - DEBUG_PRINTF("queue %u full (%u items) -> catching up nfa\n", qi, - q->end - q->cur); - pushQueueNoMerge(q, MQE_END, loc); - nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); - - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } - } - - pushQueueSom(q, topEvent, loc, start); -} - -static really_inline -int reachHasBit(const u8 *reach, u8 c) { - return !!(reach[c / 8U] & (u8)1U << (c % 8U)); -} - -/** - * \brief Scan around a literal, checking that that "lookaround" reach masks - * are satisfied. 
- */ -static rose_inline -int roseCheckLookaround(const struct RoseEngine *t, u32 lookaroundIndex, - u32 lookaroundCount, u64a end, - struct RoseContext *tctxt) { - assert(lookaroundIndex != MO_INVALID_IDX); - assert(lookaroundCount > 0); - - const struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, - ci->buf_offset, ci->buf_offset + ci->len); - - const u8 *base = (const u8 *)t; - const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); - const s8 *look = look_base + lookaroundIndex; - const s8 *look_end = look + lookaroundCount; - assert(look < look_end); - - const u8 *reach_base = base + t->lookaroundReachOffset; - const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN; - - // The following code assumes that the lookaround structures are ordered by - // increasing offset. - - const s64a base_offset = end - ci->buf_offset; - DEBUG_PRINTF("base_offset=%lld\n", base_offset); - DEBUG_PRINTF("first look has offset %d\n", *look); - - // If our first check tells us we need to look at an offset before the - // start of the stream, this role cannot match. - if (unlikely(*look < 0 && (u64a)(0 - *look) > end)) { - DEBUG_PRINTF("too early, fail\n"); - return 0; - } - - // Skip over offsets that are before the history buffer. - do { - s64a offset = base_offset + *look; - if (offset >= -(s64a)ci->hlen) { - goto in_history; - } - DEBUG_PRINTF("look=%d before history\n", *look); - look++; - reach += REACH_BITVECTOR_LEN; - } while (look < look_end); - - // History buffer. - DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look); - for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) { - in_history: - ; - s64a offset = base_offset + *look; - DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); - - if (offset >= 0) { - DEBUG_PRINTF("in buffer\n"); - goto in_buffer; - } - - assert(offset >= -(s64a)ci->hlen && offset < 0); - u8 c = ci->hbuf[ci->hlen + offset]; - if (!reachHasBit(reach, c)) { - DEBUG_PRINTF("char 0x%02x failed reach check\n", c); - return 0; - } - } - - // Current buffer. - DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look); - for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) { - in_buffer: - ; - s64a offset = base_offset + *look; - DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); - - if (offset >= (s64a)ci->len) { - DEBUG_PRINTF("in the future\n"); - break; - } - - assert(offset >= 0 && offset < (s64a)ci->len); - u8 c = ci->buf[offset]; - if (!reachHasBit(reach, c)) { - DEBUG_PRINTF("char 0x%02x failed reach check\n", c); - return 0; - } - } - - DEBUG_PRINTF("OK :)\n"); - return 1; -} - -static -int roseNfaEarliestSom(u64a from_offset, UNUSED u64a offset, UNUSED ReportID id, - void *context) { - u64a *som = context; - *som = MIN(*som, from_offset); - return MO_CONTINUE_MATCHING; -} - -static rose_inline -u64a roseGetHaigSom(const struct RoseEngine *t, const u32 qi, - UNUSED const u32 leftfixLag, - struct RoseContext *tctxt) { - u32 ri = queueToLeftIndex(t, qi); - - UNUSED const struct LeftNfaInfo *left = getLeftTable(t) + ri; - - DEBUG_PRINTF("testing %s prefix %u/%u with lag %u (maxLag=%u)\n", - left->transient ? 
"transient" : "active", ri, qi, - leftfixLag, left->maxLag); - - assert(leftfixLag <= left->maxLag); - - struct mq *q = tctxtToScratch(tctxt)->queues + qi; - - u64a start = ~0ULL; - - /* switch the callback + context for a fun one */ - q->som_cb = roseNfaEarliestSom; - q->context = &start; - - nfaReportCurrentMatches(q->nfa, q); - - /* restore the old callback + context */ - q->som_cb = roseNfaSomAdaptor; - q->context = NULL; - DEBUG_PRINTF("earliest som is %llu\n", start); - return start; -} - -static rose_inline -char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) { - assert(max_bound <= ROSE_BOUND_INF); - assert(min_bound <= max_bound); - - if (end < min_bound) { - return 0; - } - return max_bound == ROSE_BOUND_INF || end <= max_bound; -} - -#define PROGRAM_CASE(name) \ - case ROSE_INSTR_##name: { \ - DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_INSTR_##name); \ - const struct ROSE_STRUCT_##name *ri = \ - (const struct ROSE_STRUCT_##name *)pc; - -#define PROGRAM_NEXT_INSTRUCTION \ - pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ - break; \ - } - -static really_inline -hwlmcb_rv_t roseRunRoleProgram_i(const struct RoseEngine *t, u32 programOffset, - u64a end, u64a *som, struct RoseContext *tctxt, - char in_anchored, int *work_done) { - DEBUG_PRINTF("program begins at offset %u\n", programOffset); - - assert(programOffset); - assert(programOffset < t->size); - - const char *pc = getByOffset(t, programOffset); - - assert(*(const u8 *)pc != ROSE_INSTR_END); - - for (;;) { - assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN)); - u8 code = *(const u8 *)pc; - assert(code <= ROSE_INSTR_END); - - switch ((enum RoseInstructionCode)code) { - PROGRAM_CASE(ANCHORED_DELAY) { - if (in_anchored && end > t->floatingMinLiteralMatchOffset) { - DEBUG_PRINTF("delay until playback\n"); - update_depth(tctxt, ri->depth); - tctxt->groups |= ri->groups; - *work_done = 1; - assert(ri->done_jump); // must progress - pc += ri->done_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_ONLY_EOD) { - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - if (end != ci->buf_offset + ci->len) { - DEBUG_PRINTF("should only match at end of data\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_BOUNDS) { - if (!in_anchored && - !roseCheckRootBounds(end, ri->min_bound, ri->max_bound)) { - DEBUG_PRINTF("failed root bounds check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_NOT_HANDLED) { - struct fatbit *handled = tctxtToScratch(tctxt)->handled_roles; - if (fatbit_set(handled, t->handledKeyCount, ri->key)) { - DEBUG_PRINTF("key %u already set\n", ri->key); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LOOKAROUND) { - if (!roseCheckLookaround(t, ri->index, ri->count, end, tctxt)) { - DEBUG_PRINTF("failed lookaround check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(CHECK_LEFTFIX) { - if (!roseTestLeftfix(t, ri->queue, ri->lag, ri->report, end, - tctxt)) { - DEBUG_PRINTF("failed lookaround check\n"); - assert(ri->fail_jump); // must progress - pc += ri->fail_jump; - continue; - } - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_ADJUST) { - assert(ri->distance <= end); - *som = end - ri->distance; - DEBUG_PRINTF("som is 
(end - %u) = %llu\n", ri->distance, *som); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SOM_LEFTFIX) { - *som = roseGetHaigSom(t, ri->queue, ri->lag, tctxt); - DEBUG_PRINTF("som from leftfix is %llu\n", *som); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(TRIGGER_INFIX) { - roseTriggerInfix(t, *som, end, ri->queue, ri->event, ri->cancel, - tctxt); - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(TRIGGER_SUFFIX) { - if (roseHandleSuffixTrigger(t, ri->queue, ri->event, *som, end, - tctxt, in_anchored) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT) { - if (roseHandleMatch(t, tctxt->state, ri->report, end, tctxt, - in_anchored) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_CHAIN) { - if (roseCatchUpAndHandleChainMatch(t, tctxt->state, ri->report, - end, tctxt, in_anchored) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_EOD) { - if (tctxt->cb(end, ri->report, tctxt->userCtx) == - MO_HALT_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM_INT) { - if (roseHandleSom(t, tctxt->state, ri->report, end, tctxt, - in_anchored) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM) { - if (roseHandleSomSom(t, tctxt->state, ri->report, *som, end, - tctxt, - in_anchored) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_SOM_KNOWN) { - if (roseHandleSomMatch(t, tctxt->state, ri->report, *som, end, - tctxt, in_anchored) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_STATE) { - roseSetRole(t, tctxt->state, tctxt, ri->index, ri->depth); - *work_done = 1; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(SET_GROUPS) { - tctxt->groups |= ri->groups; - DEBUG_PRINTF("set groups 0x%llx -> 0x%llx\n", ri->groups, - tctxt->groups); - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(END) { - DEBUG_PRINTF("finished\n"); - return HWLM_CONTINUE_MATCHING; - } - PROGRAM_NEXT_INSTRUCTION - } - } - - assert(0); // unreachable - return HWLM_CONTINUE_MATCHING; -} - -#undef PROGRAM_CASE -#undef PROGRAM_NEXT_INSTRUCTION - -hwlmcb_rv_t roseRunRoleProgram(const struct RoseEngine *t, u32 programOffset, - u64a end, u64a *som, struct RoseContext *tctxt, - int *work_done) { - return roseRunRoleProgram_i(t, programOffset, end, som, tctxt, 0, - work_done); -} - -static really_inline -void roseSquashGroup(struct RoseContext *tctxt, const struct RoseLiteral *tl) { - assert(tl->squashesGroup); - - // we should be squashing a single group - assert(popcount64(tl->groups) == 1); - - DEBUG_PRINTF("apply squash mask 0x%016llx, groups 0x%016llx -> 0x%016llx\n", - ~tl->groups, tctxt->groups, tctxt->groups & ~tl->groups); - - tctxt->groups &= ~tl->groups; -} - -// Run the sparse iterator for this literal and use that to discover which -// roles to consider. 
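Every instruction handler above follows the PROGRAM_CASE / PROGRAM_NEXT_INSTRUCTION dispatch idiom: PROGRAM_NEXT_INSTRUCTION advances pc by ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN) and breaks out of the switch, so a case that wants a relative jump adds it to pc and continues the loop instead. Stripped to a two-instruction skeleton for illustration:

    for (;;) {
        assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN));
        u8 code = *(const u8 *)pc;
        switch ((enum RoseInstructionCode)code) {
            PROGRAM_CASE(CHECK_ONLY_EOD) {
                struct core_info *ci = &tctxtToScratch(tctxt)->core_info;
                if (end != ci->buf_offset + ci->len) {
                    pc += ri->fail_jump; // take the failure jump...
                    continue;            // ...and skip the default advance
                }
            }
            PROGRAM_NEXT_INSTRUCTION

            PROGRAM_CASE(END) {
                return HWLM_CONTINUE_MATCHING; // every program ends here
            }
            PROGRAM_NEXT_INSTRUCTION
        }
    }
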
-/* Note: uses the stashed sparse iter state; cannot be called from - * anybody else who is using it */ -/* Note: uses the handled role mmbit; cannot be called from - * anybody else who is using it (nobody else should be) */ -/* non-root roles should not occur in any anchored context */ -static really_inline -hwlmcb_rv_t roseWalkSparseIterator(const struct RoseEngine *t, - const struct RoseLiteral *tl, u64a end, - struct RoseContext *tctxt) { - /* assert(!tctxt->in_anchored); */ - /* assert(!tctxt->in_anch_playback); */ - const u32 *iterProgram = getByOffset(t, tl->iterProgramOffset); - const struct mmbit_sparse_iter *it = getByOffset(t, tl->iterOffset); - assert(ISALIGNED(iterProgram)); - assert(ISALIGNED(it)); - - // Sparse iterator state was allocated earlier - struct mmbit_sparse_state *s = tctxtToScratch(tctxt)->sparse_iter_state; - struct fatbit *handled_roles = tctxtToScratch(tctxt)->handled_roles; - - const u32 numStates = t->rolesWithStateCount; - - void *role_state = getRoleState(tctxt->state); - u32 idx = 0; - int work_done = 0; // set to 1 if we actually process any roles - u32 i = mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s); - - fatbit_clear(handled_roles); - - for (; i != MMB_INVALID; - i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) { - u32 programOffset = iterProgram[idx]; - DEBUG_PRINTF("pred state %u (iter idx=%u) is on -> program %u\n", i, - idx, programOffset); - - // If this bit is switched on in the sparse iterator, it must be - // driving a program. - assert(programOffset); - - u64a som = 0ULL; - if (roseRunRoleProgram_i(t, programOffset, end, &som, tctxt, 0, - &work_done) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } - - // If we've actually handled any roles, we might need to apply this - // literal's squash mask to our groups as well. - if (work_done && tl->squashesGroup) { - roseSquashGroup(tctxt, tl); - } - - return HWLM_CONTINUE_MATCHING; -} - -// Walk the set of root roles (roles with depth 1) associated with this literal -// and set them on. -static really_inline -char roseWalkRootRoles_i(const struct RoseEngine *t, - const struct RoseLiteral *tl, u64a end, - struct RoseContext *tctxt, char in_anchored) { - if (!tl->rootProgramOffset) { - return 1; - } - - DEBUG_PRINTF("running literal root program at %u\n", tl->rootProgramOffset); - - u64a som = 0; - int work_done = 0; - - if (roseRunRoleProgram_i(t, tl->rootProgramOffset, end, &som, tctxt, - in_anchored, - &work_done) == HWLM_TERMINATE_MATCHING) { - return 0; - } - - // If we've actually handled any roles, we might need to apply this - // literal's squash mask to our groups as well. 
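Concretely, a squashing literal owns exactly one group bit, and roseSquashGroup (above) clears it so the literal matcher stops reporting that literal once its work is done. For example, with tctxt->groups == 0x6 and tl->groups == 0x4:

    assert(popcount64(tl->groups) == 1); // exactly one group bit owned
    tctxt->groups &= ~tl->groups;        // 0x6 & ~0x4 == 0x2
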
- if (work_done && tl->squashesGroup) { - roseSquashGroup(tctxt, tl); - } - - return 1; -} - -static never_inline -char roseWalkRootRoles_A(const struct RoseEngine *t, - const struct RoseLiteral *tl, u64a end, - struct RoseContext *tctxt) { - return roseWalkRootRoles_i(t, tl, end, tctxt, 1); -} - -static never_inline -char roseWalkRootRoles_N(const struct RoseEngine *t, - const struct RoseLiteral *tl, u64a end, - struct RoseContext *tctxt) { - return roseWalkRootRoles_i(t, tl, end, tctxt, 0); -} - -static really_inline -char roseWalkRootRoles(const struct RoseEngine *t, - const struct RoseLiteral *tl, u64a end, - struct RoseContext *tctxt, char in_anchored, - char in_anch_playback) { - assert(!in_anch_playback || tl->rootProgramOffset); - if (!in_anch_playback && !tl->rootProgramOffset) { - return 1; - } - - if (in_anchored) { - return roseWalkRootRoles_A(t, tl, end, tctxt); - } else { - return roseWalkRootRoles_N(t, tl, end, tctxt); - } -} - /* handles catchup, som, cb, etc */ static really_inline hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, u8 *state, @@ -1553,31 +468,33 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx) { assert(id < t->literalCount); const struct RoseLiteral *tl = &getLiteralTable(t)[id]; - assert(tl->rootProgramOffset); + assert(tl->programOffset); assert(!tl->delay_mask); - DEBUG_PRINTF("literal id=%u, minDepth=%u, groups=0x%016llx\n", id, - tl->minDepth, tl->groups); + DEBUG_PRINTF("literal id=%u, groups=0x%016llx\n", id, tl->groups); if (real_end <= t->floatingMinLiteralMatchOffset) { roseFlushLastByteHistory(t, state, real_end, tctxt); tctxt->lastEndOffset = real_end; } - /* anchored literals are root only */ - if (!roseWalkRootRoles(t, tl, real_end, tctxt, 1, 0)) { - rv = HWLM_TERMINATE_MATCHING; - } - - DEBUG_PRINTF("DONE depth=%u, groups=0x%016llx\n", tctxt->depth, - tctxt->groups); - - if (rv == HWLM_TERMINATE_MATCHING) { + int work_done = 0; + if (roseRunProgram(t, tl->programOffset, real_end, tctxt, 1, &work_done) == + HWLM_TERMINATE_MATCHING) { assert(can_stop_matching(tctxtToScratch(tctxt))); DEBUG_PRINTF("caller requested termination\n"); return MO_HALT_MATCHING; } + // If we've actually handled any roles, we might need to apply this + // literal's squash mask to our groups as well. + if (work_done && tl->squashesGroup) { + roseSquashGroup(tctxt, tl); + } + + DEBUG_PRINTF("DONE depth=%u, groups=0x%016llx\n", tctxt->depth, + tctxt->groups); + if (real_end > t->floatingMinLiteralMatchOffset) { recordAnchoredLiteralMatch(tctxt, id, real_end); } @@ -1623,8 +540,7 @@ hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, u32 id, assert(id < t->literalCount); const struct RoseLiteral *tl = &getLiteralTable(t)[id]; - DEBUG_PRINTF("lit id=%u, minDepth=%u, groups=0x%016llx\n", id, tl->minDepth, - tl->groups); + DEBUG_PRINTF("lit id=%u, groups=0x%016llx\n", id, tl->groups); if (do_group_check && !(tl->groups & tctxt->groups)) { DEBUG_PRINTF("IGNORE: none of this literal's groups are set.\n"); @@ -1643,28 +559,21 @@ hwlmcb_rv_t roseProcessMatch_i(const struct RoseEngine *t, u64a end, u32 id, return HWLM_CONTINUE_MATCHING; } - if (tl->minDepth > tctxt->depth) { - DEBUG_PRINTF("IGNORE: minDepth=%u > %u\n", tl->minDepth, tctxt->depth); - goto root_roles; - } + int work_done = 0; - /* the depth checks will normally prevent roles without a spare iterator - * from reaching here (root roles) (and only root roles should be seen - * during anch play back). 
*/ - assert(tl->iterOffset == ROSE_OFFSET_INVALID || !in_anch_playback); - if (tl->iterOffset != ROSE_OFFSET_INVALID && !in_anch_playback) { - hwlmcb_rv_t rv = roseWalkSparseIterator(t, tl, end, tctxt); - - if (rv == HWLM_TERMINATE_MATCHING) { + if (tl->programOffset) { + DEBUG_PRINTF("running program at %u\n", tl->programOffset); + if (roseRunProgram(t, tl->programOffset, end, tctxt, 0, &work_done) == + HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } + } -root_roles: - // Process "root roles", i.e. depth 1 roles for this literal - if (!roseWalkRootRoles(t, tl, end, tctxt, 0 /* in_anchored */, - in_anch_playback)) { - return HWLM_TERMINATE_MATCHING; + // If we've actually handled any roles, we might need to apply this + // literal's squash mask to our groups as well. + if (work_done && tl->squashesGroup) { + roseSquashGroup(tctxt, tl); } return HWLM_CONTINUE_MATCHING; @@ -1966,31 +875,3 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctxt) { DEBUG_PRINTF("user requested halt\n"); return HWLM_TERMINATE_MATCHING; } - -// Specialised cut-down roseCallback for running ROSE_EVENT "literals", like the -// EOD one. -void roseRunEvent(size_t end, u32 id, struct RoseContext *tctxt) { - const struct RoseEngine *t = tctxt->t; - struct core_info *ci = &tctxtToScratch(tctxt)->core_info; - u64a real_end = ci->buf_offset - ci->hlen + end; - - DEBUG_PRINTF("EVENT id=%u offset=%llu\n", id, real_end); - - // Caller should guard against broken stream. - assert(!can_stop_matching(tctxtToScratch(tctxt))); - - // Shouldn't be here if we're a real literal with benefits. - assert(id >= t->nonbenefits_base_id); - - // At the moment, this path is only used for the EOD event. - assert(id == t->eodLiteralId); - - // There should be no pending delayed literals. - assert(!tctxt->filledDelayedSlots); - - // Note: we throw away the return value. - roseProcessMatch_i(t, real_end, id, tctxt, 0, 0, 0); - - DEBUG_PRINTF("DONE depth=%hhu, groups=0x%016llx\n", tctxt->depth, - tctxt->groups); -} diff --git a/src/rose/match.h b/src/rose/match.h index 6bcf781e..86e22183 100644 --- a/src/rose/match.h +++ b/src/rose/match.h @@ -55,7 +55,6 @@ hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctx); hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id, void *ctx); int roseAnchoredCallback(u64a end, u32 id, void *ctx); -void roseRunEvent(size_t end, u32 id, struct RoseContext *tctxt); /* Common code, used all over Rose runtime */ @@ -299,8 +298,4 @@ void roseFlushLastByteHistory(const struct RoseEngine *t, u8 *state, scratch->sparse_iter_state); } -hwlmcb_rv_t roseRunRoleProgram(const struct RoseEngine *t, u32 programOffset, - u64a end, u64a *som, struct RoseContext *tctxt, - int *work_done); - #endif diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h new file mode 100644 index 00000000..08dbff1f --- /dev/null +++ b/src/rose/program_runtime.h @@ -0,0 +1,1081 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef PROGRAM_RUNTIME_H +#define PROGRAM_RUNTIME_H + +#include "catchup.h" +#include "counting_miracle.h" +#include "infix.h" +#include "match.h" +#include "miracle.h" +#include "rose.h" +#include "rose_internal.h" +#include "rose_program.h" +#include "rose_types.h" +#include "runtime.h" +#include "scratch.h" +#include "ue2common.h" +#include "util/fatbit.h" +#include "util/multibit.h" + +static rose_inline +char rosePrefixCheckMiracles(const struct RoseEngine *t, + const struct LeftNfaInfo *left, + struct core_info *ci, struct mq *q, u64a end) { + if (left->transient) { + // Miracles won't help us with transient leftfix engines; they only + // scan for a limited time anyway. + return 1; + } + + if (!left->stopTable) { + return 1; + } + + DEBUG_PRINTF("looking for miracle on queue %u\n", q->nfa->queueIndex); + + const s64a begin_loc = q_cur_loc(q); + const s64a end_loc = end - ci->buf_offset; + + s64a miracle_loc; + if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) { + goto found_miracle; + } + + if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc, + &miracle_loc)) { + goto found_miracle; + } + + return 1; + +found_miracle: + DEBUG_PRINTF("miracle at %lld\n", miracle_loc); + assert(miracle_loc >= begin_loc); + + // If we're a prefix, then a miracle effectively results in us needing to + // re-init our state and start fresh. + if (!left->infix) { + if (miracle_loc != begin_loc) { + DEBUG_PRINTF("re-init prefix state\n"); + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, miracle_loc); + pushQueueAt(q, 1, MQE_TOP, miracle_loc); + nfaQueueInitState(q->nfa, q); + } + return 1; + } + + // Otherwise, we're an infix. Remove tops before the miracle from the queue + // and re-init at that location. 
+ + q_skip_forward_to(q, miracle_loc); + + if (q_last_type(q) == MQE_START) { + DEBUG_PRINTF("miracle caused infix to die\n"); + return 0; + } + + DEBUG_PRINTF("re-init infix state\n"); + assert(q->items[q->cur].type == MQE_START); + q->items[q->cur].location = miracle_loc; + nfaQueueInitState(q->nfa, q); + + return 1; +} + +static really_inline +hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 qi, s64a loc, + char is_mpv, char in_anchored, + char in_catchup) { + struct RoseContext *tctxt = &scratch->tctxt; + u8 *aa = getActiveLeafArray(t, tctxt->state); + struct fatbit *activeQueues = scratch->aqa; + u32 aaCount = t->activeArrayCount; + u32 qCount = t->queueCount; + + struct mq *q = &scratch->queues[qi]; + DEBUG_PRINTF("qcl %lld, loc: %lld, min (non mpv) match offset: %llu\n", + q_cur_loc(q), loc, tctxt->minNonMpvMatchOffset); + if (q_cur_loc(q) == loc) { + /* too many tops enqueued at the one spot; need to flatten this queue. + * We can use the full catchups as it will short circuit as we are + * already at this location. It also saves waking everybody up */ + pushQueueNoMerge(q, MQE_END, loc); + nfaQueueExec(q->nfa, q, loc); + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } else if (!in_catchup) { + if (is_mpv) { + tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ + if (loc + scratch->core_info.buf_offset + <= tctxt->minNonMpvMatchOffset) { + DEBUG_PRINTF("flushing chained\n"); + if (roseCatchUpMPV(t, tctxt->state, loc, scratch) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + goto done_queue_empty; + } + } + + if (roseCatchUpTo(t, tctxt->state, loc + scratch->core_info.buf_offset, + scratch, in_anchored) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } else { + /* we must be a chained nfa */ + assert(is_mpv); + DEBUG_PRINTF("flushing chained\n"); + tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ + if (roseCatchUpMPV(t, tctxt->state, loc, scratch) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } +done_queue_empty: + if (!mmbit_set(aa, aaCount, qi)) { + initQueue(q, qi, t, tctxt); + nfaQueueInitState(q->nfa, q); + pushQueueAt(q, 0, MQE_START, loc); + fatbit_set(activeQueues, qCount, qi); + } + + assert(!isQueueFull(q)); + + if (isAllExhausted(t, scratch->core_info.exhaustionVector)) { + if (!scratch->core_info.broken) { + scratch->core_info.broken = BROKEN_EXHAUSTED; + } + tctxt->groups = 0; + DEBUG_PRINTF("termination requested\n"); + return HWLM_TERMINATE_MATCHING; + } + + return HWLM_CONTINUE_MATCHING; +} + +static really_inline +hwlmcb_rv_t ensureQueueFlushed(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 qi, s64a loc, + char in_anchored) { + return ensureQueueFlushed_i(t, scratch, qi, loc, 0, in_anchored, 0); +} + +static rose_inline +hwlmcb_rv_t roseHandleSuffixTrigger(const struct RoseEngine *t, + u32 qi, u32 top, u64a som, + u64a end, struct RoseContext *tctxt, + char in_anchored) { + DEBUG_PRINTF("suffix qi=%u, top event=%u\n", qi, top); + + u8 *aa = getActiveLeafArray(t, tctxt->state); + struct hs_scratch *scratch = tctxtToScratch(tctxt); + const u32 aaCount = t->activeArrayCount; + const u32 qCount = t->queueCount; + struct mq *q = &scratch->queues[qi]; + const struct NfaInfo *info = getNfaInfoByQueue(t, qi); + const struct NFA *nfa = getNfaByInfo(t, info); + + struct core_info *ci = &scratch->core_info; + s64a loc = (s64a)end - ci->buf_offset; + assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen); + + if 
(!mmbit_set(aa, aaCount, qi)) { + initQueue(q, qi, t, tctxt); + nfaQueueInitState(nfa, q); + pushQueueAt(q, 0, MQE_START, loc); + fatbit_set(scratch->aqa, qCount, qi); + } else if (info->no_retrigger) { + DEBUG_PRINTF("yawn\n"); + /* nfa only needs one top; we can go home now */ + return HWLM_CONTINUE_MATCHING; + } else if (!fatbit_set(scratch->aqa, qCount, qi)) { + initQueue(q, qi, t, tctxt); + loadStreamState(nfa, q, 0); + pushQueueAt(q, 0, MQE_START, 0); + } else if (isQueueFull(q)) { + DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi); + if (info->eod) { + /* can catch up suffix independently no pq */ + q->context = NULL; + pushQueueNoMerge(q, MQE_END, loc); + nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } else if (ensureQueueFlushed(t, scratch, qi, loc, in_anchored) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + + assert(top == MQE_TOP || (top >= MQE_TOP_FIRST && top < MQE_INVALID)); + pushQueueSom(q, top, loc, som); + + if (q_cur_loc(q) == (s64a)ci->len && !info->eod) { + /* we may not run the nfa; need to ensure state is fine */ + DEBUG_PRINTF("empty run\n"); + pushQueueNoMerge(q, MQE_END, loc); + char alive = nfaQueueExec(nfa, q, loc); + if (alive) { + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } else { + mmbit_unset(aa, aaCount, qi); + fatbit_unset(scratch->aqa, qCount, qi); + } + } + + return HWLM_CONTINUE_MATCHING; +} + +static really_inline +char roseTestLeftfix(const struct RoseEngine *t, u32 qi, u32 leftfixLag, + ReportID leftfixReport, u64a end, + struct RoseContext *tctxt) { + struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct core_info *ci = &scratch->core_info; + + u32 ri = queueToLeftIndex(t, qi); + const struct LeftNfaInfo *left = getLeftTable(t) + ri; + + DEBUG_PRINTF("testing %s %s %u/%u with lag %u (maxLag=%u)\n", + (left->transient ? "transient" : "active"), + (left->infix ? "infix" : "prefix"), + ri, qi, leftfixLag, left->maxLag); + + assert(leftfixLag <= left->maxLag); + + struct mq *q = scratch->queues + qi; + u32 qCount = t->queueCount; + u32 arCount = t->activeLeftCount; + + if (!mmbit_isset(getActiveLeftArray(t, tctxt->state), arCount, ri)) { + DEBUG_PRINTF("engine is dead nothing to see here\n"); + return 0; + } + + if (unlikely(end < leftfixLag)) { + assert(0); /* lag is the literal length */ + return 0; + } + + if (nfaSupportsZombie(getNfaByQueue(t, qi)) && ci->buf_offset + && !fatbit_isset(scratch->aqa, qCount, qi) + && isZombie(t, tctxt->state, left)) { + DEBUG_PRINTF("zombie\n"); + return 1; + } + + if (!fatbit_set(scratch->aqa, qCount, qi)) { + DEBUG_PRINTF("initing q %u\n", qi); + initRoseQueue(t, qi, left, tctxt); + if (ci->buf_offset) { // there have been writes before us! 
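The start position chosen in this branch is negative, i.e. inside the history buffer: a transient leftfix is replayed from as deep as history allows, while a stored leftfix resumes from its saved delay. In sketch form (chooseStartLoc is illustrative only; the code below inlines this):

    static s32 chooseStartLoc(const struct core_info *ci, u32 delay,
                              char transient) {
        // e.g. hlen == 16, delay == 5: transient -> -16, stored -> -5
        return transient ? -(s32)ci->hlen : -(s32)delay;
    }
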
+ s32 sp; + if (left->transient) { + sp = -(s32)ci->hlen; + } else { + sp = -(s32)loadRoseDelay(t, tctxt->state, left); + } + + /* transient nfas are always started fresh -> state not maintained + * at stream boundary */ + + pushQueueAt(q, 0, MQE_START, sp); + if (left->infix || (ci->buf_offset + sp > 0 && !left->transient)) { + loadStreamState(q->nfa, q, sp); + } else { + pushQueueAt(q, 1, MQE_TOP, sp); + nfaQueueInitState(q->nfa, q); + } + } else { // first write ever + pushQueueAt(q, 0, MQE_START, 0); + pushQueueAt(q, 1, MQE_TOP, 0); + nfaQueueInitState(q->nfa, q); + } + } + + s64a loc = (s64a)end - ci->buf_offset - leftfixLag; + assert(loc >= q_cur_loc(q)); + assert(leftfixReport != MO_INVALID_IDX); + + if (left->transient) { + s64a start_loc = loc - left->transient; + if (q_cur_loc(q) < start_loc) { + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, start_loc); + pushQueueAt(q, 1, MQE_TOP, start_loc); + nfaQueueInitState(q->nfa, q); + } + } + + if (q_cur_loc(q) < loc || q_last_type(q) != MQE_START) { + if (left->infix) { + if (infixTooOld(q, loc)) { + DEBUG_PRINTF("infix %u died of old age\n", ri); + scratch->tctxt.groups &= left->squash_mask; + mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); + return 0; + } + + reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); + } + + if (!rosePrefixCheckMiracles(t, left, ci, q, end)) { + DEBUG_PRINTF("leftfix %u died due to miracle\n", ri); + scratch->tctxt.groups &= left->squash_mask; + mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); + return 0; + } + +#ifdef DEBUG + debugQueue(q); +#endif + + pushQueueNoMerge(q, MQE_END, loc); + + char rv = nfaQueueExecRose(q->nfa, q, leftfixReport); + if (!rv) { /* nfa is dead */ + DEBUG_PRINTF("leftfix %u died while trying to catch up\n", ri); + mmbit_unset(getActiveLeftArray(t, tctxt->state), arCount, ri); + assert(!mmbit_isset(getActiveLeftArray(t, tctxt->state), arCount, + ri)); + tctxt->groups &= left->squash_mask; + return 0; + } + + // Queue must have next start loc before we call nfaInAcceptState. 
+ q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + + DEBUG_PRINTF("checking for report %u\n", leftfixReport); + DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); + return rv == MO_MATCHES_PENDING; + } else { + DEBUG_PRINTF("checking for report %u\n", leftfixReport); + char rv = nfaInAcceptState(q->nfa, leftfixReport, q); + DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv); + return rv; + } +} + +static rose_inline +void roseSetRole(const struct RoseEngine *t, u8 *state, + struct RoseContext *tctxt, u32 stateIndex, u8 depth) { + DEBUG_PRINTF("state idx=%u, depth=%u\n", stateIndex, depth); + mmbit_set(getRoleState(state), t->rolesWithStateCount, stateIndex); + update_depth(tctxt, depth); +} + +static rose_inline +void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi, + u32 topEvent, u8 cancel, struct RoseContext *tctxt) { + struct core_info *ci = &tctxtToScratch(tctxt)->core_info; + s64a loc = (s64a)end - ci->buf_offset; + + u32 ri = queueToLeftIndex(t, qi); + assert(topEvent < MQE_INVALID); + + const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi); + assert(!left->transient); + + DEBUG_PRINTF("rose %u (qi=%u) event %u\n", ri, qi, topEvent); + + struct mq *q = tctxtToScratch(tctxt)->queues + qi; + const struct NfaInfo *info = getNfaInfoByQueue(t, qi); + + u8 *activeLeftArray = getActiveLeftArray(t, tctxt->state); + const u32 arCount = t->activeLeftCount; + char alive = mmbit_set(activeLeftArray, arCount, ri); + + if (alive && info->no_retrigger) { + DEBUG_PRINTF("yawn\n"); + return; + } + + struct fatbit *aqa = tctxtToScratch(tctxt)->aqa; + const u32 qCount = t->queueCount; + + if (alive && nfaSupportsZombie(getNfaByInfo(t, info)) && ci->buf_offset && + !fatbit_isset(aqa, qCount, qi) && isZombie(t, tctxt->state, left)) { + DEBUG_PRINTF("yawn - zombie\n"); + return; + } + + if (cancel) { + DEBUG_PRINTF("dominating top: (re)init\n"); + fatbit_set(aqa, qCount, qi); + initRoseQueue(t, qi, left, tctxt); + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); + } else if (!fatbit_set(aqa, qCount, qi)) { + DEBUG_PRINTF("initing %u\n", qi); + initRoseQueue(t, qi, left, tctxt); + if (alive) { + s32 sp = -(s32)loadRoseDelay(t, tctxt->state, left); + pushQueueAt(q, 0, MQE_START, sp); + loadStreamState(q->nfa, q, sp); + } else { + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); + } + } else if (!alive) { + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + nfaQueueInitState(q->nfa, q); + } else if (isQueueFull(q)) { + reduceQueue(q, loc, left->maxQueueLen, q->nfa->maxWidth); + + if (isQueueFull(q)) { + /* still full - reduceQueue did nothing */ + DEBUG_PRINTF("queue %u full (%u items) -> catching up nfa\n", qi, + q->end - q->cur); + pushQueueNoMerge(q, MQE_END, loc); + nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX); + + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } + } + + pushQueueSom(q, topEvent, loc, start); +} + +/* handles the firing of external matches */ +static rose_inline +hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, u8 *state, ReportID id, + u64a end, struct RoseContext *tctxt, + char in_anchored) { + struct hs_scratch *scratch = tctxtToScratch(tctxt); + + if (roseCatchUpTo(t, state, end, scratch, in_anchored) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + + assert(end == tctxt->minMatchOffset); + DEBUG_PRINTF("firing callback reportId=%u, end=%llu\n", id, end); + updateLastMatchOffset(tctxt, end); + + int cb_rv = tctxt->cb(end, id, tctxt->userCtx); + if 
(cb_rv == MO_HALT_MATCHING) { + DEBUG_PRINTF("termination requested\n"); + return HWLM_TERMINATE_MATCHING; + } + + if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { + return HWLM_CONTINUE_MATCHING; + } + + if (isAllExhausted(t, scratch->core_info.exhaustionVector)) { + if (!scratch->core_info.broken) { + scratch->core_info.broken = BROKEN_EXHAUSTED; + } + tctxt->groups = 0; + DEBUG_PRINTF("termination requested\n"); + return HWLM_TERMINATE_MATCHING; + } + + return HWLM_CONTINUE_MATCHING; +} + +/* catches up engines enough to ensure any earlier mpv triggers are enqueued + * and then adds the trigger to the mpv queue. Must not be called during catch + * up */ +static rose_inline +hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, + u8 *state, ReportID r, u64a end, + struct RoseContext *tctxt, + char in_anchored) { + struct hs_scratch *scratch = tctxtToScratch(tctxt); + + if (roseCatchUpMpvFeeders(t, state, end, scratch, in_anchored) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + + return roseHandleChainMatch(t, r, end, tctxt, in_anchored, 0); +} + +static rose_inline +hwlmcb_rv_t roseSomCatchup(const struct RoseEngine *t, u8 *state, u64a end, + struct RoseContext *tctxt, char in_anchored) { + struct hs_scratch *scratch = tctxtToScratch(tctxt); + + // In SOM processing, we may be able to limit or entirely avoid catchup. + + DEBUG_PRINTF("entry\n"); + + if (end == tctxt->minMatchOffset) { + DEBUG_PRINTF("already caught up\n"); + return HWLM_CONTINUE_MATCHING; + } + + DEBUG_PRINTF("catching up all NFAs\n"); + if (roseCatchUpTo(t, state, end, scratch, in_anchored) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + updateMinMatchOffset(tctxt, end); + return HWLM_CONTINUE_MATCHING; +} + +static really_inline +hwlmcb_rv_t roseHandleSom(const struct RoseEngine *t, u8 *state, ReportID id, + u64a end, struct RoseContext *tctxt, + char in_anchored) { + struct hs_scratch *scratch = tctxtToScratch(tctxt); + + DEBUG_PRINTF("id=%u, end=%llu, minMatchOffset=%llu\n", id, end, + tctxt->minMatchOffset); + + // Reach into reports and handle internal reports that just manipulate SOM + // slots ourselves, rather than going through the callback. 
+ + if (roseSomCatchup(t, state, end, tctxt, in_anchored) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + + const struct internal_report *ri = getInternalReport(t, id); + handleSomInternal(scratch, ri, end); + + return HWLM_CONTINUE_MATCHING; +} + +static rose_inline +hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, u8 *state, + ReportID id, u64a start, u64a end, + struct RoseContext *tctxt, char in_anchored) { + if (roseCatchUpTo(t, state, end, tctxtToScratch(tctxt), in_anchored) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + + DEBUG_PRINTF("firing som callback reportId=%u, start=%llu end=%llu\n", id, + start, end); + DEBUG_PRINTF(" last match %llu\n", tctxt->lastMatchOffset); + assert(end == tctxt->minMatchOffset); + + updateLastMatchOffset(tctxt, end); + int cb_rv = tctxt->cb_som(start, end, id, tctxt->userCtx); + if (cb_rv == MO_HALT_MATCHING) { + DEBUG_PRINTF("termination requested\n"); + return HWLM_TERMINATE_MATCHING; + } + + if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { + return HWLM_CONTINUE_MATCHING; + } + + struct core_info *ci = &tctxtToScratch(tctxt)->core_info; + if (isAllExhausted(t, ci->exhaustionVector)) { + if (!ci->broken) { + ci->broken = BROKEN_EXHAUSTED; + } + tctxt->groups = 0; + DEBUG_PRINTF("termination requested\n"); + return HWLM_TERMINATE_MATCHING; + } + + return HWLM_CONTINUE_MATCHING; +} + +static rose_inline +hwlmcb_rv_t roseHandleSomSom(const struct RoseEngine *t, u8 *state, ReportID id, + u64a start, u64a end, struct RoseContext *tctxt, + char in_anchored) { + DEBUG_PRINTF("id=%u, start=%llu, end=%llu, minMatchOffset=%llu\n", + id, start, end, tctxt->minMatchOffset); + + // Reach into reports and handle internal reports that just manipulate SOM + // slots ourselves, rather than going through the callback. + + if (roseSomCatchup(t, state, end, tctxt, in_anchored) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + + const struct internal_report *ri = getInternalReport(t, id); + setSomFromSomAware(tctxtToScratch(tctxt), ri, start, end); + return HWLM_CONTINUE_MATCHING; +} + +static really_inline +int reachHasBit(const u8 *reach, u8 c) { + return !!(reach[c / 8U] & (u8)1U << (c % 8U)); +} + +/** + * \brief Scan around a literal, checking that the "lookaround" reach masks + * are satisfied. + */ +static rose_inline +int roseCheckLookaround(const struct RoseEngine *t, u32 lookaroundIndex, + u32 lookaroundCount, u64a end, + struct RoseContext *tctxt) { + assert(lookaroundIndex != MO_INVALID_IDX); + assert(lookaroundCount > 0); + + const struct core_info *ci = &tctxtToScratch(tctxt)->core_info; + DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end, + ci->buf_offset, ci->buf_offset + ci->len); + + const u8 *base = (const u8 *)t; + const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset); + const s8 *look = look_base + lookaroundIndex; + const s8 *look_end = look + lookaroundCount; + assert(look < look_end); + + const u8 *reach_base = base + t->lookaroundReachOffset; + const u8 *reach = reach_base + lookaroundIndex * REACH_BITVECTOR_LEN; + + // The following code assumes that the lookaround structures are ordered by + // increasing offset. + + const s64a base_offset = end - ci->buf_offset; + DEBUG_PRINTF("base_offset=%lld\n", base_offset); + DEBUG_PRINTF("first look has offset %d\n", *look); + + // If our first check tells us we need to look at an offset before the + // start of the stream, this role cannot match. 
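+ // For example, a first look offset of -8 with a match end at stream + // offset 5 would require reading absolute offset -3, before the start of + // the stream, so no amount of history can satisfy it.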
+ if (unlikely(*look < 0 && (u64a)(0 - *look) > end)) { + DEBUG_PRINTF("too early, fail\n"); + return 0; + } + + // Skip over offsets that are before the history buffer. + do { + s64a offset = base_offset + *look; + if (offset >= -(s64a)ci->hlen) { + goto in_history; + } + DEBUG_PRINTF("look=%d before history\n", *look); + look++; + reach += REACH_BITVECTOR_LEN; + } while (look < look_end); + + // History buffer. + DEBUG_PRINTF("scan history (%zu looks left)\n", look_end - look); + for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) { + in_history: + ; + s64a offset = base_offset + *look; + DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); + + if (offset >= 0) { + DEBUG_PRINTF("in buffer\n"); + goto in_buffer; + } + + assert(offset >= -(s64a)ci->hlen && offset < 0); + u8 c = ci->hbuf[ci->hlen + offset]; + if (!reachHasBit(reach, c)) { + DEBUG_PRINTF("char 0x%02x failed reach check\n", c); + return 0; + } + } + // Current buffer. + DEBUG_PRINTF("scan buffer (%zu looks left)\n", look_end - look); + for (; look < look_end; ++look, reach += REACH_BITVECTOR_LEN) { + in_buffer: + ; + s64a offset = base_offset + *look; + DEBUG_PRINTF("reach=%p, rel offset=%lld\n", reach, offset); + + if (offset >= (s64a)ci->len) { + DEBUG_PRINTF("in the future\n"); + break; + } + + assert(offset >= 0 && offset < (s64a)ci->len); + u8 c = ci->buf[offset]; + if (!reachHasBit(reach, c)) { + DEBUG_PRINTF("char 0x%02x failed reach check\n", c); + return 0; + } + } + + DEBUG_PRINTF("OK :)\n"); + return 1; +} + +static +int roseNfaEarliestSom(u64a from_offset, UNUSED u64a offset, UNUSED ReportID id, + void *context) { + u64a *som = context; + *som = MIN(*som, from_offset); + return MO_CONTINUE_MATCHING; +} + +static rose_inline +u64a roseGetHaigSom(const struct RoseEngine *t, const u32 qi, + UNUSED const u32 leftfixLag, + struct RoseContext *tctxt) { + u32 ri = queueToLeftIndex(t, qi); + + UNUSED const struct LeftNfaInfo *left = getLeftTable(t) + ri; + + DEBUG_PRINTF("testing %s prefix %u/%u with lag %u (maxLag=%u)\n", + left->transient ? 
"transient" : "active", ri, qi, + leftfixLag, left->maxLag); + + assert(leftfixLag <= left->maxLag); + + struct mq *q = tctxtToScratch(tctxt)->queues + qi; + + u64a start = ~0ULL; + + /* switch the callback + context for a fun one */ + q->som_cb = roseNfaEarliestSom; + q->context = &start; + + nfaReportCurrentMatches(q->nfa, q); + + /* restore the old callback + context */ + q->som_cb = roseNfaSomAdaptor; + q->context = NULL; + DEBUG_PRINTF("earliest som is %llu\n", start); + return start; +} + +static rose_inline +char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) { + assert(max_bound <= ROSE_BOUND_INF); + assert(min_bound <= max_bound); + + if (end < min_bound) { + return 0; + } + return max_bound == ROSE_BOUND_INF || end <= max_bound; +} + + +#define PROGRAM_CASE(name) \ + case ROSE_INSTR_##name: { \ + DEBUG_PRINTF("instruction: " #name " (%u)\n", ROSE_INSTR_##name); \ + const struct ROSE_STRUCT_##name *ri = \ + (const struct ROSE_STRUCT_##name *)pc; + +#define PROGRAM_NEXT_INSTRUCTION \ + pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN); \ + break; \ + } + +static really_inline +hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, + u64a end, struct RoseContext *tctxt, + char in_anchored, int *work_done) { + DEBUG_PRINTF("program begins at offset %u\n", programOffset); + + assert(programOffset); + assert(programOffset < t->size); + + const char *pc_base = getByOffset(t, programOffset); + const char *pc = pc_base; + + u64a som = 0; + + assert(*(const u8 *)pc != ROSE_INSTR_END); + + for (;;) { + assert(ISALIGNED_N(pc, ROSE_INSTR_MIN_ALIGN)); + u8 code = *(const u8 *)pc; + assert(code <= ROSE_INSTR_END); + + switch ((enum RoseInstructionCode)code) { + PROGRAM_CASE(ANCHORED_DELAY) { + if (in_anchored && end > t->floatingMinLiteralMatchOffset) { + DEBUG_PRINTF("delay until playback\n"); + update_depth(tctxt, ri->depth); + tctxt->groups |= ri->groups; + *work_done = 1; + assert(ri->done_jump); // must progress + pc += ri->done_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_DEPTH) { + DEBUG_PRINTF("current depth %u, check min depth %u\n", + tctxt->depth, ri->min_depth); + if (ri->min_depth > tctxt->depth) { + DEBUG_PRINTF("failed depth check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_ONLY_EOD) { + struct core_info *ci = &tctxtToScratch(tctxt)->core_info; + if (end != ci->buf_offset + ci->len) { + DEBUG_PRINTF("should only match at end of data\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_BOUNDS) { + if (!in_anchored && + !roseCheckRootBounds(end, ri->min_bound, ri->max_bound)) { + DEBUG_PRINTF("failed root bounds check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_NOT_HANDLED) { + struct fatbit *handled = tctxtToScratch(tctxt)->handled_roles; + if (fatbit_set(handled, t->handledKeyCount, ri->key)) { + DEBUG_PRINTF("key %u already set\n", ri->key); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LOOKAROUND) { + if (!roseCheckLookaround(t, ri->index, ri->count, end, tctxt)) { + DEBUG_PRINTF("failed lookaround check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_LEFTFIX) { + 
if (!roseTestLeftfix(t, ri->queue, ri->lag, ri->report, end, + tctxt)) { + DEBUG_PRINTF("failed leftfix check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_ADJUST) { + assert(ri->distance <= end); + som = end - ri->distance; + DEBUG_PRINTF("som is (end - %u) = %llu\n", ri->distance, som); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_LEFTFIX) { + som = roseGetHaigSom(t, ri->queue, ri->lag, tctxt); + DEBUG_PRINTF("som from leftfix is %llu\n", som); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(TRIGGER_INFIX) { + roseTriggerInfix(t, som, end, ri->queue, ri->event, ri->cancel, + tctxt); + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(TRIGGER_SUFFIX) { + if (roseHandleSuffixTrigger(t, ri->queue, ri->event, som, end, + tctxt, in_anchored) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT) { + if (roseHandleMatch(t, tctxt->state, ri->report, end, tctxt, + in_anchored) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_CHAIN) { + if (roseCatchUpAndHandleChainMatch(t, tctxt->state, ri->report, + end, tctxt, in_anchored) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_EOD) { + if (tctxt->cb(end, ri->report, tctxt->userCtx) == + MO_HALT_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_INT) { + if (roseHandleSom(t, tctxt->state, ri->report, end, tctxt, + in_anchored) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM) { + if (roseHandleSomSom(t, tctxt->state, ri->report, som, end, + tctxt, + in_anchored) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_KNOWN) { + if (roseHandleSomMatch(t, tctxt->state, ri->report, som, end, + tctxt, in_anchored) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_STATE) { + roseSetRole(t, tctxt->state, tctxt, ri->index, ri->depth); + *work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SET_GROUPS) { + tctxt->groups |= ri->groups; + DEBUG_PRINTF("set groups 0x%llx -> 0x%llx\n", ri->groups, + tctxt->groups); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SPARSE_ITER_BEGIN) { + DEBUG_PRINTF("iter_offset=%u\n", ri->iter_offset); + const struct mmbit_sparse_iter *it = + getByOffset(t, ri->iter_offset); + assert(ISALIGNED(it)); + + struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct mmbit_sparse_state *s = scratch->sparse_iter_state; + + u32 idx = 0; + u32 i = mmbit_sparse_iter_begin(getRoleState(tctxt->state), + t->rolesWithStateCount, &idx, + it, s); + if (i == MMB_INVALID) { + DEBUG_PRINTF("no states in sparse iter are on\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + + fatbit_clear(scratch->handled_roles); + + const u32 *jumps = getByOffset(t, ri->jump_table); + DEBUG_PRINTF("state %u (idx=%u) is on, jump to %u\n", i, idx, + jumps[idx]); + pc = pc_base + jumps[idx]; + continue; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SPARSE_ITER_NEXT) { 
DEBUG_PRINTF("iter_offset=%u, state=%u\n", ri->iter_offset, + ri->state); + const struct mmbit_sparse_iter *it = + getByOffset(t, ri->iter_offset); + assert(ISALIGNED(it)); + + struct hs_scratch *scratch = tctxtToScratch(tctxt); + struct mmbit_sparse_state *s = scratch->sparse_iter_state; + + u32 idx = 0; + u32 i = mmbit_sparse_iter_next(getRoleState(tctxt->state), + t->rolesWithStateCount, + ri->state, &idx, it, s); + if (i == MMB_INVALID) { + DEBUG_PRINTF("no more states in sparse iter are on\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + + const u32 *jumps = getByOffset(t, ri->jump_table); + DEBUG_PRINTF("state %u (idx=%u) is on, jump to %u\n", i, idx, + jumps[idx]); + pc = pc_base + jumps[idx]; + continue; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(END) { + DEBUG_PRINTF("finished\n"); + return HWLM_CONTINUE_MATCHING; + } + PROGRAM_NEXT_INSTRUCTION + } + } + + assert(0); // unreachable + return HWLM_CONTINUE_MATCHING; +} + +#undef PROGRAM_CASE +#undef PROGRAM_NEXT_INSTRUCTION + +static rose_inline +void roseSquashGroup(struct RoseContext *tctxt, const struct RoseLiteral *tl) { + assert(tl->squashesGroup); + + // we should be squashing a single group + assert(popcount64(tl->groups) == 1); + + DEBUG_PRINTF("apply squash mask 0x%016llx, groups 0x%016llx -> 0x%016llx\n", + ~tl->groups, tctxt->groups, tctxt->groups & ~tl->groups); + + tctxt->groups &= ~tl->groups; +} + +#endif // PROGRAM_RUNTIME_H diff --git a/src/rose/rose.h b/src/rose/rose.h index 6bebdf10..49144988 100644 --- a/src/rose/rose.h +++ b/src/rose/rose.h @@ -45,6 +45,39 @@ void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch, RoseCallback callback, RoseCallbackSom som_callback, void *context); +static really_inline +int roseBlockHasEodWork(const struct RoseEngine *t, + struct hs_scratch *scratch) { + if (t->ematcherOffset) { + DEBUG_PRINTF("eod matcher to run\n"); + return 1; + } + + if (t->eodProgramOffset) { + DEBUG_PRINTF("has eod program\n"); + return 1; + } + + void *state = scratch->core_info.state; + if (mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { + DEBUG_PRINTF("active outfix/suffix engines\n"); + return 1; + } + + if (t->eodIterOffset) { + u32 idx; + const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset); + struct mmbit_sparse_state *s = scratch->sparse_iter_state; + if (mmbit_sparse_iter_begin(getRoleState(state), t->rolesWithStateCount, + &idx, it, s) != MMB_INVALID) { + DEBUG_PRINTF("eod iter has states on\n"); + return 1; + } + } + + return 0; +} + /* assumes core_info in scratch has been init to point to data */ static really_inline void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch, @@ -77,19 +110,8 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch, return; } - struct mmbit_sparse_state *s = scratch->sparse_iter_state; - const u32 numStates = t->rolesWithStateCount; - u8 *state = (u8 *)scratch->core_info.state; - void *role_state = getRoleState(state); - u32 idx = 0; - const struct mmbit_sparse_iter *it - = (const void *)((const u8 *)t + t->eodIterOffset); - - if (!t->ematcherOffset && !t->hasEodEventLiteral - && !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount) - && (!t->eodIterOffset - || mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s) - == MMB_INVALID)) { + if (!roseBlockHasEodWork(t, scratch)) { + DEBUG_PRINTF("no eod work\n"); return; } diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 
708d3c8a..0a0318d3 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -170,6 +170,7 @@ public: const void *get() const { switch (code()) { + case ROSE_INSTR_CHECK_DEPTH: return &u.checkDepth; case ROSE_INSTR_CHECK_ONLY_EOD: return &u.checkOnlyEod; case ROSE_INSTR_CHECK_BOUNDS: return &u.checkBounds; case ROSE_INSTR_CHECK_NOT_HANDLED: return &u.checkNotHandled; @@ -188,6 +189,8 @@ public: case ROSE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown; case ROSE_INSTR_SET_STATE: return &u.setState; case ROSE_INSTR_SET_GROUPS: return &u.setGroups; + case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin; + case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext; case ROSE_INSTR_END: return &u.end; } assert(0); @@ -196,6 +199,7 @@ public: size_t length() const { switch (code()) { + case ROSE_INSTR_CHECK_DEPTH: return sizeof(u.checkDepth); case ROSE_INSTR_CHECK_ONLY_EOD: return sizeof(u.checkOnlyEod); case ROSE_INSTR_CHECK_BOUNDS: return sizeof(u.checkBounds); case ROSE_INSTR_CHECK_NOT_HANDLED: return sizeof(u.checkNotHandled); @@ -214,12 +218,15 @@ public: case ROSE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown); case ROSE_INSTR_SET_STATE: return sizeof(u.setState); case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups); + case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin); + case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext); case ROSE_INSTR_END: return sizeof(u.end); } return 0; } union { + ROSE_STRUCT_CHECK_DEPTH checkDepth; ROSE_STRUCT_CHECK_ONLY_EOD checkOnlyEod; ROSE_STRUCT_CHECK_BOUNDS checkBounds; ROSE_STRUCT_CHECK_NOT_HANDLED checkNotHandled; @@ -238,6 +245,8 @@ public: ROSE_STRUCT_REPORT_SOM_KNOWN reportSomKnown; ROSE_STRUCT_SET_STATE setState; ROSE_STRUCT_SET_GROUPS setGroups; + ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin; + ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext; ROSE_STRUCT_END end; } u; }; @@ -2565,7 +2574,7 @@ getLiteralInfoByFinalId(const RoseBuildImpl &build, u32 final_id) { */ static vector<RoseInstruction> -flattenRoleProgram(const vector<vector<RoseInstruction>> &programs) { +flattenProgram(const vector<vector<RoseInstruction>> &programs) { vector<RoseInstruction> out; vector<u32> offsets; // offset of each instruction (bytes) @@ -2601,6 +2610,10 @@ flattenRoleProgram(const vector<vector<RoseInstruction>> &programs) { assert(targets[i] > offsets[i]); // jumps always progress ri.u.anchoredDelay.done_jump = targets[i] - offsets[i]; break; + case ROSE_INSTR_CHECK_DEPTH: + assert(targets[i] > offsets[i]); + ri.u.checkDepth.fail_jump = targets[i] - offsets[i]; + break; case ROSE_INSTR_CHECK_ONLY_EOD: assert(targets[i] > offsets[i]); ri.u.checkOnlyEod.fail_jump = targets[i] - offsets[i]; @@ -2630,9 +2643,13 @@ flattenRoleProgram(const vector<vector<RoseInstruction>> &programs) { } static -u32 writeRoleProgram(build_context &bc, vector<RoseInstruction> &program) { - DEBUG_PRINTF("writing %zu instructions\n", program.size()); +u32 writeProgram(build_context &bc, vector<RoseInstruction> &program) { + if (program.empty()) { + DEBUG_PRINTF("no program\n"); + return 0; + } + + DEBUG_PRINTF("writing %zu instructions\n", program.size()); u32 programOffset = 0; for (const auto &ri : program) { u32 offset = @@ -2696,32 +2713,6 @@ bool hasEodAnchors(const RoseBuildImpl &tbi, const build_context &bc, return false; } -/* creates (and adds to rose) a sparse iterator visiting pred states/roles, * returns a pair: * - the offset of the itermap * - the offset for the sparse iterator. 
- */ -static -pair<u32, u32> addPredSparseIter(build_context &bc, - const map<u32, u32> &predPrograms) { - vector<u32> keys; - vector<u32> programTable; - for (const auto &elem : predPrograms) { - keys.push_back(elem.first); - programTable.push_back(elem.second); - } - - vector<mmbit_sparse_iter> iter; - mmbBuildSparseIterator(iter, keys, bc.numStates); - assert(!iter.empty()); - DEBUG_PRINTF("iter size = %zu\n", iter.size()); - - u32 iterOffset = addIteratorToTable(bc, iter); - u32 programTableOffset = - add_to_engine_blob(bc, begin(programTable), end(programTable)); - return make_pair(programTableOffset, iterOffset); -} - static void fillLookaroundTables(char *look_base, char *reach_base, const vector<LookEntry> &look_vec) { @@ -2770,7 +2761,6 @@ void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc, * literal entry */ const auto &lit_infos = getLiteralInfoByFinalId(tbi, final_id); const rose_literal_info &arb_lit_info = **lit_infos.begin(); - const auto &vertices = arb_lit_info.vertices; literalTable.push_back(RoseLiteral()); RoseLiteral &tl = literalTable.back(); @@ -2784,11 +2774,6 @@ void createLiteralEntry(const RoseBuildImpl &tbi, build_context &bc, assert(tl.groups || tbi.literals.right.at(literalId).table == ROSE_ANCHORED || tbi.literals.right.at(literalId).table == ROSE_EVENT); - // Minimum depth based on this literal's roles. - tl.minDepth = calcMinDepth(bc.depths, vertices); - - DEBUG_PRINTF("lit %u: role minDepth=%u\n", final_id, tl.minDepth); - // If this literal squashes its group behind it, store that data too tl.squashesGroup = arb_lit_info.squash_group; @@ -3150,8 +3135,8 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, } static -vector<RoseInstruction> makeRoleProgram(RoseBuildImpl &build, build_context &bc, - const RoseEdge &e) { +vector<RoseInstruction> makeProgram(RoseBuildImpl &build, build_context &bc, + const RoseEdge &e) { const RoseGraph &g = build.g; auto v = target(e, g); @@ -3185,69 +3170,6 @@ vector<RoseInstruction> makeRoleProgram(RoseBuildImpl &build, build_context &bc, return program; } -static -void findRootEdges(const RoseBuildImpl &build, RoseVertex src, - map<u32, set<RoseEdge>> &root_edges_map) { - const auto &g = build.g; - for (const auto &e : out_edges_range(src, g)) { - const auto &v = target(e, g); - if (build.hasDirectFinalId(v)) { - continue; // Skip direct reports. - } - for (auto lit_id : g[v].literals) { - assert(lit_id < build.literal_info.size()); - u32 final_id = build.literal_info.at(lit_id).final_id; - if (final_id != MO_INVALID_IDX) { - root_edges_map[final_id].insert(e); - } - } - } -} - -static -void buildRootRolePrograms(RoseBuildImpl &build, build_context &bc, - vector<RoseLiteral> &literalTable) { - const auto &g = build.g; - - map<u32, set<RoseEdge>> root_edges_map; // lit id -> root edges - findRootEdges(build, build.root, root_edges_map); - findRootEdges(build, build.anchored_root, root_edges_map); - - for (u32 id = 0; id < literalTable.size(); id++) { - const auto &root_edges = root_edges_map[id]; - DEBUG_PRINTF("lit %u has %zu root edges\n", id, root_edges.size()); - - // Sort edges by (source, target) vertex indices to ensure - // deterministic program construction. 
- vector<RoseEdge> ordered_edges(begin(root_edges), end(root_edges)); - sort(begin(ordered_edges), end(ordered_edges), - [&g](const RoseEdge &a, const RoseEdge &b) { - return tie(g[source(a, g)].idx, g[target(a, g)].idx) < - tie(g[source(b, g)].idx, g[target(b, g)].idx); - }); - - vector<vector<RoseInstruction>> root_prog; - for (const auto &e : ordered_edges) { - DEBUG_PRINTF("edge (%zu,%zu)\n", g[source(e, g)].idx, - g[target(e, g)].idx); - auto role_prog = makeRoleProgram(build, bc, e); - if (role_prog.empty()) { - continue; - } - root_prog.push_back(role_prog); - } - - RoseLiteral &tl = literalTable[id]; - if (root_prog.empty()) { - tl.rootProgramOffset = 0; - continue; - } - - auto final_program = flattenRoleProgram(root_prog); - tl.rootProgramOffset = writeRoleProgram(bc, final_program); - } -} - static void assignStateIndices(const RoseBuildImpl &build, build_context &bc) { const auto &g = build.g; @@ -3399,13 +3321,12 @@ void makeRoleCheckNotHandled(build_context &bc, RoseVertex v, } static -vector<RoseInstruction> makeSparseIterProgram(RoseBuildImpl &build, - build_context &bc, - const RoseEdge &e) { +vector<RoseInstruction> makePredProgram(RoseBuildImpl &build, build_context &bc, + const RoseEdge &e) { const RoseGraph &g = build.g; const RoseVertex v = target(e, g); - auto program = makeRoleProgram(build, bc, e); + auto program = makeProgram(build, bc, e); if (hasGreaterInDegree(1, v, g)) { // Only necessary when there is more than one pred. @@ -3415,75 +3336,215 @@ vector<RoseInstruction> makeSparseIterProgram(RoseBuildImpl &build, return program; } +/** + * Returns the pair (program offset, sparse iter offset). + */ static -void buildLitSparseIter(RoseBuildImpl &build, build_context &bc, - vector<RoseVertex> &verts, RoseLiteral &tl) { - const auto &g = build.g; +pair<u32, u32> makeSparseIterProgram(build_context &bc, + map<u32, vector<vector<RoseInstruction>>> &predProgramLists, + const vector<RoseVertex> &verts, + const vector<RoseInstruction> &root_program) { + vector<RoseInstruction> program; + u32 iter_offset = 0; - if (verts.empty()) { - // This literal has no non-root roles => no sparse iter - tl.iterOffset = ROSE_OFFSET_INVALID; - tl.iterProgramOffset = 0; - return; + if (!predProgramLists.empty()) { + // First, add the iterator itself. + vector<u32> keys; + for (const auto &elem : predProgramLists) { + keys.push_back(elem.first); + } + DEBUG_PRINTF("%zu keys: %s\n", keys.size(), + as_string_list(keys).c_str()); + + vector<mmbit_sparse_iter> iter; + mmbBuildSparseIterator(iter, keys, bc.numStates); + assert(!iter.empty()); + iter_offset = addIteratorToTable(bc, iter); + + // Construct our program, starting with the SPARSE_ITER_BEGIN + // instruction, keeping track of the jump offset for each sub-program. + vector<u32> jump_table; + u32 curr_offset = 0; + + // Add a pre-check for min depth, if it's useful. + if (!verts.empty()) { + u32 min_depth = calcMinDepth(bc.depths, verts); + if (min_depth > 1) { + auto ri = RoseInstruction(ROSE_INSTR_CHECK_DEPTH); + ri.u.checkDepth.min_depth = min_depth; + program.push_back(ri); + curr_offset = ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + } + } + + program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN)); + curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); + + for (const auto &e : predProgramLists) { + DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(), + curr_offset); + jump_table.push_back(curr_offset); + auto subprog = flattenProgram(e.second); + + if (e.first != keys.back()) { + // For all but the last subprogram, replace the END instruction + // with a SPARSE_ITER_NEXT. 
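+ // The flattened result thus has the shape: + // [CHECK_DEPTH] SPARSE_ITER_BEGIN subprog0 SPARSE_ITER_NEXT + // subprog1 SPARSE_ITER_NEXT ... subprogN END + // with each jump table entry giving a subprogram's offset from the + // start of the program.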
+ assert(!subprog.empty()); + assert(subprog.back().code() == ROSE_INSTR_END); + subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT); + } + + for (const auto &ri : subprog) { + program.push_back(ri); + curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + } + } + + const u32 end_offset = curr_offset - ROUNDUP_N(program.back().length(), + ROSE_INSTR_MIN_ALIGN); + + // Write the jump table into the bytecode. + const u32 jump_table_offset = + add_to_engine_blob(bc, begin(jump_table), end(jump_table)); + + // Fix up the instruction operands. + auto keys_it = begin(keys); + curr_offset = 0; + for (size_t i = 0; i < program.size(); i++) { + auto &ri = program[i]; + switch (ri.code()) { + case ROSE_INSTR_CHECK_DEPTH: + ri.u.checkDepth.fail_jump = end_offset - curr_offset; + break; + case ROSE_INSTR_SPARSE_ITER_BEGIN: + ri.u.sparseIterBegin.iter_offset = iter_offset; + ri.u.sparseIterBegin.jump_table = jump_table_offset; + ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset; + break; + case ROSE_INSTR_SPARSE_ITER_NEXT: + ri.u.sparseIterNext.iter_offset = iter_offset; + ri.u.sparseIterNext.jump_table = jump_table_offset; + assert(keys_it != end(keys)); + ri.u.sparseIterNext.state = *keys_it++; + ri.u.sparseIterNext.fail_jump = end_offset - curr_offset; + break; + default: + break; + } + curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + } } - // Deterministic ordering. - sort(begin(verts), end(verts), - [&g](RoseVertex a, RoseVertex b) { return g[a].idx < g[b].idx; }); + // If we have a root program, replace the END instruction with it. Note + // that the root program has already been flattened. + if (!root_program.empty()) { + if (!program.empty()) { + assert(program.back().code() == ROSE_INSTR_END); + program.pop_back(); + } + program.insert(end(program), begin(root_program), end(root_program)); + } + + return {writeProgram(bc, program), iter_offset}; +} + +static +u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, + const vector<RoseEdge> &lit_edges) { + const auto &g = build.g; + + DEBUG_PRINTF("%zu lit edges\n", lit_edges.size()); // pred state id -> list of programs map<u32, vector<vector<RoseInstruction>>> predProgramLists; + vector<RoseVertex> nonroot_verts; - for (const auto &v : verts) { - DEBUG_PRINTF("vertex %zu\n", g[v].idx); - for (const auto &e : in_edges_range(v, g)) { - const auto &u = source(e, g); - if (build.isAnyStart(u)) { - continue; // Root roles are not handled with sparse iterator. - } - - assert(contains(bc.roleStateIndices, u)); - u32 pred_state = bc.roleStateIndices.at(u); - - DEBUG_PRINTF("pred %zu (state %u)\n", g[u].idx, pred_state); - - auto program = makeSparseIterProgram(build, bc, e); - predProgramLists[pred_state].push_back(program); + // Construct sparse iter sub-programs. + for (const auto &e : lit_edges) { + const auto &u = source(e, g); + if (build.isAnyStart(u)) { + continue; // Root roles are not handled with sparse iterator. } + DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].idx, + g[target(e, g)].idx); + assert(contains(bc.roleStateIndices, u)); + u32 pred_state = bc.roleStateIndices.at(u); + auto program = makePredProgram(build, bc, e); + predProgramLists[pred_state].push_back(program); + nonroot_verts.push_back(target(e, g)); } - map<u32, u32> predPrograms; - for (const auto &e : predProgramLists) { - auto program = flattenRoleProgram(e.second); - u32 offset = writeRoleProgram(bc, program); - predPrograms.emplace(e.first, offset); + // Construct sub-program for handling root roles. 
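+ // Edges from a root (anchored or floating start) have no predecessor + // state bit to test, so their programs run unconditionally; below they + // are handed to makeSparseIterProgram, which splices them in place of + // the iterator program's END instruction.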
+ vector<vector<RoseInstruction>> root_programs; + for (const auto &e : lit_edges) { + const auto &u = source(e, g); + if (!build.isAnyStart(u)) { + continue; + } + DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].idx, g[target(e, g)].idx); + auto role_prog = makeProgram(build, bc, e); + if (role_prog.empty()) { + continue; + } + root_programs.push_back(role_prog); } - tie(tl.iterProgramOffset, tl.iterOffset) = - addPredSparseIter(bc, predPrograms); + vector<RoseInstruction> root_program; + if (!root_programs.empty()) { + root_program = flattenProgram(root_programs); + } + + // Put it all together. + return makeSparseIterProgram(bc, predProgramLists, nonroot_verts, + root_program).first; } -// Build sparse iterators for literals. static -void buildSparseIter(RoseBuildImpl &build, build_context &bc, - vector<RoseLiteral> &literalTable) { - const RoseGraph &g = build.g; +map<u32, vector<RoseEdge>> findEdgesByLiteral(const RoseBuildImpl &build) { + // Use a set of edges while building the map to cull duplicates. + map<u32, set<RoseEdge>> unique_lit_edge_map; - // Find all our non-root roles. - ue2::unordered_map<u32, vector<RoseVertex>> litNonRootVertices; - for (const auto &v : vertices_range(g)) { - if (build.isRootSuccessor(v)) { + const auto &g = build.g; + for (const auto &e : edges_range(g)) { + const auto &v = target(e, g); + if (build.hasDirectFinalId(v)) { + // Skip direct reports, which do not have RoseLiteral entries. continue; } for (const auto &lit_id : g[v].literals) { + assert(lit_id < build.literal_info.size()); u32 final_id = build.literal_info.at(lit_id).final_id; - litNonRootVertices[final_id].push_back(v); + if (final_id != MO_INVALID_IDX) { + unique_lit_edge_map[final_id].insert(e); + } } } + // Build output map, sorting edges by (source, target) vertex index. + map<u32, vector<RoseEdge>> lit_edge_map; + for (const auto &m : unique_lit_edge_map) { + auto edge_list = vector<RoseEdge>(begin(m.second), end(m.second)); + sort(begin(edge_list), end(edge_list), + [&g](const RoseEdge &a, const RoseEdge &b) { + return tie(g[source(a, g)].idx, g[target(a, g)].idx) < + tie(g[source(b, g)].idx, g[target(b, g)].idx); + }); + lit_edge_map.emplace(m.first, edge_list); + } + + return lit_edge_map; +} + +/** \brief Build the interpreter program for each literal. */ +static +void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc, + vector<RoseLiteral> &literalTable) { + auto lit_edge_map = findEdgesByLiteral(build); + for (u32 finalId = 0; finalId != literalTable.size(); ++finalId) { - buildLitSparseIter(build, bc, litNonRootVertices[finalId], - literalTable[finalId]); + const auto &lit_edges = lit_edge_map[finalId]; + u32 offset = buildLiteralProgram(build, bc, lit_edges); + literalTable[finalId].programOffset = offset; } } @@ -3514,9 +3575,11 @@ vector<RoseInstruction> makeEodAnchorProgram(RoseBuildImpl &build, return program; } -/* returns a pair containing the iter map offset and iter offset */ +/** + * Returns the pair (program offset, sparse iter offset). 
+ */ static -pair<u32, u32> buildEodAnchorRoles(RoseBuildImpl &build, build_context &bc) { +pair<u32, u32> buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) { const RoseGraph &g = build.g; // pred state id -> list of programs @@ -3546,15 +3609,35 @@ pair<u32, u32> buildEodAnchorRoles(RoseBuildImpl &build, build_context &bc) { return {0, 0}; } - map<u32, u32> predPrograms; - for (const auto &e : predProgramLists) { - DEBUG_PRINTF("pred %u has %zu programs\n", e.first, e.second.size()); - auto program = flattenRoleProgram(e.second); - u32 offset = writeRoleProgram(bc, program); - predPrograms.emplace(e.first, offset); + return makeSparseIterProgram(bc, predProgramLists, {}, {}); +} + +static +u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) { + if (build.eod_event_literal_id == MO_INVALID_IDX) { + return 0; } - return addPredSparseIter(bc, predPrograms); + const RoseGraph &g = build.g; + const auto &lit_info = build.literal_info.at(build.eod_event_literal_id); + assert(lit_info.delayed_ids.empty()); + assert(!lit_info.squash_group); + assert(!lit_info.requires_benefits); + + // Collect all edges leading into EOD event literal vertices. + vector<RoseEdge> edge_list; + for (const auto &v : lit_info.vertices) { + insert(&edge_list, edge_list.end(), in_edges(v, g)); + } + + // Sort edge list for determinism, prettiness. + sort(begin(edge_list), end(edge_list), + [&g](const RoseEdge &a, const RoseEdge &b) { + return tie(g[source(a, g)].idx, g[target(a, g)].idx) < + tie(g[source(b, g)].idx, g[target(b, g)].idx); + }); + + return buildLiteralProgram(build, bc, edge_list); } static @@ -3742,11 +3825,12 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) { vector<RoseLiteral> literalTable; buildLiteralTable(*this, bc, literalTable); - buildSparseIter(*this, bc, literalTable); + buildLiteralPrograms(*this, bc, literalTable); + u32 eodProgramOffset = writeEodProgram(*this, bc); + u32 eodIterProgramOffset; u32 eodIterOffset; - u32 eodProgramTableOffset; - tie(eodProgramTableOffset, eodIterOffset) = buildEodAnchorRoles(*this, bc); + tie(eodIterProgramOffset, eodIterOffset) = buildEodAnchorProgram(*this, bc); vector<mmbit_sparse_iter> activeLeftIter; buildActiveLeftIter(leftInfoTable, activeLeftIter); @@ -3758,9 +3842,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) { throw ResourceLimitError(); } - // Write root programs for literals into the engine blob. 
- buildRootRolePrograms(*this, bc, literalTable); - u32 amatcherOffset = 0; u32 fmatcherOffset = 0; u32 ematcherOffset = 0; @@ -3968,8 +4049,9 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { = anchoredReportInverseMapOffset; engine->multidirectOffset = multidirectOffset; + engine->eodProgramOffset = eodProgramOffset; + engine->eodIterProgramOffset = eodIterProgramOffset; engine->eodIterOffset = eodIterOffset; - engine->eodProgramTableOffset = eodProgramTableOffset; engine->lastByteHistoryIterOffset = lastByteOffset; @@ -4038,13 +4120,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { write_out(&engine->state_init, (char *)engine.get(), state_scatter, state_scatter_aux_offset); - if (eod_event_literal_id != MO_INVALID_IDX) { - engine->hasEodEventLiteral = 1; - DEBUG_PRINTF("eod literal id=%u, final_id=%u\n", eod_event_literal_id, - literal_info.at(eod_event_literal_id).final_id); - engine->eodLiteralId = literal_info.at(eod_event_literal_id).final_id; - } - if (anchoredIsMulti(*engine)) { DEBUG_PRINTF("multiple anchored dfas\n"); engine->maxSafeAnchoredDROffset = 1; diff --git a/src/rose/rose_build_compile.cpp b/src/rose/rose_build_compile.cpp index 2a31a65a..2a3fe540 100644 --- a/src/rose/rose_build_compile.cpp +++ b/src/rose/rose_build_compile.cpp @@ -274,6 +274,13 @@ void allocateFinalLiteralId(RoseBuildImpl &tbi) { continue; } + // The special EOD event literal has its own program and does not need + // a real literal ID. + if (i == tbi.eod_event_literal_id) { + assert(tbi.eod_event_literal_id != MO_INVALID_IDX); + continue; + } + const rose_literal_info &info = tbi.literal_info[i]; if (info.requires_benefits) { assert(!tbi.isDelayed(i)); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index d8048eee..96ff7734 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -34,7 +34,6 @@ #include "rose_build_impl.h" #include "rose/rose_dump.h" #include "rose_internal.h" -#include "rose_program.h" #include "ue2common.h" #include "nfa/nfa_internal.h" #include "nfagraph/ng_dump.h" diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index aa13a627..3f355287 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -130,12 +130,6 @@ size_t literalsWithPredicate(const RoseEngine *t, Predicate pred) { return count_if(tl, tl_end, pred); } -static -size_t literalsWithDepth(const RoseEngine *t, u8 depth) { - return literalsWithPredicate( - t, [&depth](const RoseLiteral &l) { return l.minDepth == depth; }); -} - static size_t literalsInGroups(const RoseEngine *t, u32 from, u32 to) { rose_group mask = ~((1ULL << from) - 1); @@ -195,7 +189,7 @@ void dumpLookaround(ofstream &os, const RoseEngine *t, } static -void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) { +void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { const char *pc_base = pc; for (;;) { u8 code = *(const u8 *)pc; @@ -209,6 +203,12 @@ void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_DEPTH) { + os << " min_depth " << u32{ri->min_depth} << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_ONLY_EOD) { os << " fail_jump +" << ri->fail_jump << endl; } @@ -309,6 +309,21 @@ void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SPARSE_ITER_BEGIN) { + os << " iter_offset " << ri->iter_offset << endl; + os << " 
jump_table " << ri->jump_table << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SPARSE_ITER_NEXT) { + os << " iter_offset " << ri->iter_offset << endl; + os << " jump_table " << ri->jump_table << endl; + os << " state " << ri->state << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(END) { return; } PROGRAM_NEXT_INSTRUCTION @@ -323,30 +338,6 @@ void dumpRoleProgram(ofstream &os, const RoseEngine *t, const char *pc) { #undef PROGRAM_CASE #undef PROGRAM_NEXT_INSTRUCTION -static -void dumpSparseIterPrograms(ofstream &os, const RoseEngine *t, u32 iterOffset, - u32 programTableOffset) { - const auto *it = - (const mmbit_sparse_iter *)loadFromByteCodeOffset(t, iterOffset); - const u32 *programTable = - (const u32 *)loadFromByteCodeOffset(t, programTableOffset); - - // Construct a full multibit. - const u32 total_bits = t->rolesWithStateCount; - const vector<u8> bits(mmbit_size(total_bits), u8{0xff}); - - struct mmbit_sparse_state s[MAX_SPARSE_ITER_STATES]; - u32 idx = 0; - for (u32 i = mmbit_sparse_iter_begin(bits.data(), total_bits, &idx, it, s); - i != MMB_INVALID; - i = mmbit_sparse_iter_next(bits.data(), total_bits, i, &idx, it, s)) { - u32 programOffset = programTable[idx]; - os << "Sparse Iter Program " << idx << " triggered by state " << i - << " @ " << programOffset << ":" << endl; - dumpRoleProgram(os, t, (const char *)t + programOffset); - } -} - static void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { ofstream os(filename); @@ -359,18 +350,11 @@ void dumpRoseLitPrograms(const RoseEngine *t, const string &filename) { os << "Literal " << i << endl; os << "---------------" << endl; - if (lit->rootProgramOffset) { - os << "Root Program @ " << lit->rootProgramOffset << ":" << endl; - dumpRoleProgram(os, t, base + lit->rootProgramOffset); + if (lit->programOffset) { + os << "Program @ " << lit->programOffset << ":" << endl; + dumpProgram(os, t, base + lit->programOffset); } else { - os << "<none>" << endl; - } - - if (lit->iterOffset != ROSE_OFFSET_INVALID) { - dumpSparseIterPrograms(os, t, lit->iterOffset, - lit->iterProgramOffset); - } else { - os << "<none>" << endl; + os << "<none>" << endl; } os << endl; @@ -382,12 +366,23 @@ void dumpRoseEodPrograms(const RoseEngine *t, const string &filename) { ofstream os(filename); + const char *base = (const char *)t; - if (t->eodIterOffset) { - dumpSparseIterPrograms(os, t, t->eodIterOffset, - t->eodProgramTableOffset); + os << "Unconditional EOD Program:" << endl; + + if (t->eodProgramOffset) { + dumpProgram(os, t, base + t->eodProgramOffset); + os << endl; } else { - os << "<none>" << endl; + os << "<none>" << endl; + } + + os << "Sparse Iter EOD Program:" << endl; + + if (t->eodIterProgramOffset) { + dumpProgram(os, t, base + t->eodIterProgramOffset); + } else { + os << "<none>" << endl; } os.close(); @@ -766,33 +761,15 @@ void roseDumpText(const RoseEngine *t, FILE *f) { literalsWithPredicate( t, [](const RoseLiteral &l) { return l.squashesGroup != 0; })); fprintf(f, " - with benefits : %u\n", t->nonbenefits_base_id); - fprintf(f, " - with root program : %zu\n", - literalsWithPredicate(t, [](const RoseLiteral &l) { - return l.rootProgramOffset != 0; - })); - fprintf(f, " - with sparse iter : %zu\n", - literalsWithPredicate(t, [](const RoseLiteral &l) { - return l.iterOffset != ROSE_OFFSET_INVALID; - })); + fprintf(f, " - with program : %zu\n", + literalsWithPredicate( t, 
[](const RoseLiteral &l) { return l.programOffset != 0; })); fprintf(f, " - in groups ::\n"); fprintf(f, " + weak : %zu\n", literalsInGroups(t, 0, t->group_weak_end)); fprintf(f, " + general : %zu\n", literalsInGroups(t, t->group_weak_end, sizeof(u64a) * 8)); - u32 depth1 = literalsWithDepth(t, 1); - u32 depth2 = literalsWithDepth(t, 2); - u32 depth3 = literalsWithDepth(t, 3); - u32 depth4 = literalsWithDepth(t, 4); - u32 depthN = t->literalCount - (depth1 + depth2 + depth3 + depth4); - - fprintf(f, "\nLiteral depths:\n"); - fprintf(f, " minimum depth 1 : %u\n", depth1); - fprintf(f, " minimum depth 2 : %u\n", depth2); - fprintf(f, " minimum depth 3 : %u\n", depth3); - fprintf(f, " minimum depth 4 : %u\n", depth4); - fprintf(f, " minimum depth >4 : %u\n", depthN); - fprintf(f, "\n"); fprintf(f, " minWidth : %u\n", t->minWidth); fprintf(f, " minWidthExcludingBoundaries : %u\n", @@ -840,7 +817,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U8(t, hasFloatingDirectReports); DUMP_U8(t, noFloatingRoots); DUMP_U8(t, requiresEodCheck); - DUMP_U8(t, hasEodEventLiteral); DUMP_U8(t, hasOutfixesInSmallBlock); DUMP_U8(t, runtimeImpl); DUMP_U8(t, mpvTriggeredByLeaf); @@ -882,8 +858,9 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, roseCount); DUMP_U32(t, lookaroundTableOffset); DUMP_U32(t, lookaroundReachOffset); + DUMP_U32(t, eodProgramOffset); + DUMP_U32(t, eodIterProgramOffset); DUMP_U32(t, eodIterOffset); - DUMP_U32(t, eodProgramTableOffset); DUMP_U32(t, lastByteHistoryIterOffset); DUMP_U32(t, minWidth); DUMP_U32(t, minWidthExcludingBoundaries); @@ -940,7 +917,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, somRevOffsetOffset); DUMP_U32(t, group_weak_end); DUMP_U32(t, floatingStreamState); - DUMP_U32(t, eodLiteralId); fprintf(f, "}\n"); fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine)); } diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 7aae2f22..6234bb21 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -76,38 +76,15 @@ ReportID literalToReport(u32 id) { /** \brief Structure representing a literal. */ struct RoseLiteral { /** - * \brief Role program to run unconditionally when this literal is seen. + * \brief Program to run when this literal is seen. * * Offset is relative to RoseEngine, or zero for no program. */ - u32 rootProgramOffset; - - /** - * \brief Offset of sparse iterator (mmbit_sparse_iter pointer) over - * predecessor states. - * - * Offset is relative to RoseEngine, set to ROSE_OFFSET_INVALID for no - * iterator. - */ - u32 iterOffset; - - /** - * \brief Table of role programs to run when triggered by the sparse - * iterator, indexed by dense sparse iter index. - * - * Offset is relative to RoseEngine, zero for no programs. - */ - u32 iterProgramOffset; + u32 programOffset; /** \brief Bitset of groups that cause this literal to fire. */ rose_group groups; - /** - * \brief The minimum depth of this literal in the Rose graph (for depths - * greater than 1). - */ - u8 minDepth; - /** * \brief True if this literal switches off its group behind it when it * sets a role. @@ -382,7 +359,6 @@ struct RoseEngine { u8 noFloatingRoots; /* only need to run the anchored table if something * matched in the anchored table */ u8 requiresEodCheck; /* stuff happens at eod time */ - u8 hasEodEventLiteral; // fires a ROSE_EVENT literal at eod time. u8 hasOutfixesInSmallBlock; /**< has at least one outfix that must run even in small block scans. 
*/ u8 runtimeImpl; /**< can we just run the floating table or a single outfix? @@ -448,8 +424,9 @@ struct RoseEngine { u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32 * bytes each) */ - u32 eodIterOffset; // or 0 if no eod iterator - u32 eodProgramTableOffset; + u32 eodProgramOffset; //!< Unconditional EOD program, otherwise 0. + u32 eodIterProgramOffset; // or 0 if no eod iterator program + u32 eodIterOffset; // offset to EOD sparse iter or 0 if none u32 lastByteHistoryIterOffset; // if non-zero @@ -512,7 +489,6 @@ struct RoseEngine { u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */ u32 group_weak_end; /* end of weak groups, debugging only */ u32 floatingStreamState; // size in bytes - u32 eodLiteralId; // literal ID for eod ROSE_EVENT if used, otherwise 0. struct scatter_full_plan state_init; }; diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index ee747b9d..f7028c72 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -42,6 +42,7 @@ /** \brief Role program instruction opcodes. */ enum RoseInstructionCode { ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. + ROSE_INSTR_CHECK_DEPTH, //!< Check minimum graph depth. ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0. ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled". @@ -59,43 +60,51 @@ enum RoseInstructionCode { ROSE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset. ROSE_INSTR_SET_STATE, //!< Switch a state index on. ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits. + ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states. + ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states. ROSE_INSTR_END //!< End of program. }; struct ROSE_STRUCT_ANCHORED_DELAY { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u8 depth; //!< Depth for this state. rose_group groups; //!< Bitmask. u32 done_jump; //!< Jump forward this many bytes if successful. }; +struct ROSE_STRUCT_CHECK_DEPTH { + u8 code; //!< From enum RoseInstructionCode. + u8 min_depth; //!< Minimum depth of this literal in the Rose graph. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + struct ROSE_STRUCT_CHECK_ONLY_EOD { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u32 fail_jump; //!< Jump forward this many bytes on failure. }; struct ROSE_STRUCT_CHECK_BOUNDS { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u32 min_bound; //!< Min distance from zero. u32 max_bound; //!< Max distance from zero (or ROSE_BOUND_INF). u32 fail_jump; //!< Jump forward this many bytes on failure. }; struct ROSE_STRUCT_CHECK_NOT_HANDLED { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u32 key; //!< Key in the "handled_roles" fatbit in scratch. u32 fail_jump; //!< Jump forward this many bytes if we have seen key before. }; struct ROSE_STRUCT_CHECK_LOOKAROUND { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u32 index; u32 count; u32 fail_jump; //!< Jump forward this many bytes on failure. }; struct ROSE_STRUCT_CHECK_LEFTFIX { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u32 queue; //!< Queue of leftfix to check. u32 lag; //!< Lag of leftfix for this case. 
ReportID report; //!< ReportID of leftfix to check. @@ -103,72 +112,95 @@ struct ROSE_STRUCT_CHECK_LEFTFIX { }; struct ROSE_STRUCT_SOM_ADJUST { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u32 distance; //!< Distance to EOM. }; struct ROSE_STRUCT_SOM_LEFTFIX { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u32 queue; //!< Queue index of leftfix providing SOM. u32 lag; //!< Lag of leftfix for this case. }; struct ROSE_STRUCT_TRIGGER_INFIX { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u8 cancel; //!< Cancels previous top event. u32 queue; //!< Queue index of infix. u32 event; //!< Queue event, from MQE_*. }; struct ROSE_STRUCT_TRIGGER_SUFFIX { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u32 queue; //!< Queue index of suffix. u32 event; //!< Queue event, from MQE_*. }; struct ROSE_STRUCT_REPORT { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. ReportID report; }; struct ROSE_STRUCT_REPORT_CHAIN { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. ReportID report; }; struct ROSE_STRUCT_REPORT_EOD { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. ReportID report; }; struct ROSE_STRUCT_REPORT_SOM_INT { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. ReportID report; }; struct ROSE_STRUCT_REPORT_SOM { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. ReportID report; }; struct ROSE_STRUCT_REPORT_SOM_KNOWN { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. ReportID report; }; struct ROSE_STRUCT_SET_STATE { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. u8 depth; //!< Depth for this state. u32 index; //!< State index in multibit. }; struct ROSE_STRUCT_SET_GROUPS { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. rose_group groups; //!< Bitmask. }; +/** + * Note that the offsets in the jump table are always relative to the start of + * the program, not the current instruction. + */ +struct ROSE_STRUCT_SPARSE_ITER_BEGIN { + u8 code; //!< From enum RoseInstructionCode. + u32 iter_offset; //!< Offset of mmbit_sparse_iter structure. + u32 jump_table; //!< Offset of jump table indexed by sparse iterator. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +/** + * Note that the offsets in the jump table are always relative to the start of + * the program, not the current instruction. + */ +struct ROSE_STRUCT_SPARSE_ITER_NEXT { + u8 code; //!< From enum RoseInstructionCode. + u32 iter_offset; //!< Offset of mmbit_sparse_iter structure. + u32 jump_table; //!< Offset of jump table indexed by sparse iterator. + u32 state; //!< Current state index. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + struct ROSE_STRUCT_END { - u8 code; //!< From enum RoseRoleInstructionCode. + u8 code; //!< From enum RoseInstructionCode. }; #endif // ROSE_ROSE_PROGRAM_H
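For readers new to the program format this patch extends, the following sketch (not part of the patch; walk_program() and its opcode subset are illustrative only) shows the decode-and-advance loop that both roseRunProgram and dumpProgram are built around: read the leading opcode byte, interpret the fixed-layout instruction struct, then either advance by the struct's size rounded up to ROSE_INSTR_MIN_ALIGN or take a jump. It assumes the definitions from rose_program.h and ue2common.h.

    #include <stdio.h>
    #include "ue2common.h"          /* u8, u32, ROUNDUP_N */
    #include "rose/rose_program.h"  /* instruction structs and opcodes */

    /* Illustrative only: decode a program until END, printing what we find.
     * Real consumers (roseRunProgram, dumpProgram) handle every opcode. */
    static void walk_program(const char *pc_base) {
        const char *pc = pc_base;
        for (;;) {
            u8 code = *(const u8 *)pc;
            switch ((enum RoseInstructionCode)code) {
            case ROSE_INSTR_CHECK_BOUNDS: {
                const struct ROSE_STRUCT_CHECK_BOUNDS *ri =
                    (const struct ROSE_STRUCT_CHECK_BOUNDS *)pc;
                /* fail_jump is a byte offset relative to this instruction;
                 * a failing check would do: pc += ri->fail_jump. */
                printf("CHECK_BOUNDS [%u, %u], fail_jump +%u\n",
                       ri->min_bound, ri->max_bound, ri->fail_jump);
                pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN);
                break;
            }
            case ROSE_INSTR_SPARSE_ITER_BEGIN: {
                const struct ROSE_STRUCT_SPARSE_ITER_BEGIN *ri =
                    (const struct ROSE_STRUCT_SPARSE_ITER_BEGIN *)pc;
                /* Jump table entries are relative to pc_base, not pc:
                 * the runtime does pc = pc_base + jumps[idx]. */
                printf("SPARSE_ITER_BEGIN iter=%u jumps=%u\n",
                       ri->iter_offset, ri->jump_table);
                pc += ROUNDUP_N(sizeof(*ri), ROSE_INSTR_MIN_ALIGN);
                break;
            }
            case ROSE_INSTR_END:
                printf("END\n");
                return;
            default:
                printf("opcode %u (not decoded in this sketch)\n", code);
                return;
            }
        }
    }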