/*
 * Copyright (c) 2015-2016, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "util/join.h"
#include <string.h> /* memset() is used by the state-initialisation code */

/** \file
 * \brief Limex Execution Engine Or:
 * How I Learned To Stop Worrying And Love The Preprocessor
 *
 * Version 2.0: now with X-Macros, so you get line numbers in your debugger.
 *
 * This file is a template: the includer defines SIZE and STATE_T, and every
 * function and type name generated below is built from them with JOIN()
 * (provided by util/join.h). All helper macros defined here, and the SIZE and
 * STATE_T parameters themselves, are #undef'd again at the end of the file.
 */

#if !defined(SIZE) || !defined(STATE_T)
#  error Must define SIZE and STATE_T in includer.
#endif

/* Root of the externally visible function names generated by this file. */
#define LIMEX_API_ROOT JOIN(nfaExecLimEx, SIZE)

/* Type of the compiled NFA for this SIZE. */
#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE)

/* Per-SIZE helper functions that are defined elsewhere and called from here. */
#define TESTEOD_FN JOIN(moNfaTestEod, SIZE)
#define TESTEOD_REV_FN JOIN(moNfaRevTestEod, SIZE)
#define INITIAL_FN JOIN(moNfaInitial, SIZE)
#define TOP_FN JOIN(moNfaTop, SIZE)
#define TOPN_FN JOIN(moNfaTopN, SIZE)
#define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE)
#define COMPRESS_FN JOIN(moNfaCompressState, SIZE)
#define EXPAND_FN JOIN(moNfaExpandState, SIZE)
#define COMPRESS_REPEATS_FN JOIN(LIMEX_API_ROOT, _Compress_Repeats)
#define EXPAND_REPEATS_FN JOIN(LIMEX_API_ROOT, _Expand_Repeats)
#define PROCESS_ACCEPTS_FN JOIN(moProcessAccepts, SIZE)
#define PROCESS_ACCEPTS_NOSQUASH_FN JOIN(moProcessAcceptsNoSquash, SIZE)
#define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE)

/* Names of the static functions defined in this file. */
#define RUN_ACCEL_FN JOIN(LIMEX_API_ROOT, _Run_Accel)
#define RUN_EXCEPTIONS_FN JOIN(LIMEX_API_ROOT, _Run_Exceptions)
#define REV_STREAM_FN JOIN(LIMEX_API_ROOT, _Rev_Stream)
#define STREAM_FN JOIN(LIMEX_API_ROOT, _Stream)
#define STREAMCB_FN JOIN(LIMEX_API_ROOT, _Stream_CB)
#define STREAMFIRST_FN JOIN(LIMEX_API_ROOT, _Stream_First)
#define STREAMSILENT_FN JOIN(LIMEX_API_ROOT, _Stream_Silent)

/* Per-SIZE context and exception structure names. */
#define CONTEXT_T JOIN(NFAContext, SIZE)
#define EXCEPTION_T JOIN(struct NFAException, SIZE)

/* Operations on STATE_T values; each resolves to a STATE_T-specific name. */
#define LOAD_STATE JOIN(load_, STATE_T)
#define STORE_STATE JOIN(store_, STATE_T)
#define AND_STATE JOIN(and_, STATE_T)
#define ANDNOT_STATE JOIN(andnot_, STATE_T)
#define OR_STATE JOIN(or_, STATE_T)
#define TESTBIT_STATE JOIN(testbit_, STATE_T)
#define CLEARBIT_STATE JOIN(clearbit_, STATE_T)
#define ZERO_STATE JOIN(zero_, STATE_T)
#define ISNONZERO_STATE JOIN(isNonZero_, STATE_T)
#define ISZERO_STATE JOIN(isZero_, STATE_T)
#define NOTEQ_STATE JOIN(noteq_, STATE_T)

// Pick an appropriate diffrich function for this platform.
#ifdef ARCH_64_BIT #define DIFFRICH_STATE JOIN(diffrich64_, STATE_T) #else #define DIFFRICH_STATE JOIN(diffrich_, STATE_T) #endif #define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE) #define SQUASH_UNTUG_BR_FN JOIN(lazyTug, SIZE) // Acceleration and exception masks: we load them on the fly for really big // models. #if SIZE < 256 #define ACCEL_MASK accelMask #define ACCEL_AND_FRIENDS_MASK accel_and_friendsMask #define EXCEPTION_MASK exceptionMask #else #define ACCEL_MASK LOAD_STATE(&limex->accel) #define ACCEL_AND_FRIENDS_MASK LOAD_STATE(&limex->accel_and_friends) #define EXCEPTION_MASK LOAD_STATE(&limex->exceptionMask) #endif // Run exception processing, if necessary. Returns 0 if scanning should // continue, 1 if an accept was fired and the user instructed us to halt. static really_inline char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, const ReportID *exReports, STATE_T s, const STATE_T emask, size_t i, u64a offset, STATE_T *succ, u64a *final_loc, struct CONTEXT_T *ctx, const char flags, const char in_rev, const char first_match) { STATE_T estate = AND_STATE(s, emask); u32 diffmask = DIFFRICH_STATE(ZERO_STATE, estate); if (likely(!diffmask)) { return 0; // No exceptions to process. } if (first_match && i) { STATE_T acceptMask = LOAD_STATE(&limex->accept); STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { DEBUG_PRINTF("first match at %zu\n", i); DEBUG_PRINTF("for nfa %p\n", limex); assert(final_loc); STORE_STATE(&ctx->s, s); *final_loc = i; return 1; // Halt matching. } } u64a callback_offset = i + offset; char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags; int rv = JOIN(processExceptional, SIZE)( pass_state, pass_estate, diffmask, succ, limex, exceptions, exReports, callback_offset, ctx, in_rev, localflags); if (rv == PE_RV_HALT) { return 1; // Halt matching. 
} return 0; } static really_inline size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask, UNUSED const IMPL_NFA_T *limex, const u8 *accelTable, const union AccelAux *accelAux, const u8 *input, size_t i, size_t length) { size_t j; #if SIZE < 128 // For small cases, we pass the state by value. j = JOIN(doAccel, SIZE)(s, accelMask, accelTable, accelAux, input, i, length); #else j = JOIN(doAccel, SIZE)(&s, limex, accelTable, accelAux, input, i, length); #endif assert(j >= i); assert(i <= length); return j; } static really_inline char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, struct CONTEXT_T *ctx, u64a offset, const char flags, u64a *final_loc, const char first_match) { const STATE_T *reach = (const STATE_T *)((const char *)limex + sizeof(*limex)); #if SIZE < 256 const STATE_T accelMask = LOAD_STATE(&limex->accel); const STATE_T accel_and_friendsMask = LOAD_STATE(&limex->accel_and_friends); const STATE_T exceptionMask = LOAD_STATE(&limex->exceptionMask); #endif const u8 *accelTable = (const u8 *)((const char *)limex + limex->accelTableOffset); const union AccelAux *accelAux = (const union AccelAux *)((const char *)limex + limex->accelAuxOffset); const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); const ReportID *exReports = getExReports(limex); STATE_T s = LOAD_STATE(&ctx->s); /* assert(ISALIGNED_16(exceptions)); */ /* assert(ISALIGNED_16(reach)); */ size_t i = 0; size_t min_accel_offset = 0; if (!limex->accelCount || length < ACCEL_MIN_LEN) { min_accel_offset = length; goto without_accel; } else { goto with_accel; } without_accel: for (; i != min_accel_offset; i++) { DUMP_INPUT(i); if (ISZERO_STATE(s)) { DEBUG_PRINTF("no states are switched on, early exit\n"); STORE_STATE(&ctx->s, s); return MO_CONTINUE_MATCHING; } u8 c = input[i]; STATE_T succ; NFA_EXEC_GET_LIM_SUCC(STATE_T); if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, flags, 0, first_match)) { 
return MO_HALT_MATCHING; } s = AND_STATE(succ, LOAD_STATE(&reach[limex->reachMap[c]])); } with_accel: for (; i != length; i++) { DUMP_INPUT(i); if (i + 16 <= length && ISZERO_STATE(ANDNOT_STATE(ACCEL_AND_FRIENDS_MASK, s))) { DEBUG_PRINTF("current states are all accelerable\n"); assert(i + 16 <= length); size_t post_idx = RUN_ACCEL_FN(s, ACCEL_MASK, limex, accelTable, accelAux, input, i, length); if (post_idx != i) { /* squashing any friends as they may no longer be valid; * offset back off should ensure they weren't doing anything * important */ s = AND_STATE(ACCEL_MASK, s); } if (i && post_idx < min_accel_offset + BAD_ACCEL_DIST) { min_accel_offset = post_idx + BIG_ACCEL_PENALTY; } else { min_accel_offset = post_idx + SMALL_ACCEL_PENALTY; } if (min_accel_offset >= length - ACCEL_MIN_LEN) { min_accel_offset = length; } DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", post_idx - i, min_accel_offset - post_idx, length - post_idx); i = post_idx; if (i == length) { break; /* all chars eaten, break out of loop */ } goto without_accel; } u8 c = input[i]; STATE_T succ; NFA_EXEC_GET_LIM_SUCC(STATE_T); if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, flags, 0, first_match)) { return MO_HALT_MATCHING; } s = AND_STATE(succ, LOAD_STATE(&reach[limex->reachMap[c]])); } STORE_STATE(&ctx->s, s); if ((first_match || (flags & CALLBACK_OUTPUT)) && limex->acceptCount) { STATE_T acceptMask = LOAD_STATE(&limex->accept); const struct NFAAccept *acceptTable = getAcceptTable(limex); const u32 acceptCount = limex->acceptCount; STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { if (first_match) { STORE_STATE(&ctx->s, s); assert(final_loc); *final_loc = length; return MO_HALT_MATCHING; } else if (PROCESS_ACCEPTS_FN(limex, &ctx->s, acceptTable, acceptCount, offset + length, ctx->callback, ctx->context)) { return MO_HALT_MATCHING; } } } if (first_match) { assert(final_loc); 
*final_loc = length; } return MO_CONTINUE_MATCHING; } static never_inline char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, struct CONTEXT_T *ctx, u64a offset) { const STATE_T *reach = (const STATE_T *)((const char *)limex + sizeof(*limex)); #if SIZE < 256 const STATE_T exceptionMask = LOAD_STATE(&limex->exceptionMask); #endif const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); const ReportID *exReports = getExReports(limex); STATE_T s = LOAD_STATE(&ctx->s); /* assert(ISALIGNED_16(exceptions)); */ /* assert(ISALIGNED_16(reach)); */ const char flags = CALLBACK_OUTPUT; u64a *final_loc = NULL; for (size_t i = length; i != 0; i--) { DUMP_INPUT(i-1); if (ISZERO_STATE(s)) { DEBUG_PRINTF("no states are switched on, early exit\n"); STORE_STATE(&ctx->s, s); return MO_CONTINUE_MATCHING; } u8 c = input[i-1]; STATE_T succ; NFA_EXEC_GET_LIM_SUCC(STATE_T); if (RUN_EXCEPTIONS_FN(limex, exceptions, exReports, s, EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, flags, 1, 0)) { return MO_HALT_MATCHING; } s = AND_STATE(succ, reach[limex->reachMap[c]]); } STORE_STATE(&ctx->s, s); STATE_T acceptMask = LOAD_STATE(&limex->accept); const struct NFAAccept *acceptTable = getAcceptTable(limex); const u32 acceptCount = limex->acceptCount; assert(flags & CALLBACK_OUTPUT); if (acceptCount) { STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { if (PROCESS_ACCEPTS_NOSQUASH_FN(&ctx->s, acceptTable, acceptCount, offset, ctx->callback, ctx->context)) { return MO_HALT_MATCHING; } } } return MO_CONTINUE_MATCHING; } static really_inline void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src, u64a offset) { if (!limex->repeatCount) { return; } STATE_T s = LOAD_STATE(src); if (ISZERO_STATE(AND_STATE(s, LOAD_STATE(&limex->repeatCyclicMask)))) { DEBUG_PRINTF("no cyclics are on\n"); return; } const union RepeatControl *ctrl = getRepeatControlBaseConst((const char *)src, sizeof(STATE_T)); char 
*state_base = (char *)dest + limex->stateSize; for (u32 i = 0; i < limex->repeatCount; i++) { DEBUG_PRINTF("repeat %u\n", i); const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); if (!TESTBIT_STATE(&s, info->cyclicState)) { DEBUG_PRINTF("is dead\n"); continue; } const struct RepeatInfo *repeat = getRepeatInfo(info); if (repeatHasMatch(repeat, &ctrl[i], state_base + info->stateOffset, offset) == REPEAT_STALE) { DEBUG_PRINTF("is stale, clearing state\n"); CLEARBIT_STATE(&s, info->cyclicState); continue; } DEBUG_PRINTF("packing state (packedCtrlOffset=%u)\n", info->packedCtrlOffset); repeatPack(state_base + info->packedCtrlOffset, repeat, &ctrl[i], offset); } STORE_STATE(src, s); } char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n, const struct mq *q, s64a loc) { void *dest = q->streamState; void *src = q->state; u8 key = queue_prev_byte(q, loc); const IMPL_NFA_T *limex = getImplNfa(n); COMPRESS_REPEATS_FN(limex, dest, src, q->offset + loc); COMPRESS_FN(limex, dest, src, key); return 0; } static really_inline void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src, u64a offset) { if (!limex->repeatCount) { return; } // Note: state has already been expanded into 'dest'. 
const STATE_T cyclics = AND_STATE(LOAD_STATE(dest), LOAD_STATE(&limex->repeatCyclicMask)); if (ISZERO_STATE(cyclics)) { DEBUG_PRINTF("no cyclics are on\n"); return; } union RepeatControl *ctrl = getRepeatControlBase((char *)dest, sizeof(STATE_T)); const char *state_base = (const char *)src + limex->stateSize; for (u32 i = 0; i < limex->repeatCount; i++) { DEBUG_PRINTF("repeat %u\n", i); const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); if (!TESTBIT_STATE(&cyclics, info->cyclicState)) { DEBUG_PRINTF("is dead\n"); continue; } DEBUG_PRINTF("unpacking state (packedCtrlOffset=%u)\n", info->packedCtrlOffset); const struct RepeatInfo *repeat = getRepeatInfo(info); repeatUnpack(state_base + info->packedCtrlOffset, repeat, offset, &ctrl[i]); } } char JOIN(LIMEX_API_ROOT, _expandState)(const struct NFA *n, void *dest, const void *src, u64a offset, u8 key) { const IMPL_NFA_T *limex = getImplNfa(n); EXPAND_FN(limex, dest, src, key); EXPAND_REPEATS_FN(limex, dest, src, offset); return 0; } char JOIN(LIMEX_API_ROOT, _queueInitState)(const struct NFA *n, struct mq *q) { STORE_STATE(q->state, ZERO_STATE); // Zero every bounded repeat control block in state. const IMPL_NFA_T *limex = getImplNfa(n); union RepeatControl *ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); for (u32 i = 0; i < limex->repeatCount; i++) { memset(&ctrl[i], 0, sizeof(*ctrl)); } return 0; } char JOIN(LIMEX_API_ROOT, _initCompressedState)(const struct NFA *n, u64a offset, void *state, u8 key) { const IMPL_NFA_T *limex = getImplNfa(n); STATE_T s = INITIAL_FN(limex, !!offset); if (ISZERO_STATE(s)) { DEBUG_PRINTF("state went to zero\n"); return 0; } // NFA is still active, compress its state and ship it out. COMPRESS_FN(limex, state, &s, key); // Zero every packed bounded repeat control block in stream state. 
char *repeat_region = (char *)state + limex->stateSize; for (u32 i = 0; i < limex->repeatCount; i++) { const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); const struct RepeatInfo *repeat = getRepeatInfo(info); memset(repeat_region + info->packedCtrlOffset, 0, repeat->packedCtrlSize); } return 1; } // Helper for history buffer scans, which catch up the NFA state but don't emit // matches. static never_inline void STREAMSILENT_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, struct CONTEXT_T *ctx, u64a offset) { const char first_match = 0; UNUSED char rv = STREAM_FN(limex, input, length, ctx, offset, NO_OUTPUT, NULL, first_match); assert(rv != MO_HALT_MATCHING); } static never_inline char STREAMCB_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, struct CONTEXT_T *ctx, u64a offset) { const char first_match = 0; assert(ISALIGNED_CL(ctx)); return STREAM_FN(limex, input, length, ctx, offset, CALLBACK_OUTPUT, NULL, first_match); } static never_inline char STREAMFIRST_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, struct CONTEXT_T *ctx, u64a offset, u64a *final_loc) { const char first_match = 1; // Run to first match and stop, no callbacks. return STREAM_FN(limex, input, length, ctx, offset, NO_OUTPUT, final_loc, first_match); } // Common code for handling the current event on the queue. 
static really_inline void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex, struct mq *q, struct CONTEXT_T *ctx, u64a sp) { #define DEFINE_CASE(ee) \ case ee: \ DEBUG_PRINTF(#ee "\n"); u32 e = q->items[q->cur].type; switch (e) { DEFINE_CASE(MQE_TOP) STORE_STATE(&ctx->s, TOP_FN(limex, !!sp, LOAD_STATE(&ctx->s))); break; DEFINE_CASE(MQE_START) break; DEFINE_CASE(MQE_END) break; default: assert(e >= MQE_TOP_FIRST); assert(e < MQE_INVALID); DEBUG_PRINTF("MQE_TOP + %d\n", ((int)e - MQE_TOP_FIRST)); STORE_STATE(&ctx->s, TOPN_FN(limex, LOAD_STATE(&ctx->s), e - MQE_TOP_FIRST)); } #undef DEFINE_CASE } // "Classic" queue call, used by outfixes char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { const IMPL_NFA_T *limex = getImplNfa(n); if (q->report_current) { char rv = REPORTCURRENT_FN(limex, q); q->report_current = 0; if (rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } } if (q->cur == q->end) { return 1; } assert(q->cur + 1 < q->end); /* require at least two items */ struct CONTEXT_T ctx; ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); ctx.repeat_state = q->streamState + limex->stateSize; ctx.callback = q->cb; ctx.context = q->context; STORE_STATE(&ctx.cached_estate, ZERO_STATE); ctx.cached_br = 0; assert(q->items[q->cur].location >= 0); DEBUG_PRINTF("LOAD STATE\n"); STORE_STATE(&ctx.s, LOAD_STATE(q->state)); assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; u64a sp = offset + q->items[q->cur].location; u64a end_abs = offset + end; q->cur++; while (q->cur < q->end && sp <= end_abs) { u64a ep = offset + q->items[q->cur].location; ep = MIN(ep, end_abs); assert(ep >= sp); assert(sp >= offset); // We no longer do history buffer scans here. 
if (sp >= ep) { goto scan_done; } /* do main buffer region */ DEBUG_PRINTF("MAIN BUFFER SCAN\n"); assert(ep - offset <= q->length); if (STREAMCB_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp) == MO_HALT_MATCHING) { STORE_STATE(q->state, ZERO_STATE); return 0; } DEBUG_PRINTF("SCAN DONE\n"); scan_done: sp = ep; if (sp != offset + q->items[q->cur].location) { assert(q->cur); DEBUG_PRINTF("bail: sp = %llu end_abs == %llu offset == %llu\n", sp, end_abs, offset); assert(sp == end_abs); q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp - offset; DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end); STORE_STATE(q->state, LOAD_STATE(&ctx.s)); return MO_ALIVE; } JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp); q->cur++; } EXPIRE_ESTATE_FN(limex, &ctx, sp); DEBUG_PRINTF("END\n"); STORE_STATE(q->state, LOAD_STATE(&ctx.s)); if (q->cur != q->end) { q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp - offset; return MO_ALIVE; } return ISNONZERO_STATE(LOAD_STATE(&ctx.s)); } /* used by suffix execution in Rose */ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { const IMPL_NFA_T *limex = getImplNfa(n); if (q->report_current) { char rv = REPORTCURRENT_FN(limex, q); q->report_current = 0; if (rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } } if (q->cur == q->end) { return 1; } assert(q->cur + 1 < q->end); /* require at least two items */ struct CONTEXT_T ctx; ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); ctx.repeat_state = q->streamState + limex->stateSize; ctx.callback = q->cb; ctx.context = q->context; STORE_STATE(&ctx.cached_estate, ZERO_STATE); ctx.cached_br = 0; DEBUG_PRINTF("LOAD STATE\n"); STORE_STATE(&ctx.s, LOAD_STATE(q->state)); assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; u64a sp = offset + q->items[q->cur].location; u64a end_abs = offset + end; q->cur++; while (q->cur < q->end && sp <= end_abs) { u64a ep = offset + 
q->items[q->cur].location; DEBUG_PRINTF("sp = %llu, ep = %llu, end_abs = %llu\n", sp, ep, end_abs); ep = MIN(ep, end_abs); assert(ep >= sp); if (sp < offset) { DEBUG_PRINTF("HISTORY BUFFER SCAN\n"); assert(offset - sp <= q->hlength); u64a local_ep = MIN(offset, ep); u64a final_look = 0; /* we are starting inside the history buffer */ if (STREAMFIRST_FN(limex, q->history + q->hlength + sp - offset, local_ep - sp, &ctx, sp, &final_look) == MO_HALT_MATCHING) { DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu " "offset:%llu\n", final_look, sp, end_abs, offset); assert(q->cur); q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp + final_look - offset; STORE_STATE(q->state, LOAD_STATE(&ctx.s)); return MO_MATCHES_PENDING; } sp = local_ep; } if (sp >= ep) { goto scan_done; } /* do main buffer region */ u64a final_look = 0; assert(ep - offset <= q->length); if (STREAMFIRST_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp, &final_look) == MO_HALT_MATCHING) { DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu offset:%llu\n", final_look, sp, end_abs, offset); assert(q->cur); q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp + final_look - offset; STORE_STATE(q->state, LOAD_STATE(&ctx.s)); return MO_MATCHES_PENDING; } scan_done: sp = ep; if (sp != offset + q->items[q->cur].location) { assert(q->cur); DEBUG_PRINTF("bail: sp = %llu end_abs == %llu offset == %llu\n", sp, end_abs, offset); assert(sp == end_abs); q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp - offset; DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end); STORE_STATE(q->state, LOAD_STATE(&ctx.s)); return MO_ALIVE; } JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp); q->cur++; } EXPIRE_ESTATE_FN(limex, &ctx, sp); DEBUG_PRINTF("END\n"); STORE_STATE(q->state, LOAD_STATE(&ctx.s)); if (q->cur != q->end) { q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp - offset; return MO_ALIVE; } return 
ISNONZERO_STATE(LOAD_STATE(&ctx.s)); } // Used for execution Rose prefix/infixes. char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, ReportID report) { const IMPL_NFA_T *limex = getImplNfa(n); if (q->cur == q->end) { return 1; } assert(q->cur + 1 < q->end); /* require at least two items */ struct CONTEXT_T ctx; ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); ctx.repeat_state = q->streamState + limex->stateSize; ctx.callback = NULL; ctx.context = NULL; STORE_STATE(&ctx.cached_estate, ZERO_STATE); ctx.cached_br = 0; DEBUG_PRINTF("LOAD STATE\n"); STORE_STATE(&ctx.s, LOAD_STATE(q->state)); assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; u64a sp = offset + q->items[q->cur].location; q->cur++; while (q->cur < q->end) { u64a ep = offset + q->items[q->cur].location; if (n->maxWidth) { if (ep - sp > n->maxWidth) { sp = ep - n->maxWidth; STORE_STATE(&ctx.s, INITIAL_FN(limex, !!sp)); } } assert(ep >= sp); if (sp < offset) { DEBUG_PRINTF("HISTORY BUFFER SCAN\n"); assert(offset - sp <= q->hlength); u64a local_ep = MIN(offset, ep); /* we are starting inside the history buffer */ STREAMSILENT_FN(limex, q->history + q->hlength + sp - offset, local_ep - sp, &ctx, sp); sp = local_ep; } if (sp >= ep) { goto scan_done; } /* do main buffer region */ DEBUG_PRINTF("MAIN BUFFER SCAN\n"); assert(ep - offset <= q->length); STREAMSILENT_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp); DEBUG_PRINTF("SCAN DONE\n"); scan_done: sp = ep; JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp); q->cur++; } EXPIRE_ESTATE_FN(limex, &ctx, sp); DEBUG_PRINTF("END, nfa is %s\n", ISNONZERO_STATE(ctx.s) ? 
"still alive" : "dead"); STORE_STATE(q->state, LOAD_STATE(&ctx.s)); if (JOIN(limexInAccept, SIZE)(limex, LOAD_STATE(&ctx.s), ctx.repeat_ctrl, ctx.repeat_state, sp + 1, report)) { return MO_MATCHES_PENDING; } return ISNONZERO_STATE(LOAD_STATE(&ctx.s)); } char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state, const char *streamState, u64a offset, NfaCallback callback, void *context) { assert(n && state); const IMPL_NFA_T *limex = getImplNfa(n); const STATE_T *sptr = (const STATE_T *)state; const union RepeatControl *repeat_ctrl = getRepeatControlBaseConst(state, sizeof(STATE_T)); const char *repeat_state = streamState + limex->stateSize; return TESTEOD_FN(limex, sptr, repeat_ctrl, repeat_state, offset, 1, callback, context); } char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) { const IMPL_NFA_T *limex = getImplNfa(n); REPORTCURRENT_FN(limex, q); return 1; } // Block mode reverse scan. char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset, const u8 *buf, size_t buflen, const u8 *hbuf, size_t hlen, NfaCallback cb, void *context) { assert(buf || hbuf); assert(buflen || hlen); struct CONTEXT_T ctx; ctx.repeat_ctrl = NULL; ctx.repeat_state = NULL; ctx.callback = cb; ctx.context = context; STORE_STATE(&ctx.cached_estate, ZERO_STATE); ctx.cached_br = 0; const IMPL_NFA_T *limex = getImplNfa(n); STORE_STATE(&ctx.s, INITIAL_FN(limex, 0)); // always anchored // 'buf' may be null, for example when we're scanning at EOD time. if (buflen) { assert(buf); DEBUG_PRINTF("MAIN BUFFER SCAN, %zu bytes\n", buflen); offset -= buflen; REV_STREAM_FN(limex, buf, buflen, &ctx, offset); } if (hlen) { assert(hbuf); DEBUG_PRINTF("HISTORY BUFFER SCAN, %zu bytes\n", hlen); offset -= hlen; REV_STREAM_FN(limex, hbuf, hlen, &ctx, offset); } if (offset == 0 && ISNONZERO_STATE(LOAD_STATE(&ctx.s))) { TESTEOD_REV_FN(limex, &ctx.s, offset, cb, context); } // NOTE: return value is unused. 
return 0; } char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa, ReportID report, struct mq *q) { assert(nfa && q); assert(q->state && q->streamState); const IMPL_NFA_T *limex = getImplNfa(nfa); union RepeatControl *repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); char *repeat_state = q->streamState + limex->stateSize; STATE_T state = LOAD_STATE(q->state); u64a offset = q->offset + q_last_loc(q) + 1; return JOIN(limexInAccept, SIZE)(limex, state, repeat_ctrl, repeat_state, offset, report); } char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) { assert(nfa && q); assert(q->state && q->streamState); const IMPL_NFA_T *limex = getImplNfa(nfa); union RepeatControl *repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); char *repeat_state = q->streamState + limex->stateSize; STATE_T state = LOAD_STATE(q->state); u64a offset = q->offset + q_last_loc(q) + 1; return JOIN(limexInAnyAccept, SIZE)(limex, state, repeat_ctrl, repeat_state, offset); } enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( const struct NFA *nfa, struct mq *q, s64a loc) { assert(nfa->flags & NFA_ZOMBIE); const IMPL_NFA_T *limex = getImplNfa(nfa); STATE_T state = LOAD_STATE(q->state); STATE_T zmask = LOAD_STATE(&limex->zombieMask); if (limex->repeatCount) { u64a offset = q->offset + loc + 1; union RepeatControl *repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); char *repeat_state = q->streamState + limex->stateSize; SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &state); } if (ISNONZERO_STATE(AND_STATE(state, zmask))) { return NFA_ZOMBIE_ALWAYS_YES; } return NFA_ZOMBIE_NO; } #undef TESTEOD_FN #undef TESTEOD_REV_FN #undef INITIAL_FN #undef TOP_FN #undef TOPN_FN #undef REPORTCURRENT_FN #undef COMPRESS_FN #undef EXPAND_FN #undef COMPRESS_REPEATS_FN #undef EXPAND_REPEATS_FN #undef PROCESS_ACCEPTS_FN #undef PROCESS_ACCEPTS_NOSQUASH_FN #undef GET_NFA_REPEAT_INFO_FN #undef RUN_ACCEL_FN #undef RUN_EXCEPTIONS_FN #undef 
REV_STREAM_FN #undef STREAM_FN #undef STREAMCB_FN #undef STREAMFIRST_FN #undef STREAMSILENT_FN #undef CONTEXT_T #undef EXCEPTION_T #undef LOAD_STATE #undef STORE_STATE #undef AND_STATE #undef ANDNOT_STATE #undef OR_STATE #undef TESTBIT_STATE #undef CLEARBIT_STATE #undef ZERO_STATE #undef ISNONZERO_STATE #undef ISZERO_STATE #undef NOTEQ_STATE #undef DIFFRICH_STATE #undef INLINE_ATTR_INT #undef IMPL_NFA_T #undef SQUASH_UNTUG_BR_FN #undef ACCEL_MASK #undef ACCEL_AND_FRIENDS_MASK #undef EXCEPTION_MASK // Parameters. #undef SIZE #undef STATE_T #undef LIMEX_API_ROOT