/* * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Intel Corporation nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /** \file * \brief Large Bounded Repeat (LBR) engine: runtime impl X-macros. */ #include "util/join.h" #define ENGINE_EXEC_NAME JOIN(nfaExecLbr, ENGINE_ROOT_NAME) #define EXEC_FN JOIN(lbrExec, ENGINE_ROOT_NAME) #define FWDSCAN_FN JOIN(lbrFwdScan, ENGINE_ROOT_NAME) #define REVSCAN_FN JOIN(lbrRevScan, ENGINE_ROOT_NAME) char JOIN(ENGINE_EXEC_NAME, _queueCompressState)(const struct NFA *nfa, const struct mq *q, s64a loc) { assert(nfa && q); assert(isLbrType(nfa->type)); DEBUG_PRINTF("entry, q->offset=%llu, loc=%lld\n", q->offset, loc); const struct lbr_common *l = getImplNfa(nfa); const struct lbr_state *lstate = (const struct lbr_state *)q->state; u64a offset = q->offset + loc; lbrCompressState(l, offset, lstate, q->streamState); return 0; } char JOIN(ENGINE_EXEC_NAME, _expandState)(const struct NFA *nfa, void *dest, const void *src, u64a offset, UNUSED u8 key) { assert(nfa); assert(isLbrType(nfa->type)); DEBUG_PRINTF("entry, offset=%llu\n", offset); const struct lbr_common *l = getImplNfa(nfa); struct lbr_state *lstate = (struct lbr_state *)dest; lbrExpandState(l, offset, src, lstate); return 0; } char JOIN(ENGINE_EXEC_NAME, _reportCurrent)(const struct NFA *nfa, struct mq *q) { assert(nfa && q); assert(isLbrType(nfa->type)); const struct lbr_common *l = getImplNfa(nfa); u64a offset = q_cur_offset(q); DEBUG_PRINTF("firing match %u at %llu\n", l->report, offset); q->cb(0, offset, l->report, q->context); return 0; } char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa, ReportID report, struct mq *q) { assert(nfa && q); assert(isLbrType(nfa->type)); DEBUG_PRINTF("entry\n"); const struct lbr_common *l = getImplNfa(nfa); const struct RepeatInfo *info = getRepeatInfo(l); const struct lbr_state *lstate = (const struct lbr_state *)q->state; if (repeatIsDead(info, lstate)) { DEBUG_PRINTF("repeat is dead\n"); return 0; } u64a offset = q->offset + q_last_loc(q); return lbrInAccept(l, lstate, q->streamState, offset, report); } char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) { assert(nfa && q); assert(isLbrType(nfa->type)); DEBUG_PRINTF("entry\n"); const struct lbr_common *l = getImplNfa(nfa); return JOIN(ENGINE_EXEC_NAME, _inAccept)(nfa, l->report, q); } char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa, struct mq *q) { assert(nfa && q); assert(isLbrType(nfa->type)); DEBUG_PRINTF("entry\n"); const struct lbr_common *l = getImplNfa(nfa); const struct RepeatInfo *info = getRepeatInfo(l); assert(q->state); struct lbr_state *lstate = (struct lbr_state *)q->state; assert(ISALIGNED(lstate)); lstate->lastEscape = 0; clearRepeat(info, lstate); return 0; } char JOIN(ENGINE_EXEC_NAME, _initCompressedState)(const struct NFA *nfa, u64a offset, void *state, UNUSED u8 key) { assert(nfa && state); assert(isLbrType(nfa->type)); DEBUG_PRINTF("entry\n"); const struct lbr_common *l = getImplNfa(nfa); const struct RepeatInfo *info = getRepeatInfo(l); struct lbr_state lstate; // temp control block on stack. clearRepeat(info, &lstate); lbrTop(l, &lstate, state, offset); lbrCompressState(l, offset, &lstate, state); return 1; // LBR is alive } // FIXME: this function could be much simpler for a Dot LBR, as all it needs to // do is find the next top. static really_inline char JOIN(ENGINE_EXEC_NAME, _TopScan)(const struct NFA *nfa, struct mq *q, s64a end) { const struct lbr_common *l = getImplNfa(nfa); const struct RepeatInfo *info = getRepeatInfo(l); const u64a offset = q->offset; struct lbr_state *lstate = (struct lbr_state *)q->state; assert(ISALIGNED(lstate)); assert(repeatIsDead(info, lstate)); assert(q->cur < q->end); DEBUG_PRINTF("entry, end=%lld, offset=%llu, lastEscape=%llu\n", end, offset, lstate->lastEscape); while (1) { // Find the next top with location >= the last escape we saw. for (; q->cur < q->end && q_cur_loc(q) <= end; q->cur++) { u32 event = q_cur_type(q); if ((event == MQE_TOP || event == MQE_TOP_FIRST) && q_cur_offset(q) >= lstate->lastEscape) { goto found_top; } DEBUG_PRINTF("skip event type=%u offset=%lld\n", event, q_cur_offset(q)); } // No more tops, we're done. break; found_top:; assert(q->cur < q->end); u64a sp = q_cur_offset(q); u64a first_match = sp + info->repeatMin; DEBUG_PRINTF("first possible match is at %llu\n", first_match); u64a ep = MIN(MIN(end, (s64a)q->length) + offset, first_match); if (ep > sp && sp >= offset) { size_t eloc = 0; DEBUG_PRINTF("rev b%llu e%llu/%zu\n", sp - offset, ep - offset, q->length); assert(ep - offset <= q->length); if (REVSCAN_FN(nfa, q->buffer, sp - offset, ep - offset, &eloc)) { DEBUG_PRINTF("escape found at %llu\n", offset + eloc); lstate->lastEscape = eloc; q->cur++; continue; } } lbrTop(l, lstate, q->streamState, sp); return 1; } DEBUG_PRINTF("exhausted queue\n"); return 0; } static really_inline char JOIN(ENGINE_EXEC_NAME, _Q_i)(const struct NFA *nfa, struct mq *q, s64a end, enum MatchMode mode) { assert(nfa && q); assert(isLbrType(nfa->type)); const struct lbr_common *l = getImplNfa(nfa); const struct RepeatInfo *info = getRepeatInfo(l); struct lbr_state *lstate = (struct lbr_state *)q->state; assert(ISALIGNED(lstate)); if (q->report_current) { DEBUG_PRINTF("report_current: fire match at %llu\n", q_cur_offset(q)); int rv = q->cb(0, q_cur_offset(q), l->report, q->context); q->report_current = 0; if (rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } } if (q->cur == q->end) { return 1; } assert(q->cur + 1 < q->end); /* require at least two items */ assert(q_cur_type(q) == MQE_START); u64a sp = q_cur_offset(q); q->cur++; DEBUG_PRINTF("sp=%llu, abs_end=%llu\n", sp, end + q->offset); while (q->cur < q->end) { DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q), q_cur_offset(q)); assert(sp >= q->offset); // not in history if (repeatIsDead(info, lstate)) { DEBUG_PRINTF("repeat is currently dead, skipping scan\n"); goto scan_done; } u64a ep = q_cur_offset(q); ep = MIN(ep, q->offset + end); if (sp < ep) { size_t eloc = 0; char escape_found = 0; DEBUG_PRINTF("scanning from sp=%llu to ep=%llu\n", sp, ep); assert(sp >= q->offset && ep >= q->offset); if (FWDSCAN_FN(nfa, q->buffer, sp - q->offset, ep - q->offset, &eloc)) { escape_found = 1; ep = q->offset + eloc; DEBUG_PRINTF("escape found at %llu\n", ep); assert(ep >= sp); } assert(sp <= ep); if (mode == STOP_AT_MATCH) { size_t mloc; if (lbrFindMatch(l, sp, ep, lstate, q->streamState, &mloc)) { DEBUG_PRINTF("storing match at %llu\n", sp + mloc); q->cur--; assert(q->cur < MAX_MQE_LEN); q->items[q->cur].type = MQE_START; q->items[q->cur].location = (s64a)(sp - q->offset) + mloc; return MO_MATCHES_PENDING; } } else { assert(mode == CALLBACK_OUTPUT); char rv = lbrMatchLoop(l, sp, ep, lstate, q->streamState, q->cb, q->context); if (rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } assert(rv == MO_CONTINUE_MATCHING); } // cppcheck-suppress knownConditionTrueFalse if (escape_found) { DEBUG_PRINTF("clearing repeat due to escape\n"); clearRepeat(info, lstate); } } scan_done: if (q_cur_loc(q) > end) { q->cur--; assert(q->cur < MAX_MQE_LEN); q->items[q->cur].type = MQE_START; q->items[q->cur].location = end; return MO_ALIVE; } if (repeatIsDead(info, lstate)) { if (!JOIN(ENGINE_EXEC_NAME, _TopScan)(nfa, q, end)) { assert(repeatIsDead(info, lstate)); if (q->cur < q->end && q_cur_loc(q) > end) { q->cur--; assert(q->cur < MAX_MQE_LEN); q->items[q->cur].type = MQE_START; q->items[q->cur].location = end; return MO_ALIVE; } return 0; } DEBUG_PRINTF("cur offset = %llu\n", q_cur_offset(q)); } else { switch (q_cur_type(q)) { case MQE_TOP: case MQE_TOP_FIRST: lbrTop(l, lstate, q->streamState, q_cur_offset(q)); break; case MQE_START: case MQE_END: break; default: DEBUG_PRINTF("unhandled event %d!\n", q_cur_type(q)); assert(0); break; } } sp = q_cur_offset(q); q->cur++; } return lbrIsAlive(l, lstate, q->streamState, sp); } char JOIN(ENGINE_EXEC_NAME, _Q)(const struct NFA *nfa, struct mq *q, s64a end) { DEBUG_PRINTF("entry, offset=%llu, end=%lld\n", q->offset, end); return JOIN(ENGINE_EXEC_NAME, _Q_i)(nfa, q, end, CALLBACK_OUTPUT); } char JOIN(ENGINE_EXEC_NAME, _Q2)(const struct NFA *nfa, struct mq *q, s64a end) { DEBUG_PRINTF("entry, offset=%llu, end=%lld\n", q->offset, end); return JOIN(ENGINE_EXEC_NAME, _Q_i)(nfa, q, end, STOP_AT_MATCH); } static really_inline void JOIN(ENGINE_EXEC_NAME, _StreamSilent)(const struct NFA *nfa, struct mq *q, const u8 *buf, size_t length) { const struct lbr_common *l = getImplNfa(nfa); const struct RepeatInfo *info = getRepeatInfo(l); struct lbr_state *lstate = (struct lbr_state *)q->state; assert(ISALIGNED(lstate)); assert(!repeatIsDead(info, lstate)); // This call doesn't produce matches, so we elide the lbrMatchLoop call // entirely and just do escape scans to maintain the repeat. size_t eloc = 0; char escaped = FWDSCAN_FN(nfa, buf, 0, length, &eloc); // cppcheck-suppress knownConditionTrueFalse if (escaped) { assert(eloc < length); DEBUG_PRINTF("escape found at %zu, clearing repeat\n", eloc); clearRepeat(info, lstate); } } // Rose infix path. char JOIN(ENGINE_EXEC_NAME, _QR)(const struct NFA *nfa, struct mq *q, ReportID report) { assert(nfa && q); assert(isLbrType(nfa->type)); if (q->cur == q->end) { return 1; } assert(q->cur + 1 < q->end); /* require at least two items */ assert(q_cur_type(q) == MQE_START); u64a sp = q_cur_offset(q); q->cur++; DEBUG_PRINTF("sp=%llu\n", sp); const struct lbr_common *l = getImplNfa(nfa); const struct RepeatInfo *info = getRepeatInfo(l); struct lbr_state *lstate = (struct lbr_state *)q->state; assert(ISALIGNED(lstate)); const s64a lastLoc = q_last_loc(q); while (q->cur < q->end) { DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q), q_cur_offset(q)); if (repeatIsDead(info, lstate)) { DEBUG_PRINTF("repeat is dead\n"); goto scan_done; } u64a ep = q_cur_offset(q); if (sp < q->offset) { DEBUG_PRINTF("HISTORY BUFFER SCAN\n"); assert(q->offset - sp <= q->hlength); u64a local_ep = MIN(q->offset, ep); const u8 *ptr = q->history + q->hlength + sp - q->offset; JOIN(ENGINE_EXEC_NAME, _StreamSilent)(nfa, q, ptr, local_ep - sp); sp = local_ep; } if (repeatIsDead(info, lstate)) { DEBUG_PRINTF("repeat is dead\n"); goto scan_done; } if (sp < ep) { DEBUG_PRINTF("MAIN BUFFER SCAN\n"); assert(ep - q->offset <= q->length); const u8 *ptr = q->buffer + sp - q->offset; JOIN(ENGINE_EXEC_NAME, _StreamSilent)(nfa, q, ptr, ep - sp); } if (repeatIsDead(info, lstate)) { scan_done: if (!JOIN(ENGINE_EXEC_NAME, _TopScan)(nfa, q, lastLoc)) { assert(repeatIsDead(info, lstate)); assert(q->cur == q->end); return 0; } } else { switch (q_cur_type(q)) { case MQE_TOP: case MQE_TOP_FIRST: lbrTop(l, lstate, q->streamState, q_cur_offset(q)); break; case MQE_START: case MQE_END: break; default: DEBUG_PRINTF("unhandled event %d!\n", q_cur_type(q)); assert(0); break; } } sp = q_cur_offset(q); q->cur++; } if (repeatIsDead(info, lstate)) { DEBUG_PRINTF("repeat is dead\n"); return 0; } if (lbrInAccept(l, lstate, q->streamState, sp, report)) { return MO_MATCHES_PENDING; } return lbrIsActive(l, lstate, q->streamState, sp); } #undef ENGINE_EXEC_NAME #undef EXEC_FN #undef FWDSCAN_FN #undef REVSCAN_FN #undef ENGINE_ROOT_NAME