mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
465 lines
16 KiB
C
465 lines
16 KiB
C
/*
|
|
* Copyright (c) 2015-2016, Intel Corporation
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
/** \file
|
|
* \brief Large Bounded Repeat (LBR) engine: runtime impl X-macros.
|
|
*/
|
|
|
|
#include "util/join.h"
|
|
|
|
#define ENGINE_EXEC_NAME JOIN(nfaExecLbr, ENGINE_ROOT_NAME)
|
|
#define EXEC_FN JOIN(lbrExec, ENGINE_ROOT_NAME)
|
|
#define FWDSCAN_FN JOIN(lbrFwdScan, ENGINE_ROOT_NAME)
|
|
#define REVSCAN_FN JOIN(lbrRevScan, ENGINE_ROOT_NAME)
|
|
|
|
char JOIN(ENGINE_EXEC_NAME, _queueCompressState)(const struct NFA *nfa,
|
|
const struct mq *q, s64a loc) {
|
|
assert(nfa && q);
|
|
assert(isLbrType(nfa->type));
|
|
DEBUG_PRINTF("entry, q->offset=%llu, loc=%lld\n", q->offset, loc);
|
|
|
|
const struct lbr_common *l = getImplNfa(nfa);
|
|
const struct lbr_state *lstate = (const struct lbr_state *)q->state;
|
|
|
|
u64a offset = q->offset + loc;
|
|
lbrCompressState(l, offset, lstate, q->streamState);
|
|
return 0;
|
|
}
|
|
|
|
char JOIN(ENGINE_EXEC_NAME, _expandState)(const struct NFA *nfa, void *dest,
|
|
const void *src, u64a offset,
|
|
UNUSED u8 key) {
|
|
assert(nfa);
|
|
assert(isLbrType(nfa->type));
|
|
DEBUG_PRINTF("entry, offset=%llu\n", offset);
|
|
|
|
const struct lbr_common *l = getImplNfa(nfa);
|
|
struct lbr_state *lstate = (struct lbr_state *)dest;
|
|
lbrExpandState(l, offset, src, lstate);
|
|
return 0;
|
|
}
|
|
|
|
char JOIN(ENGINE_EXEC_NAME, _reportCurrent)(const struct NFA *nfa,
|
|
struct mq *q) {
|
|
assert(nfa && q);
|
|
assert(isLbrType(nfa->type));
|
|
|
|
const struct lbr_common *l = getImplNfa(nfa);
|
|
u64a offset = q_cur_offset(q);
|
|
DEBUG_PRINTF("firing match %u at %llu\n", l->report, offset);
|
|
q->cb(0, offset, l->report, q->context);
|
|
return 0;
|
|
}
|
|
|
|
char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa,
|
|
ReportID report, struct mq *q) {
|
|
assert(nfa && q);
|
|
assert(isLbrType(nfa->type));
|
|
DEBUG_PRINTF("entry\n");
|
|
|
|
const struct lbr_common *l = getImplNfa(nfa);
|
|
const struct RepeatInfo *info = getRepeatInfo(l);
|
|
const struct lbr_state *lstate = (const struct lbr_state *)q->state;
|
|
if (repeatIsDead(info, lstate)) {
|
|
DEBUG_PRINTF("repeat is dead\n");
|
|
return 0;
|
|
}
|
|
|
|
u64a offset = q->offset + q_last_loc(q);
|
|
return lbrInAccept(l, lstate, q->streamState, offset, report);
|
|
}
|
|
|
|
char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
|
|
assert(nfa && q);
|
|
assert(isLbrType(nfa->type));
|
|
DEBUG_PRINTF("entry\n");
|
|
|
|
const struct lbr_common *l = getImplNfa(nfa);
|
|
return JOIN(ENGINE_EXEC_NAME, _inAccept)(nfa, l->report, q);
|
|
}
|
|
|
|
char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa,
|
|
struct mq *q) {
|
|
assert(nfa && q);
|
|
assert(isLbrType(nfa->type));
|
|
DEBUG_PRINTF("entry\n");
|
|
|
|
const struct lbr_common *l = getImplNfa(nfa);
|
|
const struct RepeatInfo *info = getRepeatInfo(l);
|
|
|
|
assert(q->state);
|
|
struct lbr_state *lstate = (struct lbr_state *)q->state;
|
|
assert(ISALIGNED(lstate));
|
|
|
|
lstate->lastEscape = 0;
|
|
clearRepeat(info, lstate);
|
|
|
|
return 0;
|
|
}
|
|
|
|
char JOIN(ENGINE_EXEC_NAME, _initCompressedState)(const struct NFA *nfa,
|
|
u64a offset,
|
|
void *state, UNUSED u8 key) {
|
|
assert(nfa && state);
|
|
assert(isLbrType(nfa->type));
|
|
DEBUG_PRINTF("entry\n");
|
|
|
|
const struct lbr_common *l = getImplNfa(nfa);
|
|
const struct RepeatInfo *info = getRepeatInfo(l);
|
|
struct lbr_state lstate; // temp control block on stack.
|
|
clearRepeat(info, &lstate);
|
|
lbrTop(l, &lstate, state, offset);
|
|
lbrCompressState(l, offset, &lstate, state);
|
|
|
|
return 1; // LBR is alive
|
|
}
|
|
|
|
// FIXME: this function could be much simpler for a Dot LBR, as all it needs to
|
|
// do is find the next top.
|
|
static really_inline
|
|
char JOIN(ENGINE_EXEC_NAME, _TopScan)(const struct NFA *nfa, struct mq *q,
|
|
s64a end) {
|
|
const struct lbr_common *l = getImplNfa(nfa);
|
|
const struct RepeatInfo *info = getRepeatInfo(l);
|
|
|
|
const u64a offset = q->offset;
|
|
struct lbr_state *lstate = (struct lbr_state *)q->state;
|
|
assert(ISALIGNED(lstate));
|
|
|
|
assert(repeatIsDead(info, lstate));
|
|
assert(q->cur < q->end);
|
|
|
|
DEBUG_PRINTF("entry, end=%lld, offset=%llu, lastEscape=%llu\n", end,
|
|
offset, lstate->lastEscape);
|
|
|
|
while (1) {
|
|
// Find the next top with location >= the last escape we saw.
|
|
for (; q->cur < q->end && q_cur_loc(q) <= end; q->cur++) {
|
|
u32 event = q_cur_type(q);
|
|
if ((event == MQE_TOP || event == MQE_TOP_FIRST) &&
|
|
q_cur_offset(q) >= lstate->lastEscape) {
|
|
goto found_top;
|
|
}
|
|
DEBUG_PRINTF("skip event type=%u offset=%lld\n", event, q_cur_offset(q));
|
|
}
|
|
|
|
// No more tops, we're done.
|
|
break;
|
|
|
|
found_top:;
|
|
assert(q->cur < q->end);
|
|
|
|
u64a sp = q_cur_offset(q);
|
|
u64a first_match = sp + info->repeatMin;
|
|
DEBUG_PRINTF("first possible match is at %llu\n", first_match);
|
|
|
|
u64a ep = MIN(MIN(end, (s64a)q->length) + offset, first_match);
|
|
if (ep > sp && sp >= offset) {
|
|
size_t eloc = 0;
|
|
DEBUG_PRINTF("rev b%llu e%llu/%zu\n", sp - offset, ep - offset,
|
|
q->length);
|
|
assert(ep - offset <= q->length);
|
|
if (REVSCAN_FN(nfa, q->buffer, sp - offset, ep - offset, &eloc)) {
|
|
DEBUG_PRINTF("escape found at %llu\n", offset + eloc);
|
|
lstate->lastEscape = eloc;
|
|
q->cur++;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
lbrTop(l, lstate, q->streamState, sp);
|
|
return 1;
|
|
}
|
|
|
|
DEBUG_PRINTF("exhausted queue\n");
|
|
return 0;
|
|
}
|
|
|
|
static really_inline
|
|
char JOIN(ENGINE_EXEC_NAME, _Q_i)(const struct NFA *nfa, struct mq *q,
|
|
s64a end, enum MatchMode mode) {
|
|
assert(nfa && q);
|
|
assert(isLbrType(nfa->type));
|
|
|
|
const struct lbr_common *l = getImplNfa(nfa);
|
|
const struct RepeatInfo *info = getRepeatInfo(l);
|
|
|
|
struct lbr_state *lstate = (struct lbr_state *)q->state;
|
|
assert(ISALIGNED(lstate));
|
|
|
|
|
|
if (q->report_current) {
|
|
DEBUG_PRINTF("report_current: fire match at %llu\n", q_cur_offset(q));
|
|
int rv = q->cb(0, q_cur_offset(q), l->report, q->context);
|
|
q->report_current = 0;
|
|
if (rv == MO_HALT_MATCHING) {
|
|
return MO_HALT_MATCHING;
|
|
}
|
|
}
|
|
|
|
if (q->cur == q->end) {
|
|
return 1;
|
|
}
|
|
|
|
assert(q->cur + 1 < q->end); /* require at least two items */
|
|
assert(q_cur_type(q) == MQE_START);
|
|
u64a sp = q_cur_offset(q);
|
|
q->cur++;
|
|
DEBUG_PRINTF("sp=%llu, abs_end=%llu\n", sp, end + q->offset);
|
|
|
|
while (q->cur < q->end) {
|
|
DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
|
|
q_cur_offset(q));
|
|
|
|
assert(sp >= q->offset); // not in history
|
|
|
|
if (repeatIsDead(info, lstate)) {
|
|
DEBUG_PRINTF("repeat is currently dead, skipping scan\n");
|
|
goto scan_done;
|
|
}
|
|
|
|
u64a ep = q_cur_offset(q);
|
|
ep = MIN(ep, q->offset + end);
|
|
if (sp < ep) {
|
|
size_t eloc = 0;
|
|
char escape_found = 0;
|
|
DEBUG_PRINTF("scanning from sp=%llu to ep=%llu\n", sp, ep);
|
|
assert(sp >= q->offset && ep >= q->offset);
|
|
if (FWDSCAN_FN(nfa, q->buffer, sp - q->offset, ep - q->offset, &eloc)) {
|
|
escape_found = 1;
|
|
ep = q->offset + eloc;
|
|
DEBUG_PRINTF("escape found at %llu\n", ep);
|
|
assert(ep >= sp);
|
|
}
|
|
|
|
assert(sp <= ep);
|
|
|
|
if (mode == STOP_AT_MATCH) {
|
|
size_t mloc;
|
|
if (lbrFindMatch(l, sp, ep, lstate, q->streamState, &mloc)) {
|
|
DEBUG_PRINTF("storing match at %llu\n", sp + mloc);
|
|
q->cur--;
|
|
assert(q->cur < MAX_MQE_LEN);
|
|
q->items[q->cur].type = MQE_START;
|
|
q->items[q->cur].location = (s64a)(sp - q->offset) + mloc;
|
|
return MO_MATCHES_PENDING;
|
|
}
|
|
} else {
|
|
assert(mode == CALLBACK_OUTPUT);
|
|
char rv = lbrMatchLoop(l, sp, ep, lstate, q->streamState, q->cb,
|
|
q->context);
|
|
if (rv == MO_HALT_MATCHING) {
|
|
return MO_HALT_MATCHING;
|
|
}
|
|
assert(rv == MO_CONTINUE_MATCHING);
|
|
}
|
|
|
|
// cppcheck-suppress knownConditionTrueFalse
|
|
if (escape_found) {
|
|
DEBUG_PRINTF("clearing repeat due to escape\n");
|
|
clearRepeat(info, lstate);
|
|
}
|
|
}
|
|
|
|
scan_done:
|
|
if (q_cur_loc(q) > end) {
|
|
q->cur--;
|
|
assert(q->cur < MAX_MQE_LEN);
|
|
q->items[q->cur].type = MQE_START;
|
|
q->items[q->cur].location = end;
|
|
return MO_ALIVE;
|
|
}
|
|
|
|
if (repeatIsDead(info, lstate)) {
|
|
if (!JOIN(ENGINE_EXEC_NAME, _TopScan)(nfa, q, end)) {
|
|
assert(repeatIsDead(info, lstate));
|
|
if (q->cur < q->end && q_cur_loc(q) > end) {
|
|
q->cur--;
|
|
assert(q->cur < MAX_MQE_LEN);
|
|
q->items[q->cur].type = MQE_START;
|
|
q->items[q->cur].location = end;
|
|
return MO_ALIVE;
|
|
}
|
|
return 0;
|
|
}
|
|
DEBUG_PRINTF("cur offset = %llu\n", q_cur_offset(q));
|
|
} else {
|
|
switch (q_cur_type(q)) {
|
|
case MQE_TOP:
|
|
case MQE_TOP_FIRST:
|
|
lbrTop(l, lstate, q->streamState, q_cur_offset(q));
|
|
break;
|
|
case MQE_START:
|
|
case MQE_END:
|
|
break;
|
|
default:
|
|
DEBUG_PRINTF("unhandled event %d!\n", q_cur_type(q));
|
|
assert(0);
|
|
break;
|
|
}
|
|
}
|
|
|
|
sp = q_cur_offset(q);
|
|
q->cur++;
|
|
}
|
|
|
|
return lbrIsAlive(l, lstate, q->streamState, sp);
|
|
}
|
|
|
|
char JOIN(ENGINE_EXEC_NAME, _Q)(const struct NFA *nfa, struct mq *q, s64a end) {
|
|
DEBUG_PRINTF("entry, offset=%llu, end=%lld\n", q->offset, end);
|
|
return JOIN(ENGINE_EXEC_NAME, _Q_i)(nfa, q, end, CALLBACK_OUTPUT);
|
|
}
|
|
|
|
char JOIN(ENGINE_EXEC_NAME, _Q2)(const struct NFA *nfa, struct mq *q, s64a end) {
|
|
DEBUG_PRINTF("entry, offset=%llu, end=%lld\n", q->offset, end);
|
|
return JOIN(ENGINE_EXEC_NAME, _Q_i)(nfa, q, end, STOP_AT_MATCH);
|
|
}
|
|
|
|
static really_inline
|
|
void JOIN(ENGINE_EXEC_NAME, _StreamSilent)(const struct NFA *nfa, struct mq *q,
|
|
const u8 *buf, size_t length) {
|
|
const struct lbr_common *l = getImplNfa(nfa);
|
|
const struct RepeatInfo *info = getRepeatInfo(l);
|
|
struct lbr_state *lstate = (struct lbr_state *)q->state;
|
|
assert(ISALIGNED(lstate));
|
|
|
|
assert(!repeatIsDead(info, lstate));
|
|
|
|
// This call doesn't produce matches, so we elide the lbrMatchLoop call
|
|
// entirely and just do escape scans to maintain the repeat.
|
|
|
|
size_t eloc = 0;
|
|
char escaped = FWDSCAN_FN(nfa, buf, 0, length, &eloc);
|
|
// cppcheck-suppress knownConditionTrueFalse
|
|
if (escaped) {
|
|
assert(eloc < length);
|
|
DEBUG_PRINTF("escape found at %zu, clearing repeat\n", eloc);
|
|
clearRepeat(info, lstate);
|
|
}
|
|
}
|
|
|
|
// Rose infix path.
|
|
char JOIN(ENGINE_EXEC_NAME, _QR)(const struct NFA *nfa, struct mq *q,
|
|
ReportID report) {
|
|
assert(nfa && q);
|
|
assert(isLbrType(nfa->type));
|
|
|
|
if (q->cur == q->end) {
|
|
return 1;
|
|
}
|
|
|
|
assert(q->cur + 1 < q->end); /* require at least two items */
|
|
assert(q_cur_type(q) == MQE_START);
|
|
u64a sp = q_cur_offset(q);
|
|
q->cur++;
|
|
DEBUG_PRINTF("sp=%llu\n", sp);
|
|
|
|
const struct lbr_common *l = getImplNfa(nfa);
|
|
const struct RepeatInfo *info = getRepeatInfo(l);
|
|
struct lbr_state *lstate = (struct lbr_state *)q->state;
|
|
assert(ISALIGNED(lstate));
|
|
const s64a lastLoc = q_last_loc(q);
|
|
|
|
while (q->cur < q->end) {
|
|
DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q),
|
|
q_cur_offset(q));
|
|
|
|
if (repeatIsDead(info, lstate)) {
|
|
DEBUG_PRINTF("repeat is dead\n");
|
|
goto scan_done;
|
|
}
|
|
|
|
u64a ep = q_cur_offset(q);
|
|
|
|
if (sp < q->offset) {
|
|
DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
|
|
assert(q->offset - sp <= q->hlength);
|
|
u64a local_ep = MIN(q->offset, ep);
|
|
const u8 *ptr = q->history + q->hlength + sp - q->offset;
|
|
JOIN(ENGINE_EXEC_NAME, _StreamSilent)(nfa, q, ptr, local_ep - sp);
|
|
sp = local_ep;
|
|
}
|
|
|
|
if (repeatIsDead(info, lstate)) {
|
|
DEBUG_PRINTF("repeat is dead\n");
|
|
goto scan_done;
|
|
}
|
|
|
|
if (sp < ep) {
|
|
DEBUG_PRINTF("MAIN BUFFER SCAN\n");
|
|
assert(ep - q->offset <= q->length);
|
|
const u8 *ptr = q->buffer + sp - q->offset;
|
|
JOIN(ENGINE_EXEC_NAME, _StreamSilent)(nfa, q, ptr, ep - sp);
|
|
}
|
|
|
|
if (repeatIsDead(info, lstate)) {
|
|
scan_done:
|
|
if (!JOIN(ENGINE_EXEC_NAME, _TopScan)(nfa, q, lastLoc)) {
|
|
assert(repeatIsDead(info, lstate));
|
|
assert(q->cur == q->end);
|
|
return 0;
|
|
}
|
|
} else {
|
|
switch (q_cur_type(q)) {
|
|
case MQE_TOP:
|
|
case MQE_TOP_FIRST:
|
|
lbrTop(l, lstate, q->streamState, q_cur_offset(q));
|
|
break;
|
|
case MQE_START:
|
|
case MQE_END:
|
|
break;
|
|
default:
|
|
DEBUG_PRINTF("unhandled event %d!\n", q_cur_type(q));
|
|
assert(0);
|
|
break;
|
|
}
|
|
}
|
|
|
|
sp = q_cur_offset(q);
|
|
q->cur++;
|
|
}
|
|
|
|
if (repeatIsDead(info, lstate)) {
|
|
DEBUG_PRINTF("repeat is dead\n");
|
|
return 0;
|
|
}
|
|
|
|
if (lbrInAccept(l, lstate, q->streamState, sp, report)) {
|
|
return MO_MATCHES_PENDING;
|
|
}
|
|
|
|
return lbrIsActive(l, lstate, q->streamState, sp);
|
|
}
|
|
|
|
#undef ENGINE_EXEC_NAME
|
|
#undef EXEC_FN
|
|
#undef FWDSCAN_FN
|
|
#undef REVSCAN_FN
|
|
#undef ENGINE_ROOT_NAME
|