/*
 * Copyright (c) 2015-2018, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ROSE_MATCH_H
#define ROSE_MATCH_H

#include "catchup.h"
#include "runtime.h"
#include "scratch.h"
#include "report.h"
#include "rose_common.h"
#include "rose_internal.h"
#include "ue2common.h"
#include "hwlm/hwlm.h"
#include "nfa/nfa_api.h"
#include "nfa/nfa_api_queue.h"
#include "nfa/nfa_api_util.h"
#include "som/som_runtime.h"
#include "util/bitutils.h"
#include "util/exhaust.h"
#include "util/fatbit.h"
#include "util/multibit.h"

/* Callbacks, defined in catchup.c */

int roseNfaAdaptor(u64a start, u64a end, ReportID id, void *context);

/* Callbacks, defined in match.c */

hwlmcb_rv_t roseCallback(size_t end, u32 id, struct hs_scratch *scratch);
hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id,
                                 struct hs_scratch *scratch);
hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id,
                                     struct hs_scratch *scratch);
int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx);

/* Common code, used all over Rose runtime */

hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t,
                                 struct hs_scratch *scratch, u32 event,
                                 u64a top_squash_distance, u64a end,
                                 char in_catchup);

/** \brief Initialize the queue for a suffix/outfix engine. */
static really_inline
void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t,
               struct hs_scratch *scratch) {
    const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
    assert(scratch->fullState);
    q->nfa = getNfaByInfo(t, info);
    q->end = 0;
    q->cur = 0;
    q->state = scratch->fullState + info->fullStateOffset;
    q->streamState = scratch->core_info.state + info->stateOffset;
    q->offset = scratch->core_info.buf_offset;
    q->buffer = scratch->core_info.buf;
    q->length = scratch->core_info.len;
    q->history = scratch->core_info.hbuf;
    q->hlength = scratch->core_info.hlen;
    q->cb = roseNfaAdaptor;
    q->context = scratch;
    q->report_current = 0;

    DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
                 "state=%u\n", qi, q->offset, info->fullStateOffset,
                 info->stateOffset, *(u32 *)q->state);
}

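/* A minimal usage sketch: this is the sequence used by ensureQueueFlushed_i()
 * below when it wakes up an engine that is not yet active. The queue is
 * initialized, the NFA state is seeded and a start event is pushed before any
 * tops are enqueued:
 *
 *     initQueue(q, qi, t, scratch);
 *     nfaQueueInitState(q->nfa, q);
 *     pushQueueAt(q, 0, MQE_START, loc);
 */
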
/** \brief Initialize the queue for a leftfix (prefix/infix) engine. */
static really_inline
void initRoseQueue(const struct RoseEngine *t, u32 qi,
                   const struct LeftNfaInfo *left,
                   struct hs_scratch *scratch) {
    struct mq *q = scratch->queues + qi;
    const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
    q->nfa = getNfaByInfo(t, info);
    q->end = 0;
    q->cur = 0;
    q->state = scratch->fullState + info->fullStateOffset;

    // Transient roses don't have stream state; we use tstate in scratch
    // instead. The only reason we need this at ALL is for LimEx extended
    // regions, which assume that they have access to q->streamState +
    // compressedStateSize.
    if (left->transient) {
        q->streamState = (char *)scratch->tstate + info->stateOffset;
    } else {
        q->streamState = scratch->core_info.state + info->stateOffset;
    }

    q->offset = scratch->core_info.buf_offset;
    q->buffer = scratch->core_info.buf;
    q->length = scratch->core_info.len;
    q->history = scratch->core_info.hbuf;
    q->hlength = scratch->core_info.hlen;
    q->cb = NULL;
    q->context = NULL;
    q->report_current = 0;

    DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
                 "state=%u\n", qi, q->offset, info->fullStateOffset,
                 info->stateOffset, *(u32 *)q->state);
}

/** \brief Returns non-zero if the queue lacks space for two more items (a top
 * and an end event), i.e. returns 0 when there is room for both. */
static really_inline
char isQueueFull(const struct mq *q) {
    return q->end + 2 > MAX_MQE_LEN;
}

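/** \brief Expand an engine's compressed stream state into the full state
 * buffer at q->state, ready for execution at stream location \p loc. */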
static really_inline
void loadStreamState(const struct NFA *nfa, struct mq *q, s64a loc) {
    DEBUG_PRINTF("offset=%llu, length=%zu, hlength=%zu, loc=%lld\n",
                 q->offset, q->length, q->hlength, loc);
    nfaExpandState(nfa, q->state, q->streamState, q->offset + loc,
                   queue_prev_byte(q, loc));
}

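/** \brief Record how far behind the current location a leftfix is running,
 * storing the lag \p loc in the engine's one-byte-per-leftfix lag table. */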
static really_inline
void storeRoseDelay(const struct RoseEngine *t, char *state,
                    const struct LeftNfaInfo *left, u32 loc) {
    u32 di = left->lagIndex;
    if (di == ROSE_OFFSET_INVALID) {
        return;
    }

    assert(loc < 256); // ONE WHOLE BYTE!
    DEBUG_PRINTF("storing rose delay %u in slot %u\n", loc, di);
    u8 *leftfixDelay = getLeftfixLagTable(t, state);
    assert(loc <= MAX_STORED_LEFTFIX_LAG);
    leftfixDelay[di] = loc;
}

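/** \brief Flag a leftfix as a zombie (OWB_ZOMBIE_ALWAYS_YES) in the lag
 * table, so that later checks (see isZombie() below) can treat it as
 * unconditionally accepting rather than running it. */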
static really_inline
void setAsZombie(const struct RoseEngine *t, char *state,
                 const struct LeftNfaInfo *left) {
    u32 di = left->lagIndex;
    assert(di != ROSE_OFFSET_INVALID);
    if (di == ROSE_OFFSET_INVALID) {
        return;
    }

    u8 *leftfixDelay = getLeftfixLagTable(t, state);
    leftfixDelay[di] = OWB_ZOMBIE_ALWAYS_YES;
}

/* loadRoseDelay MUST NOT be called on the first stream write, as the lag
 * table is only initialized for NFAs running on stream boundaries. */
static really_inline
u32 loadRoseDelay(const struct RoseEngine *t, const char *state,
                  const struct LeftNfaInfo *left) {
    u32 di = left->lagIndex;
    if (di == ROSE_OFFSET_INVALID) {
        return 0;
    }

    const u8 *leftfixDelay = getLeftfixLagTableConst(t, state);
    u32 loc = leftfixDelay[di];
    DEBUG_PRINTF("read rose delay %u from slot %u\n", loc, di);
    return loc;
}

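/** \brief Returns non-zero if the given leftfix has been marked as a zombie
 * via setAsZombie(). */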
static really_inline
char isZombie(const struct RoseEngine *t, const char *state,
              const struct LeftNfaInfo *left) {
    u32 di = left->lagIndex;
    assert(di != ROSE_OFFSET_INVALID);
    if (di == ROSE_OFFSET_INVALID) {
        return 0;
    }

    const u8 *leftfixDelay = getLeftfixLagTableConst(t, state);
    DEBUG_PRINTF("read owb %hhu from slot %u\n", leftfixDelay[di], di);
    return leftfixDelay[di] == OWB_ZOMBIE_ALWAYS_YES;
}

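/** \brief Flush queued delayed/anchored literal matches up to offset \p end.
 *
 * flushQueuedLiterals() is the inline fast path: it returns immediately if no
 * progress has been made since the last flush or nothing is queued, and
 * otherwise defers to the out-of-line flushQueuedLiterals_i(). */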
hwlmcb_rv_t flushQueuedLiterals_i(const struct RoseEngine *t,
                                  struct hs_scratch *scratch, u64a end);

static really_inline
hwlmcb_rv_t flushQueuedLiterals(const struct RoseEngine *t,
                                struct hs_scratch *scratch, u64a end) {
    struct RoseContext *tctxt = &scratch->tctxt;

    if (tctxt->delayLastEndOffset == end) {
        DEBUG_PRINTF("no progress, no flush\n");
        return HWLM_CONTINUE_MATCHING;
    }

    if (!tctxt->filledDelayedSlots && !scratch->al_log_sum) {
        tctxt->delayLastEndOffset = end;
        return HWLM_CONTINUE_MATCHING;
    }

    return flushQueuedLiterals_i(t, scratch, end);
}

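/** \brief Flush any outstanding queued literals at the end of a scan and
 * update the STATUS_DELAY_DIRTY bit to reflect whether delayed-literal slots
 * remain filled. Returns HWLM_TERMINATE_MATCHING if matching has halted. */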
static really_inline
hwlmcb_rv_t cleanUpDelayed(const struct RoseEngine *t,
                           struct hs_scratch *scratch, size_t length,
                           u64a offset) {
    if (can_stop_matching(scratch)) {
        return HWLM_TERMINATE_MATCHING;
    }

    if (flushQueuedLiterals(t, scratch, length + offset)
        == HWLM_TERMINATE_MATCHING) {
        return HWLM_TERMINATE_MATCHING;
    }

    struct RoseContext *tctxt = &scratch->tctxt;
    if (tctxt->filledDelayedSlots) {
        DEBUG_PRINTF("dirty\n");
        scratch->core_info.status |= STATUS_DELAY_DIRTY;
    } else {
        scratch->core_info.status &= ~STATUS_DELAY_DIRTY;
    }

    tctxt->filledDelayedSlots = 0;
    tctxt->delayLastEndOffset = offset;

    return HWLM_CONTINUE_MATCHING;
}

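/** \brief Clear the state bits for roles that must not survive past the last
 * byte of the current buffer, once the scan has actually reached that byte.
 * The set of roles to clear is described by the engine's last-byte-history
 * sparse iterator. */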
static rose_inline
void roseFlushLastByteHistory(const struct RoseEngine *t,
                              struct hs_scratch *scratch, u64a currEnd) {
    if (!t->lastByteHistoryIterOffset) {
        return;
    }

    const struct RoseContext *tctxt = &scratch->tctxt;
    const struct core_info *ci = &scratch->core_info;

    /* currEnd is last byte of string + 1 */
    if (tctxt->lastEndOffset == ci->buf_offset + ci->len
        || currEnd != ci->buf_offset + ci->len) {
        /* already flushed or it is not yet time to flush */
        return;
    }

    DEBUG_PRINTF("flushing\n");

    const struct mmbit_sparse_iter *it =
        getByOffset(t, t->lastByteHistoryIterOffset);
    assert(ISALIGNED(it));

    const u32 numStates = t->rolesWithStateCount;
    void *role_state = getRoleState(scratch->core_info.state);

    struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];

    mmbit_sparse_iter_unset(role_state, numStates, it, si_state);
}

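/** \brief Returns non-zero if any matches are still pending: anchored
 * literals in the log, filled delayed-literal slots, or set role state. */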
static rose_inline
int roseHasInFlightMatches(const struct RoseEngine *t, char *state,
                           const struct hs_scratch *scratch) {
    if (scratch->al_log_sum) {
        DEBUG_PRINTF("anchored literals in log\n");
        return 1;
    }

    if (scratch->tctxt.filledDelayedSlots) {
        DEBUG_PRINTF("delayed literal\n");
        return 1;
    }

    if (mmbit_any(getRoleState(state), t->rolesWithStateCount)) {
        DEBUG_PRINTF("role state is set\n");
        return 1;
    }

    return 0;
}

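/** \brief Request termination if every possible report has been exhausted,
 * setting STATUS_EXHAUSTED and clearing the active group mask. */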
static rose_inline
hwlmcb_rv_t roseHaltIfExhausted(const struct RoseEngine *t,
                                struct hs_scratch *scratch) {
    struct core_info *ci = &scratch->core_info;
    if (isAllExhausted(t, ci->exhaustionVector)) {
        ci->status |= STATUS_EXHAUSTED;
        scratch->tctxt.groups = 0;
        DEBUG_PRINTF("all exhausted, termination requested\n");
        return HWLM_TERMINATE_MATCHING;
    }

    return HWLM_CONTINUE_MATCHING;
}

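/** \brief Make room on queue \p qi so that further events can be enqueued at
 * location \p loc.
 *
 * If the queue is already parked at \p loc it is executed in place to drain
 * it; otherwise Rose catches up to \p loc (flushing the MPV first when
 * required). The queue is (re)initialized and activated if it was not already
 * running. */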
static really_inline
hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t,
                                 struct hs_scratch *scratch, u32 qi, s64a loc,
                                 char is_mpv, char in_catchup) {
    struct RoseContext *tctxt = &scratch->tctxt;
    u8 *aa = getActiveLeafArray(t, scratch->core_info.state);
    struct fatbit *activeQueues = scratch->aqa;
    u32 aaCount = t->activeArrayCount;
    u32 qCount = t->queueCount;

    struct mq *q = &scratch->queues[qi];
    DEBUG_PRINTF("qcl %lld, loc: %lld, min (non mpv) match offset: %llu\n",
                 q_cur_loc(q), loc, tctxt->minNonMpvMatchOffset);
    if (q_cur_loc(q) == loc) {
        /* too many tops enqueued at the one spot; need to flatten this queue.
         * We can use the full catchups as it will short circuit as we are
         * already at this location. It also saves waking everybody up */
        pushQueueNoMerge(q, MQE_END, loc);
        nfaQueueExec(q->nfa, q, loc);
        q->cur = q->end = 0;
        pushQueueAt(q, 0, MQE_START, loc);
    } else if (!in_catchup) {
        if (is_mpv) {
            tctxt->next_mpv_offset = 0; /* force us to catch the mpv */
            if (loc + scratch->core_info.buf_offset
                <= tctxt->minNonMpvMatchOffset) {
                DEBUG_PRINTF("flushing chained\n");
                if (roseCatchUpMPV(t, loc, scratch) ==
                    HWLM_TERMINATE_MATCHING) {
                    return HWLM_TERMINATE_MATCHING;
                }
                goto done_queue_empty;
            }
        }

        if (roseCatchUpTo(t, scratch, loc + scratch->core_info.buf_offset) ==
            HWLM_TERMINATE_MATCHING) {
            return HWLM_TERMINATE_MATCHING;
        }
    } else {
        /* we must be a chained nfa */
        assert(is_mpv);
        DEBUG_PRINTF("flushing chained\n");
        tctxt->next_mpv_offset = 0; /* force us to catch the mpv */
        if (roseCatchUpMPV(t, loc, scratch) == HWLM_TERMINATE_MATCHING) {
            return HWLM_TERMINATE_MATCHING;
        }
    }
done_queue_empty:
    if (!mmbit_set(aa, aaCount, qi)) {
        initQueue(q, qi, t, scratch);
        nfaQueueInitState(q->nfa, q);
        pushQueueAt(q, 0, MQE_START, loc);
        fatbit_set(activeQueues, qCount, qi);
    }

    assert(!isQueueFull(q));

    return roseHaltIfExhausted(t, scratch);
}

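/** \brief Convenience wrapper around ensureQueueFlushed_i() for ordinary
 * (non-MPV, non-catchup) queues. */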
static rose_inline
hwlmcb_rv_t ensureQueueFlushed(const struct RoseEngine *t,
                               struct hs_scratch *scratch, u32 qi, s64a loc) {
    return ensureQueueFlushed_i(t, scratch, qi, loc, 0, 0);
}

#endif