vectorscan/src/runtime.c

1240 lines
38 KiB
C

/*
* Copyright (c) 2015-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Runtime functions.
*/
#include <stdlib.h>
#include <string.h>
#include "allocator.h"
#include "hs_compile.h" /* for HS_MODE_* flags */
#include "hs_runtime.h"
#include "hs_internal.h"
#include "hwlm/hwlm.h"
#include "nfa/mcclellan.h"
#include "nfa/nfa_api.h"
#include "nfa/nfa_api_util.h"
#include "nfa/nfa_internal.h"
#include "nfa/nfa_rev_api.h"
#include "nfa/sheng.h"
#include "smallwrite/smallwrite_internal.h"
#include "rose/rose.h"
#include "rose/runtime.h"
#include "database.h"
#include "report.h"
#include "scratch.h"
#include "som/som_runtime.h"
#include "som/som_stream.h"
#include "state.h"
#include "stream_compress.h"
#include "ue2common.h"
#include "util/exhaust.h"
#include "util/multibit.h"
/** \brief Issue prefetch hints for the start, middle and tail of a buffer.
 *
 * This only emits hints; it neither reads nor writes through \p data.
 *
 * \param data   Start of the buffer that is about to be scanned.
 * \param length Length of the buffer in bytes. May be shorter than 24 bytes
 *               (hs_scan() calls this before its zero-length check).
 */
static really_inline
void prefetch_data(const char *data, unsigned length) {
    // Clamp the tail offset. For buffers shorter than 24 bytes the expression
    // "data + length - 24" would form a pointer before the start of the
    // buffer, which is undefined behaviour even if it is never dereferenced.
    const unsigned tail = length > 24 ? length - 24 : 0;

    __builtin_prefetch(data);
    __builtin_prefetch(data + length / 2);
    __builtin_prefetch(data + tail);
}
/** \brief No-op match callback.
 *
 * Substituted by populateCoreInfo() when the caller passes a NULL handler.
 * Every argument is ignored and the function always returns 0.
 */
static
int HS_CDECL null_onEvent(UNUSED unsigned id, UNUSED unsigned long long from,
                          UNUSED unsigned long long to, UNUSED unsigned flags,
                          UNUSED void *ctxt) {
    return 0;
}
/** \brief Number of history bytes available at stream offset \p offset.
 *
 * \return The smaller of t->historyRequired and \p offset.
 */
static really_inline
u32 getHistoryAmount(const struct RoseEngine *t, u64a offset) {
    if (offset < t->historyRequired) {
        /* fewer bytes seen so far than the engine asks for */
        return (u32)offset;
    }
    return t->historyRequired;
}
/** \brief Pointer to the first valid history byte inside the stream state.
 *
 * The history region starts at t->stateOffsets.history and is
 * t->historyRequired bytes long; the returned pointer is positioned so that
 * the available history (see getHistoryAmount()) ends at the end of that
 * region.
 */
static really_inline
u8 *getHistory(char *state, const struct RoseEngine *t, u64a offset) {
    u8 *history_end =
        (u8 *)state + t->stateOffsets.history + t->historyRequired;
    return history_end - getHistoryAmount(t, offset);
}
/** \brief Sanity checks for scratch space.
 *
 * Although more at home in scratch.c, it is located here to be closer to its
 * callers.
 *
 * Checks, in order: cache-line alignment of \p s, the scratch magic value,
 * that a block-mode engine's state fits in the scratch's block state, and
 * that the scratch has at least as many queues as the engine.
 *
 * \return 1 if every check passes, 0 otherwise.
 */
static really_inline
char validScratch(const struct RoseEngine *t, const struct hs_scratch *s) {
    char ok = 0;

    if (!ISALIGNED_CL(s)) {
        DEBUG_PRINTF("bad alignment %p\n", s);
    } else if (s->magic != SCRATCH_MAGIC) {
        DEBUG_PRINTF("bad magic 0x%x\n", s->magic);
    } else if (t->mode == HS_MODE_BLOCK
               && s->bStateSize < t->stateOffsets.end) {
        DEBUG_PRINTF("bad state size\n");
    } else if (s->queueCount < t->queueCount) {
        DEBUG_PRINTF("bad queue count\n");
    } else {
        /* TODO: add quick rose sanity checks */
        ok = 1;
    }

    return ok;
}
/** \brief Fill in the per-scan fields of a scratch ahead of a scan.
 *
 * \param s        Scratch whose core_info, deduper and tctxt fields are set.
 * \param rose     Engine for this scan; must be non-NULL.
 * \param state    Base of the stream/block state.
 * \param onEvent  Match callback; a NULL value is replaced by null_onEvent.
 * \param userCtx  Opaque pointer stored as the user context.
 * \param data     Buffer to scan (may be NULL when there is no data).
 * \param length   Length of \p data in bytes.
 * \param history  History buffer pointer.
 * \param hlen     Number of valid history bytes.
 * \param offset   Stream offset of the first byte of \p data.
 * \param status   Status flags to seed core_info.status with.
 * \param flags    Unused.
 */
static really_inline
void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose,
                      char *state, match_event_handler onEvent, void *userCtx,
                      const char *data, size_t length, const u8 *history,
                      size_t hlen, u64a offset, u8 status,
                      UNUSED unsigned int flags) {
    assert(rose);

    /* engine and state */
    s->core_info.rose = rose;
    s->core_info.state = state; /* required for chained queues + evec */
    s->core_info.exhaustionVector = state + rose->stateOffsets.exhausted;
    s->core_info.status = status;

    /* user callback: fall back to the no-op handler if none was supplied */
    if (onEvent) {
        s->core_info.userCallback = onEvent;
    } else {
        s->core_info.userCallback = null_onEvent;
    }
    s->core_info.userContext = userCtx;

    /* data and history buffers */
    s->core_info.buf = (const u8 *)data;
    s->core_info.len = length;
    s->core_info.buf_offset = offset;
    s->core_info.hbuf = history;
    s->core_info.hlen = hlen;

    /* and some stuff not actually in core info */
    s->fdr_conf = NULL;
    s->som_set_now_offset = ~0ULL;
    s->deduper.som_log_dirty = 1; /* som logs have not been cleared */
    s->deduper.current_report_offset = ~0ULL;

    // Rose program execution (used for some report paths) depends on these
    // values being initialised.
    s->tctxt.minNonMpvMatchOffset = offset;
    s->tctxt.minMatchOffset = offset;
    s->tctxt.lastMatchOffset = 0;
}
/** \brief Mask of the status bits that getStreamStatus() and setStreamStatus()
 * accept; both assert that no bit outside this mask is set. */
#define STATUS_VALID_BITS \
(STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_DELAY_DIRTY)
/** \brief Read the status flag byte out of a stream state.
 *
 * The byte lives at ROSE_STATE_OFFSET_STATUS_FLAGS from \p state. Asserts
 * that only bits in STATUS_VALID_BITS are set.
 */
static really_inline
u8 getStreamStatus(const char *state) {
    const u8 *flags = (const u8 *)(state + ROSE_STATE_OFFSET_STATUS_FLAGS);
    const u8 status = *flags;
    assert((status & ~STATUS_VALID_BITS) == 0);
    return status;
}
/** \brief Write the status flag byte into a stream state.
 *
 * The byte lives at ROSE_STATE_OFFSET_STATUS_FLAGS from \p state. Asserts
 * that \p status only contains bits in STATUS_VALID_BITS.
 */
static really_inline
void setStreamStatus(char *state, u8 status) {
    assert((status & ~STATUS_VALID_BITS) == 0);
    u8 *flags = (u8 *)(state + ROSE_STATE_OFFSET_STATUS_FLAGS);
    *flags = status;
}
/** \brief Initialise SOM state. Used in both block and streaming mode. */
static really_inline
void initSomState(const struct RoseEngine *rose, char *state) {
assert(rose && state);
const u32 somCount = rose->somLocationCount;
mmbit_clear((u8 *)state + rose->stateOffsets.somValid, somCount);
mmbit_clear((u8 *)state + rose->stateOffsets.somWritable, somCount);
}
/** \brief Block-mode scan using the full Rose runtime.
 *
 * Resets the SOM state held in the scratch's current state, then hands the
 * scan to roseBlockExec(). Expects core_info to have been populated already.
 */
static really_inline
void rawBlockExec(const struct RoseEngine *rose, struct hs_scratch *scratch) {
    assert(rose);
    assert(scratch);

    /* SOM multibits must be clean before the engine runs */
    initSomState(rose, scratch->core_info.state);

    DEBUG_PRINTF("blockmode scan len=%zu\n", scratch->core_info.len);

    roseBlockExec(rose, scratch);
}
/** \brief Extra scratch initialisation for the pure-literal runtime.
 *
 * Must be called after populateCoreInfo(): \p offset has to equal the
 * buf_offset already stored in the scratch.
 */
static really_inline
void pureLiteralInitScratch(struct hs_scratch *scratch, u64a offset) {
    // Some init has already been done.
    assert(offset == scratch->core_info.buf_offset);

    scratch->al_log_sum = 0;

    scratch->tctxt.filledDelayedSlots = 0;
    scratch->tctxt.delayLastEndOffset = offset;
    scratch->tctxt.lastEndOffset = offset;
    scratch->tctxt.lit_offset_adjust = offset + 1;
}
/** \brief Block-mode scan for engines that consist only of the floating
 * literal matcher.
 *
 * Resets SOM state, initialises the literal-matching scratch fields at
 * offset 0 and runs hwlmExec() over the whole buffer in core_info.
 */
static really_inline
void pureLiteralBlockExec(const struct RoseEngine *rose,
                          struct hs_scratch *scratch) {
    assert(rose);
    assert(scratch);

    const struct HWLM *floating_table = getFLiteralMatcher(rose);

    initSomState(rose, scratch->core_info.state);

    DEBUG_PRINTF("rose engine %d\n", rose->runtimeImpl);

    pureLiteralInitScratch(scratch, 0);
    scratch->tctxt.groups = rose->initialGroups;

    hwlmExec(floating_table, scratch->core_info.buf, scratch->core_info.len,
             0, roseCallback, scratch,
             rose->initialGroups & rose->floating_group_mask);
}
/** \brief Prepare queue \p q for running the outfix engine at index \p qi.
 *
 * Points the queue at the engine, its full and stream state, and the
 * buffer/history currently described by the scratch's core_info. The queue
 * is left empty (cur == end == 0).
 */
static really_inline
void initOutfixQueue(struct mq *q, u32 qi, const struct RoseEngine *t,
                     struct hs_scratch *scratch) {
    const struct NfaInfo *info = getNfaInfoByQueue(t, qi);

    /* engine and its state */
    q->nfa = getNfaByInfo(t, info);
    q->state = scratch->fullState + info->fullStateOffset;
    q->streamState = (char *)scratch->core_info.state + info->stateOffset;

    /* empty event list */
    q->cur = 0;
    q->end = 0;

    /* data being scanned */
    q->buffer = scratch->core_info.buf;
    q->length = scratch->core_info.len;
    q->offset = scratch->core_info.buf_offset;
    q->history = scratch->core_info.hbuf;
    q->hlength = scratch->core_info.hlen;

    /* reporting */
    q->cb = roseReportAdaptor;
    q->context = scratch;
    q->report_current = 0;

    DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
                 "state=%u\n", qi, q->offset, info->fullStateOffset,
                 info->stateOffset, *(u32 *)q->state);
}
/** \brief Block-mode scan for engines that consist of a single outfix.
 *
 * Runs the engine in queue 0 over the buffer, possibly shortened by
 * nfaRevAccelCheck(). End-of-data accepts are only checked when the engine
 * is still alive, accepts at EOD, and the buffer was not shortened.
 */
static never_inline
void soleOutfixBlockExec(const struct RoseEngine *t,
                         struct hs_scratch *scratch) {
    assert(t);
    assert(scratch);

    initSomState(t, scratch->core_info.state);

    /* exactly one outfix and no literal matchers */
    assert(t->outfixEndQueue == 1);
    assert(!t->amatcherOffset);
    assert(!t->ematcherOffset);
    assert(!t->fmatcherOffset);

    const struct NFA *nfa = getNfaByQueue(t, 0);

    const size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf,
                                        scratch->core_info.len);
    if (len == 0) {
        return;
    }

    struct mq *q = scratch->queues;
    initOutfixQueue(q, 0, t, scratch);
    q->length = len; /* adjust for rev_accel */
    nfaQueueInitState(nfa, q);

    pushQueueAt(q, 0, MQE_START, 0);
    pushQueueAt(q, 1, MQE_TOP, 0);
    pushQueueAt(q, 2, MQE_END, scratch->core_info.len);

    if (nfaQueueExec(q->nfa, q, scratch->core_info.len)
        && nfaAcceptsEod(nfa) && len == scratch->core_info.len) {
        nfaCheckFinalState(nfa, q->state, q->streamState, q->length, q->cb,
                           scratch);
    }
}
/** \brief Scan the buffer in core_info with the small-write DFA.
 *
 * Nothing is scanned when the buffer is no longer than smwr->start_offset.
 * Otherwise the DFA is run from start_offset onwards, dispatching on the
 * engine type: 8-bit McClellan, 16-bit McClellan, or Sheng for anything else.
 */
static rose_inline
void runSmallWriteEngine(const struct SmallWriteEngine *smwr,
                         struct hs_scratch *scratch) {
    assert(smwr);
    assert(scratch);

    const u8 *buffer = scratch->core_info.buf;
    size_t length = scratch->core_info.len;

    DEBUG_PRINTF("USING SMALL WRITE\n");

    if (length <= smwr->start_offset) {
        DEBUG_PRINTF("too short\n");
        return;
    }

    const struct NFA *nfa = getSmwrNfa(smwr);

    /* the DFA only sees the bytes from start_offset onwards */
    const u8 *scan_buf = buffer + smwr->start_offset;
    size_t scan_len = length - smwr->start_offset;

    assert(isDfaType(nfa->type));

    switch (nfa->type) {
    case MCCLELLAN_NFA_8:
        nfaExecMcClellan8_B(nfa, smwr->start_offset, scan_buf, scan_len,
                            roseReportAdaptor, scratch);
        break;
    case MCCLELLAN_NFA_16:
        nfaExecMcClellan16_B(nfa, smwr->start_offset, scan_buf, scan_len,
                             roseReportAdaptor, scratch);
        break;
    default:
        nfaExecSheng_B(nfa, smwr->start_offset, scan_buf, scan_len,
                       roseReportAdaptor, scratch);
        break;
    }
}
/** \brief Block-mode scan entry point.
 *
 * Validates the arguments, database and scratch, marks the scratch as in use,
 * runs the boundary programs and one of the runtime implementations (or the
 * small-write engine for short buffers), then performs the end-of-scan SOM
 * and combination flushes. The scratch is unmarked on every exit path taken
 * after it has been marked.
 *
 * \param db       Database; must pass validDatabase() and be HS_MODE_BLOCK.
 * \param data     Buffer to scan; must be non-NULL.
 * \param length   Length of \p data in bytes.
 * \param flags    Passed through to populateCoreInfo(), which ignores it.
 * \param scratch  Scratch space; must be non-NULL and pass validScratch().
 * \param onEvent  Match callback; NULL is replaced by a no-op handler inside
 *                 populateCoreInfo().
 * \param userCtx  Opaque pointer stored as the callback context.
 *
 * \return HS_INVALID for NULL \p scratch / \p data, a misaligned bytecode or
 *         an invalid scratch; the validDatabase() error code if the database
 *         is rejected; HS_DB_MODE_ERROR for a non-block database;
 *         HS_SCRATCH_IN_USE if the scratch is already marked in use;
 *         HS_SCAN_TERMINATED if matching was told to stop or a flush step
 *         reported a halt; HS_SUCCESS otherwise.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data,
unsigned length, unsigned flags,
hs_scratch_t *scratch, match_event_handler onEvent,
void *userCtx) {
if (unlikely(!scratch || !data)) {
return HS_INVALID;
}
hs_error_t err = validDatabase(db);
if (unlikely(err != HS_SUCCESS)) {
return err;
}
const struct RoseEngine *rose = hs_get_bytecode(db);
if (unlikely(!ISALIGNED_16(rose))) {
return HS_INVALID;
}
if (unlikely(rose->mode != HS_MODE_BLOCK)) {
return HS_DB_MODE_ERROR;
}
if (unlikely(!validScratch(rose, scratch))) {
return HS_INVALID;
}
// From here on the scratch is marked in use; every return below must
// unmark it first.
if (unlikely(markScratchInUse(scratch))) {
return HS_SCRATCH_IN_USE;
}
// Buffer shorter than the engine's minimum width: nothing is scanned and
// no boundary programs are run.
if (rose->minWidth > length) {
DEBUG_PRINTF("minwidth=%u > length=%u\n", rose->minWidth, length);
unmarkScratchInUse(scratch);
return HS_SUCCESS;
}
// Note: length may still be zero here; the zero-length case is handled
// further down.
prefetch_data(data, length);
/* populate core info in scratch */
populateCoreInfo(scratch, rose, scratch->bstate, onEvent, userCtx, data,
length, NULL, 0, 0, 0, flags);
clearEvec(rose, scratch->core_info.exhaustionVector);
// Logical/combination vectors are only set up when the engine has
// combination keys (ckeyCount != 0).
if (rose->ckeyCount) {
scratch->core_info.logicalVector = scratch->bstate +
rose->stateOffsets.logicalVec;
scratch->core_info.combVector = scratch->bstate +
rose->stateOffsets.combVec;
scratch->tctxt.lastCombMatchOffset = 0;
clearLvec(rose, scratch->core_info.logicalVector,
scratch->core_info.combVector);
}
// Zero-length buffer: only the "zero and EOD" boundary program (if any)
// is run, then we go straight to computing the return value.
if (!length) {
if (rose->boundary.reportZeroEodOffset) {
roseRunBoundaryProgram(rose, rose->boundary.reportZeroEodOffset, 0,
scratch);
}
goto set_retval;
}
// Boundary program for offset zero; a halt skips the scan and the
// done_scan flush steps.
if (rose->boundary.reportZeroOffset) {
int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset,
0, scratch);
if (rv == MO_HALT_MATCHING) {
goto set_retval;
}
}
// Too short for anything other than boundary reports: skip the engines
// but still run the done_scan steps.
if (rose->minWidthExcludingBoundaries > length) {
DEBUG_PRINTF("minWidthExcludingBoundaries=%u > length=%u\n",
rose->minWidthExcludingBoundaries, length);
goto done_scan;
}
// Similarly, we may have a maximum width (for engines constructed entirely
// of bi-anchored patterns).
if (rose->maxBiAnchoredWidth != ROSE_BOUND_INF
&& length > rose->maxBiAnchoredWidth) {
DEBUG_PRINTF("block len=%u longer than maxBAWidth=%u\n", length,
rose->maxBiAnchoredWidth);
goto done_scan;
}
// Is this a small write case?
if (rose->smallWriteOffset) {
const struct SmallWriteEngine *smwr = getSmallWrite(rose);
assert(smwr);
// Apply the small write engine if and only if the block (buffer) is
// small enough. Otherwise, we allow rose &co to deal with it.
if (length < smwr->largestBuffer) {
DEBUG_PRINTF("Attempting small write of block %u bytes long.\n",
length);
runSmallWriteEngine(smwr, scratch);
goto done_scan;
}
}
// Dispatch on the runtime implementation. An unknown value asserts; with
// assertions compiled out, control falls through from "default" into the
// full Rose case.
switch (rose->runtimeImpl) {
default:
assert(0);
case ROSE_RUNTIME_FULL_ROSE:
rawBlockExec(rose, scratch);
break;
case ROSE_RUNTIME_PURE_LITERAL:
pureLiteralBlockExec(rose, scratch);
break;
case ROSE_RUNTIME_SINGLE_OUTFIX:
soleOutfixBlockExec(rose, scratch);
break;
}
done_scan:
// End-of-scan steps: bail out if matching was stopped, then flush stored
// SOM matches, run the EOD boundary program and flush combinations.
if (told_to_stop_matching(scratch)) {
unmarkScratchInUse(scratch);
return HS_SCAN_TERMINATED;
}
if (rose->hasSom) {
int halt = flushStoredSomMatches(scratch, ~0ULL);
if (halt) {
unmarkScratchInUse(scratch);
return HS_SCAN_TERMINATED;
}
}
// The return value of the EOD boundary program is not inspected here; a
// stop request is picked up by told_to_stop_matching() at set_retval.
if (rose->boundary.reportEodOffset) {
roseRunBoundaryProgram(rose, rose->boundary.reportEodOffset, length,
scratch);
}
if (rose->flushCombProgramOffset) {
if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
unmarkScratchInUse(scratch);
return HS_SCAN_TERMINATED;
}
}
set_retval:
DEBUG_PRINTF("done. told_to_stop_matching=%d\n",
told_to_stop_matching(scratch));
// Read the stop flag before releasing the scratch.
hs_error_t rv = told_to_stop_matching(scratch) ? HS_SCAN_TERMINATED
: HS_SUCCESS;
unmarkScratchInUse(scratch);
return rv;
}
/** \brief Update the stream's history buffer after scanning a block.
 *
 * The history region holds the last rose->historyRequired bytes seen. If the
 * new block is shorter than that, the surviving tail of the old history is
 * shifted to the front first; the end of the new block is then copied into
 * the end of the region.
 *
 * \param rose    Engine; nothing is done if it requires no history.
 * \param state   Base of the stream state.
 * \param buffer  Block that has just been scanned.
 * \param length  Length of \p buffer; nothing is done if it is zero.
 */
static really_inline
void maintainHistoryBuffer(const struct RoseEngine *rose, char *state,
                           const char *buffer, size_t length) {
    const size_t hist_size = rose->historyRequired;
    if (hist_size == 0) {
        return;
    }

    // Hopefully few of our users are scanning no data.
    if (unlikely(length == 0)) {
        DEBUG_PRINTF("zero-byte scan\n");
        return;
    }

    char *his_state = state + rose->stateOffsets.history;

    if (length < hist_size) {
        /* keep the newest (hist_size - length) old bytes, moved to the front;
         * source and destination overlap, hence memmove */
        memmove(his_state, his_state + length, hist_size - length);
    }

    const size_t copy_len = MIN(hist_size, length);
    memcpy(his_state + hist_size - copy_len, buffer + length - copy_len,
           copy_len);

#ifdef DEBUG_HISTORY
    printf("History [%u] : ", rose->historyRequired);
    for (size_t i = 0; i < rose->historyRequired; i++) {
        printf(" %02hhx", his_state[i]);
    }
    printf("\n");
#endif
}
/** \brief Put a stream into its initial (offset zero) state.
 *
 * \param s             Stream to initialise.
 * \param rose          Engine the stream belongs to.
 * \param init_history  Non-zero to also fill the last 16 bytes of the history
 *                      region with a fixed pattern.
 */
static really_inline
void init_stream(struct hs_stream *s, const struct RoseEngine *rose,
                 char init_history) {
    char *state = getMultiState(s);

    if (init_history) {
        // Make absolutely sure that the 16 bytes leading up to the end of the
        // history buffer are initialised, as we rely on this (regardless of the
        // actual values used) in FDR.
        const size_t pad = 16;
        char *history_tail =
            state + rose->stateOffsets.history + rose->historyRequired;
        assert(history_tail - pad >= (const char *)s);
        memset(history_tail - pad, 0x5a, pad);
    }

    s->rose = rose;
    s->offset = 0;

    setStreamStatus(state, 0);
    roseInitState(rose, state);

    clearEvec(rose, state + rose->stateOffsets.exhausted);
    if (rose->ckeyCount) {
        clearLvec(rose, state + rose->stateOffsets.logicalVec,
                  state + rose->stateOffsets.combVec);
    }

    // SOM state multibit structures.
    initSomState(rose, state);
}
/** \brief Allocate and initialise a new stream for a streaming database.
 *
 * \param db      Database; must pass validDatabase() and be HS_MODE_STREAM.
 * \param flags   Unused.
 * \param stream  Out parameter; set to NULL on entry and to the new stream
 *                on success.
 *
 * \return HS_INVALID if \p stream is NULL or the bytecode is misaligned; the
 *         validDatabase() error code; HS_DB_MODE_ERROR for a non-streaming
 *         database; HS_NOMEM if allocation fails; HS_SUCCESS otherwise.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db,
                                   UNUSED unsigned flags,
                                   hs_stream_t **stream) {
    if (unlikely(!stream)) {
        return HS_INVALID;
    }
    *stream = NULL;

    hs_error_t err = validDatabase(db);
    if (unlikely(err != HS_SUCCESS)) {
        return err;
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    if (unlikely(!ISALIGNED_16(rose))) {
        return HS_INVALID;
    }
    if (unlikely(rose->mode != HS_MODE_STREAM)) {
        return HS_DB_MODE_ERROR;
    }

    /* the stream header is followed directly by the engine's stream state */
    size_t state_bytes = rose->stateOffsets.end;
    struct hs_stream *new_stream =
        hs_stream_alloc(sizeof(struct hs_stream) + state_bytes);
    if (unlikely(!new_stream)) {
        return HS_NOMEM;
    }

    init_stream(new_stream, rose, 1);
    *stream = new_stream;

    return HS_SUCCESS;
}
/** \brief End-of-data processing for a stream using the full Rose runtime.
 *
 * Does nothing if matching can already stop or every report is exhausted;
 * otherwise runs roseStreamEodExec() at the stream's current offset.
 */
static really_inline
void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
    const struct RoseEngine *rose = id->rose;

    if (can_stop_matching(scratch)) {
        DEBUG_PRINTF("stream already broken\n");
    } else if (isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
        DEBUG_PRINTF("stream exhausted\n");
    } else {
        roseStreamEodExec(rose, id->offset, scratch);
    }
}
/** \brief End-of-data processing for a stream whose engine is a single
 * outfix.
 *
 * Does nothing if matching can already stop, every report is exhausted, or
 * no data has been scanned (buf_offset is zero). Otherwise the engine's
 * stream state is expanded and its final state is checked for accepts.
 */
static never_inline
void soleOutfixEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
    const struct RoseEngine *t = id->rose;

    if (can_stop_matching(scratch)) {
        DEBUG_PRINTF("stream already broken\n");
        return;
    }

    if (isAllExhausted(t, scratch->core_info.exhaustionVector)) {
        DEBUG_PRINTF("stream exhausted\n");
        return;
    }

    /* exactly one outfix and no literal matchers */
    assert(t->outfixEndQueue == 1);
    assert(!t->amatcherOffset);
    assert(!t->ematcherOffset);
    assert(!t->fmatcherOffset);

    const struct NFA *nfa = getNfaByQueue(t, 0);

    struct mq *q = scratch->queues;
    initOutfixQueue(q, 0, t, scratch);

    if (scratch->core_info.buf_offset == 0) {
        DEBUG_PRINTF("buf_offset is zero\n");
        return; /* no vacuous engines */
    }

    nfaExpandState(nfa, q->state, q->streamState, q->offset,
                   queue_prev_byte(q, 0));

    assert(nfaAcceptsEod(nfa));
    nfaCheckFinalState(nfa, q->state, q->streamState, q->offset, q->cb,
                       scratch);
}
/** \brief Run end-of-data processing for a stream and report any matches.
 *
 * Called with the scratch already marked in use. Returns immediately if the
 * stream's stored status is terminated or exhausted. Otherwise the scratch is
 * populated with no data buffer (only history), the appropriate boundary
 * program is run, the runtime-specific EOD routine is invoked if the engine
 * requires an EOD check, and stored SOM matches and combinations are flushed.
 *
 * A halt from a flush step is recorded in scratch->core_info.status only;
 * this function does not write the status back into the stream state.
 *
 * \param id       Stream being closed/reset.
 * \param scratch  Scratch space to run with.
 * \param onEvent  Match callback; must be non-NULL (asserted).
 * \param context  Opaque pointer stored as the callback context.
 */
static really_inline
void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
match_event_handler onEvent, void *context) {
DEBUG_PRINTF("--- report eod matches at offset %llu\n", id->offset);
assert(onEvent);
const struct RoseEngine *rose = id->rose;
char *state = getMultiState(id);
u8 status = getStreamStatus(state);
if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED)) {
DEBUG_PRINTF("stream is broken, just freeing storage\n");
return;
}
// No new data at EOD: buffer is NULL/0, only the history is supplied.
populateCoreInfo(scratch, rose, state, onEvent, context, NULL, 0,
getHistory(state, rose, id->offset),
getHistoryAmount(rose, id->offset), id->offset, status, 0);
// Unlike hs_scan(), the logical/combination vectors are not cleared here;
// they live in the stream state.
if (rose->ckeyCount) {
scratch->core_info.logicalVector = state +
rose->stateOffsets.logicalVec;
scratch->core_info.combVector = state + rose->stateOffsets.combVec;
scratch->tctxt.lastCombMatchOffset = id->offset;
}
if (rose->somLocationCount) {
loadSomFromStream(scratch, id->offset);
}
// Nothing was ever scanned on this stream: only the "zero and EOD"
// boundary program applies.
if (!id->offset) {
if (rose->boundary.reportZeroEodOffset) {
int rv = roseRunBoundaryProgram(
rose, rose->boundary.reportZeroEodOffset, 0, scratch);
if (rv == MO_HALT_MATCHING) {
return;
}
}
} else {
if (rose->boundary.reportEodOffset) {
int rv = roseRunBoundaryProgram(
rose, rose->boundary.reportEodOffset, id->offset, scratch);
if (rv == MO_HALT_MATCHING) {
return;
}
}
if (rose->requiresEodCheck) {
// Unknown and pure-literal runtimes assert here; with assertions
// compiled out, control falls through into the full Rose case.
switch (rose->runtimeImpl) {
default:
case ROSE_RUNTIME_PURE_LITERAL:
assert(0);
case ROSE_RUNTIME_FULL_ROSE:
rawEodExec(id, scratch);
break;
case ROSE_RUNTIME_SINGLE_OUTFIX:
soleOutfixEodExec(id, scratch);
break;
}
}
}
// Flush steps are skipped once matching has been told to stop.
if (rose->hasSom && !told_to_stop_matching(scratch)) {
int halt = flushStoredSomMatches(scratch, ~0ULL);
if (halt) {
DEBUG_PRINTF("told to stop matching\n");
scratch->core_info.status |= STATUS_TERMINATED;
}
}
if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
DEBUG_PRINTF("told to stop matching\n");
scratch->core_info.status |= STATUS_TERMINATED;
}
}
}
/** \brief Duplicate a stream into newly allocated storage.
 *
 * \param to_id    Out parameter; set to NULL on entry and to the copy on
 *                 success.
 * \param from_id  Stream to copy; must be non-NULL and have an engine.
 *
 * \return HS_INVALID for bad arguments, HS_NOMEM if allocation fails,
 *         HS_SUCCESS otherwise.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_copy_stream(hs_stream_t **to_id,
                                   const hs_stream_t *from_id) {
    if (!to_id) {
        return HS_INVALID;
    }
    *to_id = NULL;

    if (!from_id || !from_id->rose) {
        return HS_INVALID;
    }

    /* stream header plus the engine's stream state */
    const size_t total_bytes =
        sizeof(struct hs_stream) + from_id->rose->stateOffsets.end;

    struct hs_stream *copy = hs_stream_alloc(total_bytes);
    if (!copy) {
        return HS_NOMEM;
    }

    memcpy(copy, from_id, total_bytes);
    *to_id = copy;

    return HS_SUCCESS;
}
/** \brief Overwrite an existing stream with a copy of another stream.
 *
 * If \p onEvent is given, end-of-data matches for \p to_id are reported
 * (using \p scratch) before it is overwritten.
 *
 * \param to_id    Destination stream; must share its engine with \p from_id
 *                 and be a different stream.
 * \param from_id  Source stream; must be non-NULL and have an engine.
 * \param scratch  Scratch space; only required (and validated) when
 *                 \p onEvent is non-NULL.
 * \param onEvent  Optional match callback for the EOD report.
 * \param context  Opaque pointer passed on as the callback context.
 *
 * \return HS_INVALID for bad arguments or scratch, HS_SCRATCH_IN_USE if the
 *         scratch is already marked in use, HS_SUCCESS otherwise.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id,
                                             const hs_stream_t *from_id,
                                             hs_scratch_t *scratch,
                                             match_event_handler onEvent,
                                             void *context) {
    if (!from_id || !from_id->rose
        || !to_id || to_id->rose != from_id->rose
        || to_id == from_id) {
        return HS_INVALID;
    }

    if (onEvent) {
        if (!scratch) {
            return HS_INVALID;
        }
        if (!validScratch(to_id->rose, scratch)) {
            return HS_INVALID;
        }
        if (unlikely(markScratchInUse(scratch))) {
            return HS_SCRATCH_IN_USE;
        }
        report_eod_matches(to_id, scratch, onEvent, context);
        unmarkScratchInUse(scratch);
    }

    /* stream header plus the engine's stream state */
    const size_t total_bytes =
        sizeof(struct hs_stream) + from_id->rose->stateOffsets.end;
    memcpy(to_id, from_id, total_bytes);

    return HS_SUCCESS;
}
/** \brief Streaming scan of the current block using the full Rose runtime.
 *
 * After roseStreamExec() returns, the scratch status gains STATUS_EXHAUSTED
 * if matching was not told to stop and every report is exhausted.
 */
static really_inline
void rawStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) {
    assert(stream_state);
    assert(scratch);
    assert(!can_stop_matching(scratch));

    DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
                 stream_state->offset, scratch->core_info.len);

    const struct RoseEngine *rose = stream_state->rose;
    assert(rose);

    roseStreamExec(rose, scratch);

    if (told_to_stop_matching(scratch)) {
        return;
    }
    if (isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
        DEBUG_PRINTF("stream exhausted\n");
        scratch->core_info.status |= STATUS_EXHAUSTED;
    }
}
/** \brief Streaming scan of the current block for engines that consist only
 * of the floating literal matcher.
 *
 * After the literal matcher returns, the scratch status gains
 * STATUS_EXHAUSTED if matching was not told to stop and every report is
 * exhausted.
 */
static really_inline
void pureLiteralStreamExec(struct hs_stream *stream_state,
                           struct hs_scratch *scratch) {
    assert(stream_state);
    assert(scratch);
    assert(!can_stop_matching(scratch));

    const struct RoseEngine *rose = stream_state->rose;
    const struct HWLM *floating_table = getFLiteralMatcher(rose);
    size_t block_len = scratch->core_info.len;

    DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
                 stream_state->offset, scratch->core_info.len);

    pureLiteralInitScratch(scratch, stream_state->offset);
    scratch->tctxt.groups = loadGroups(rose, scratch->core_info.state);

    // Pure literal cases don't have floatingMinDistance set, so we always
    // start the match region at zero.
    const size_t region_start = 0;

    hwlmExecStreaming(floating_table, block_len, region_start, roseCallback,
                      scratch,
                      rose->initialGroups & rose->floating_group_mask);

    if (told_to_stop_matching(scratch)) {
        return;
    }
    if (isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
        DEBUG_PRINTF("stream exhausted\n");
        scratch->core_info.status |= STATUS_EXHAUSTED;
    }
}
/** \brief Streaming scan of the current block for engines that consist of a
 * single outfix.
 *
 * On the first block (buf_offset zero) the engine state is initialised and a
 * TOP event is queued; on later blocks the compressed stream state is
 * expanded instead. If the engine is still alive after the block its state
 * is compressed back into the stream; otherwise, unless matching was told to
 * stop, the scratch status gains STATUS_EXHAUSTED.
 */
static never_inline
void soleOutfixStreamExec(struct hs_stream *stream_state,
                          struct hs_scratch *scratch) {
    assert(stream_state);
    assert(scratch);
    assert(!can_stop_matching(scratch));

    const struct RoseEngine *t = stream_state->rose;

    /* exactly one outfix and no literal matchers */
    assert(t->outfixEndQueue == 1);
    assert(!t->amatcherOffset);
    assert(!t->ematcherOffset);
    assert(!t->fmatcherOffset);

    const struct NFA *nfa = getNfaByQueue(t, 0);

    struct mq *q = scratch->queues;
    initOutfixQueue(q, 0, t, scratch);

    if (scratch->core_info.buf_offset) {
        /* continuing a stream: restore state from the previous block */
        nfaExpandState(nfa, q->state, q->streamState, q->offset,
                       queue_prev_byte(q, 0));
        pushQueueAt(q, 0, MQE_START, 0);
        pushQueueAt(q, 1, MQE_END, scratch->core_info.len);
    } else {
        /* first block of the stream: fresh state plus a TOP event */
        nfaQueueInitState(nfa, q);
        pushQueueAt(q, 0, MQE_START, 0);
        pushQueueAt(q, 1, MQE_TOP, 0);
        pushQueueAt(q, 2, MQE_END, scratch->core_info.len);
    }

    if (nfaQueueExec(q->nfa, q, scratch->core_info.len)) {
        nfaQueueCompressState(nfa, q, scratch->core_info.len);
    } else if (!told_to_stop_matching(scratch)) {
        scratch->core_info.status |= STATUS_EXHAUSTED;
    }
}
/** \brief Scan one block of data on a stream.
 *
 * Shared by hs_scan_stream() and hs_scan_vector(); the caller is responsible
 * for validating and marking the scratch. The stream's status byte is read on
 * entry and written back after the scan. History, the stream offset and
 * stored SOM state are only advanced when matching has not been halted.
 *
 * \param id       Stream to scan on; must be non-NULL (asserted).
 * \param data     Block to scan; must be non-NULL.
 * \param length   Length of \p data in bytes; zero is a no-op.
 * \param flags    Passed through to populateCoreInfo(), which ignores it.
 * \param scratch  Scratch space; must be non-NULL (asserted).
 * \param onEvent  Match callback; NULL is replaced by a no-op handler inside
 *                 populateCoreInfo().
 * \param context  Opaque pointer stored as the callback context.
 *
 * \return HS_INVALID if \p data is NULL; HS_SCAN_TERMINATED if the stream is
 *         already terminated or matching was told to stop during this block;
 *         HS_SUCCESS otherwise (including for an exhausted stream).
 */
static inline
hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
unsigned length, UNUSED unsigned flags,
hs_scratch_t *scratch,
match_event_handler onEvent, void *context) {
assert(id);
assert(scratch);
if (unlikely(!data)) {
return HS_INVALID;
}
const struct RoseEngine *rose = id->rose;
char *state = getMultiState(id);
u8 status = getStreamStatus(state);
// A stream that was terminated or exhausted by an earlier block does no
// further work.
if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED)) {
DEBUG_PRINTF("stream is broken, halting scan\n");
if (status & STATUS_TERMINATED) {
return HS_SCAN_TERMINATED;
} else {
return HS_SUCCESS;
}
}
// We avoid doing any work if the user has given us zero bytes of data to
// scan. Arguably we should define some semantics for how we treat vacuous
// cases here.
if (unlikely(length == 0)) {
DEBUG_PRINTF("zero length block\n");
return HS_SUCCESS;
}
u32 historyAmount = getHistoryAmount(rose, id->offset);
populateCoreInfo(scratch, rose, state, onEvent, context, data, length,
getHistory(state, rose, id->offset), historyAmount,
id->offset, status, flags);
// The logical/combination vectors live in the stream state and are not
// cleared between blocks.
if (rose->ckeyCount) {
scratch->core_info.logicalVector = state +
rose->stateOffsets.logicalVec;
scratch->core_info.combVector = state + rose->stateOffsets.combVec;
scratch->tctxt.lastCombMatchOffset = id->offset;
}
assert(scratch->core_info.hlen <= id->offset
&& scratch->core_info.hlen <= rose->historyRequired);
prefetch_data(data, length);
if (rose->somLocationCount) {
loadSomFromStream(scratch, id->offset);
}
// The offset-zero boundary program only runs on the first non-empty block
// of the stream.
if (!id->offset && rose->boundary.reportZeroOffset) {
DEBUG_PRINTF("zero reports\n");
int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset,
0, scratch);
if (rv == MO_HALT_MATCHING) {
DEBUG_PRINTF("halting scan\n");
// Persist the halt so later calls on this stream return early.
setStreamStatus(state, scratch->core_info.status);
if (told_to_stop_matching(scratch)) {
return HS_SCAN_TERMINATED;
} else {
assert(scratch->core_info.status & STATUS_EXHAUSTED);
return HS_SUCCESS;
}
}
}
// Dispatch on the runtime implementation. An unknown value asserts; with
// assertions compiled out, control falls through from "default" into the
// full Rose case. The last case has no break as the switch ends there.
switch (rose->runtimeImpl) {
default:
assert(0);
case ROSE_RUNTIME_FULL_ROSE:
rawStreamExec(id, scratch);
break;
case ROSE_RUNTIME_PURE_LITERAL:
pureLiteralStreamExec(id, scratch);
break;
case ROSE_RUNTIME_SINGLE_OUTFIX:
soleOutfixStreamExec(id, scratch);
}
// Flush steps are skipped once matching has been told to stop; a halt
// from either one marks the stream as terminated.
if (rose->hasSom && !told_to_stop_matching(scratch)) {
int halt = flushStoredSomMatches(scratch, ~0ULL);
if (halt) {
scratch->core_info.status |= STATUS_TERMINATED;
}
}
if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) {
if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) {
scratch->core_info.status |= STATUS_TERMINATED;
}
}
// Write the (possibly updated) status back into the stream state.
setStreamStatus(state, scratch->core_info.status);
// Only a stream that can keep matching has its history, offset and SOM
// state advanced past this block.
if (likely(!can_stop_matching(scratch))) {
maintainHistoryBuffer(rose, state, data, length);
id->offset += length; /* maintain offset */
if (rose->somLocationCount) {
storeSomToStream(scratch, id->offset);
}
} else if (told_to_stop_matching(scratch)) {
return HS_SCAN_TERMINATED;
}
return HS_SUCCESS;
}
/** \brief Public streaming scan entry point.
 *
 * Validates the arguments and scratch, marks the scratch in use for the
 * duration of the call and delegates to hs_scan_stream_internal().
 *
 * \return HS_INVALID for a NULL \p id / \p scratch / \p data or an invalid
 *         scratch, HS_SCRATCH_IN_USE if the scratch is already marked in
 *         use, otherwise the result of hs_scan_stream_internal().
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data,
                                   unsigned length, unsigned flags,
                                   hs_scratch_t *scratch,
                                   match_event_handler onEvent, void *context) {
    if (unlikely(!id || !scratch || !data)) {
        return HS_INVALID;
    }
    if (unlikely(!validScratch(id->rose, scratch))) {
        return HS_INVALID;
    }

    if (unlikely(markScratchInUse(scratch))) {
        return HS_SCRATCH_IN_USE;
    }

    const hs_error_t result = hs_scan_stream_internal(
        id, data, length, flags, scratch, onEvent, context);

    unmarkScratchInUse(scratch);
    return result;
}
/** \brief Close a stream and free its storage.
 *
 * If \p onEvent is given, end-of-data matches are reported using \p scratch
 * before the stream is freed. The stream is not freed if the function
 * returns an error.
 *
 * \return HS_INVALID for a NULL \p id or (when \p onEvent is set) a missing
 *         or invalid scratch, HS_SCRATCH_IN_USE if the scratch is already
 *         marked in use, HS_SUCCESS otherwise.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
                                    match_event_handler onEvent,
                                    void *context) {
    if (!id) {
        return HS_INVALID;
    }

    if (onEvent) {
        if (!scratch) {
            return HS_INVALID;
        }
        if (!validScratch(id->rose, scratch)) {
            return HS_INVALID;
        }
        if (unlikely(markScratchInUse(scratch))) {
            return HS_SCRATCH_IN_USE;
        }
        report_eod_matches(id, scratch, onEvent, context);
        unmarkScratchInUse(scratch);
    }

    hs_stream_free(id);

    return HS_SUCCESS;
}
/** \brief Reset a stream to its initial state, keeping its storage.
 *
 * If \p onEvent is given, end-of-data matches are reported using \p scratch
 * before the reset.
 *
 * \return HS_INVALID for a NULL \p id or (when \p onEvent is set) a missing
 *         or invalid scratch, HS_SCRATCH_IN_USE if the scratch is already
 *         marked in use, HS_SUCCESS otherwise.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
                                    hs_scratch_t *scratch,
                                    match_event_handler onEvent,
                                    void *context) {
    if (!id) {
        return HS_INVALID;
    }

    if (onEvent) {
        if (!scratch) {
            return HS_INVALID;
        }
        if (!validScratch(id->rose, scratch)) {
            return HS_INVALID;
        }
        if (unlikely(markScratchInUse(scratch))) {
            return HS_SCRATCH_IN_USE;
        }
        report_eod_matches(id, scratch, onEvent, context);
        unmarkScratchInUse(scratch);
    }

    // history already initialised
    init_stream(id, id->rose, 0);

    return HS_SUCCESS;
}
/** \brief Report the number of bytes a stream of this database occupies.
 *
 * \param db           Database; must pass validDatabase() and be
 *                     HS_MODE_STREAM.
 * \param stream_size  Out parameter; receives the size on success.
 *
 * \return HS_INVALID if \p stream_size is NULL or the bytecode is
 *         misaligned, the validDatabase() error code, HS_DB_MODE_ERROR for a
 *         non-streaming database, HS_SUCCESS otherwise.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_stream_size(const hs_database_t *db,
                                   size_t *stream_size) {
    if (!stream_size) {
        return HS_INVALID;
    }

    const hs_error_t ret = validDatabase(db);
    if (ret != HS_SUCCESS) {
        return ret;
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    if (!ISALIGNED_16(rose)) {
        return HS_INVALID;
    }
    if (rose->mode != HS_MODE_STREAM) {
        return HS_DB_MODE_ERROR;
    }

    // stream state plus the hs_stream struct itself
    const u32 state_bytes = rose->stateOffsets.end;
    *stream_size = state_bytes + sizeof(struct hs_stream);

    return HS_SUCCESS;
}
#if defined(DEBUG) || defined(DUMP_SUPPORT)
#include "util/compare.h"
/** \brief Debugging aid: print a buffer with non-printable bytes hex-escaped.
 *
 * Bytes accepted by ourisprint() (other than the single quote) are printed
 * as-is; everything else is printed as \\xNN.
 */
static UNUSED
void dumpData(const char *data, size_t len) {
    DEBUG_PRINTF("BUFFER:");
    const char *end = data + len;
    for (const char *p = data; p != end; ++p) {
        const u8 c = *p;
        if (!ourisprint(c) || c == '\'') {
            printf("\\x%02x", c);
        } else {
            printf("%c", c);
        }
    }
    printf("\n");
}
#endif
/** \brief Vectored-mode scan entry point.
 *
 * Treats the \p count blocks as consecutive writes to a temporary stream
 * held in the scratch's block state: the stream is initialised, each block
 * is scanned with hs_scan_stream_internal(), and (if \p onEvent is given)
 * end-of-data matches are reported.
 *
 * \param db       Database; must pass validDatabase() and be
 *                 HS_MODE_VECTORED.
 * \param data     Array of \p count block pointers; must be non-NULL.
 * \param length   Array of \p count block lengths; must be non-NULL.
 * \param count    Number of blocks.
 * \param flags    Unused.
 * \param scratch  Scratch space; must be non-NULL and pass validScratch().
 * \param onEvent  Match callback.
 * \param context  Opaque pointer passed on as the callback context.
 *
 * \return HS_INVALID for bad arguments, bytecode alignment or scratch; the
 *         validDatabase() error code; HS_DB_MODE_ERROR for a non-vectored
 *         database; HS_SCRATCH_IN_USE if the scratch is already marked in
 *         use; the first non-success result of a block scan;
 *         HS_SCAN_TERMINATED if matching was told to stop during the EOD
 *         report; HS_SUCCESS otherwise.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db,
                                   const char * const * data,
                                   const unsigned int *length,
                                   unsigned int count,
                                   UNUSED unsigned int flags,
                                   hs_scratch_t *scratch,
                                   match_event_handler onEvent, void *context) {
    if (unlikely(!scratch || !data || !length)) {
        return HS_INVALID;
    }

    hs_error_t err = validDatabase(db);
    if (unlikely(err != HS_SUCCESS)) {
        return err;
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    if (unlikely(!ISALIGNED_16(rose))) {
        return HS_INVALID;
    }
    if (unlikely(rose->mode != HS_MODE_VECTORED)) {
        return HS_DB_MODE_ERROR;
    }
    if (unlikely(!validScratch(rose, scratch))) {
        return HS_INVALID;
    }

    if (unlikely(markScratchInUse(scratch))) {
        return HS_SCRATCH_IN_USE;
    }

    /* the temporary stream lives in the scratch's block state */
    hs_stream_t *id = (hs_stream_t *)(scratch->bstate);
    init_stream(id, rose, 1); /* open stream */

    hs_error_t result = HS_SUCCESS;

    for (u32 i = 0; i < count; i++) {
        DEBUG_PRINTF("block %u/%u offset=%llu len=%u\n", i, count, id->offset,
                     length[i]);
#ifdef DEBUG
        dumpData(data[i], length[i]);
#endif
        result = hs_scan_stream_internal(id, data[i], length[i], 0, scratch,
                                         onEvent, context);
        if (result != HS_SUCCESS) {
            break;
        }
    }

    /* close stream */
    if (result == HS_SUCCESS && onEvent) {
        report_eod_matches(id, scratch, onEvent, context);
        if (told_to_stop_matching(scratch)) {
            result = HS_SCAN_TERMINATED;
        }
    }

    unmarkScratchInUse(scratch);
    return result;
}
/** \brief Serialise a stream into a caller-provided buffer.
 *
 * \param stream      Stream to compress; must be non-NULL.
 * \param buf         Destination buffer; may be NULL only if \p buf_space is
 *                    zero.
 * \param buf_space   Size of \p buf in bytes.
 * \param used_space  Out parameter; always receives the number of bytes the
 *                    compressed stream needs, even when \p buf is too small.
 *
 * \return HS_INVALID for bad arguments, HS_INSUFFICIENT_SPACE if
 *         \p buf_space is smaller than the required size, HS_SUCCESS
 *         otherwise.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_compress_stream(const hs_stream_t *stream, char *buf,
                                       size_t buf_space, size_t *used_space) {
    if (unlikely(!stream || !used_space)) {
        return HS_INVALID;
    }
    if (unlikely(!buf && buf_space)) {
        return HS_INVALID;
    }

    const struct RoseEngine *rose = stream->rose;

    const size_t required = size_compress_stream(rose, stream);
    DEBUG_PRINTF("require %zu [orig %zu]\n", required,
                 rose->stateOffsets.end + sizeof(struct hs_stream));

    /* report the requirement before checking whether it fits */
    *used_space = required;
    if (required > buf_space) {
        return HS_INSUFFICIENT_SPACE;
    }

    compress_stream(buf, required, rose, stream);

    return HS_SUCCESS;
}
/** \brief Recreate a stream from a buffer produced by hs_compress_stream().
 *
 * \param db        Database; must pass validDatabase() and be
 *                  HS_MODE_STREAM.
 * \param stream    Out parameter; set to NULL on entry and to the new stream
 *                  on success.
 * \param buf       Compressed stream data; must be non-NULL.
 * \param buf_size  Size of \p buf in bytes.
 *
 * \return HS_INVALID for bad arguments, a misaligned bytecode or if
 *         expand_stream() rejects the buffer; the validDatabase() error
 *         code; HS_DB_MODE_ERROR for a non-streaming database; HS_NOMEM if
 *         allocation fails; HS_SUCCESS otherwise.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_expand_stream(const hs_database_t *db,
                                     hs_stream_t **stream,
                                     const char *buf, size_t buf_size) {
    if (unlikely(!stream || !buf)) {
        return HS_INVALID;
    }
    *stream = NULL;

    hs_error_t err = validDatabase(db);
    if (unlikely(err != HS_SUCCESS)) {
        return err;
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    if (unlikely(!ISALIGNED_16(rose))) {
        return HS_INVALID;
    }
    if (unlikely(rose->mode != HS_MODE_STREAM)) {
        return HS_DB_MODE_ERROR;
    }

    /* stream state plus the hs_stream struct itself */
    const size_t total_bytes =
        rose->stateOffsets.end + sizeof(struct hs_stream);

    struct hs_stream *expanded = hs_stream_alloc(total_bytes);
    if (unlikely(!expanded)) {
        return HS_NOMEM;
    }

    if (!expand_stream(expanded, rose, buf, buf_size)) {
        /* don't leak the allocation on a bad buffer */
        hs_stream_free(expanded);
        return HS_INVALID;
    }

    *stream = expanded;
    return HS_SUCCESS;
}
/** \brief Overwrite an existing stream with one expanded from a compressed
 * buffer.
 *
 * If \p onEvent is given, end-of-data matches for \p to_stream are reported
 * (using \p scratch) before it is overwritten.
 *
 * \return HS_INVALID for a NULL \p to_stream / \p buf, a missing or invalid
 *         scratch when \p onEvent is set, or if expand_stream() rejects the
 *         buffer; HS_SCRATCH_IN_USE if the scratch is already marked in use;
 *         HS_SUCCESS otherwise.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_reset_and_expand_stream(hs_stream_t *to_stream,
                                               const char *buf, size_t buf_size,
                                               hs_scratch_t *scratch,
                                               match_event_handler onEvent,
                                               void *context) {
    if (unlikely(!to_stream || !buf)) {
        return HS_INVALID;
    }

    /* read the engine pointer before the stream contents are replaced */
    const struct RoseEngine *rose = to_stream->rose;

    if (onEvent) {
        if (!scratch) {
            return HS_INVALID;
        }
        if (!validScratch(to_stream->rose, scratch)) {
            return HS_INVALID;
        }
        if (unlikely(markScratchInUse(scratch))) {
            return HS_SCRATCH_IN_USE;
        }
        report_eod_matches(to_stream, scratch, onEvent, context);
        unmarkScratchInUse(scratch);
    }

    return expand_stream(to_stream, rose, buf, buf_size) ? HS_SUCCESS
                                                         : HS_INVALID;
}