vectorscan/src/runtime.c
Justin Viiret 50885f210a exhaust: Update interface
- Only use functions in exhaust.h for valid ekeys
 - Use INVALID_EKEY everywhere (remove dupe END_EXHAUST sentinel)
2016-04-20 13:34:55 +10:00

1133 lines
34 KiB
C

/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Runtime functions.
*/
#include <stdlib.h>
#include <string.h>
#include "allocator.h"
#include "hs_compile.h" /* for HS_MODE_* flags */
#include "hs_runtime.h"
#include "hs_internal.h"
#include "hwlm/hwlm.h"
#include "nfa/mcclellan.h"
#include "nfa/nfa_api.h"
#include "nfa/nfa_api_util.h"
#include "nfa/nfa_internal.h"
#include "nfa/nfa_rev_api.h"
#include "smallwrite/smallwrite_internal.h"
#include "rose/rose.h"
#include "rose/runtime.h"
#include "database.h"
#include "report.h"
#include "scratch.h"
#include "som/som_runtime.h"
#include "som/som_stream.h"
#include "state.h"
#include "ue2common.h"
#include "util/exhaust.h"
#include "util/fatbit.h"
#include "util/multibit.h"
/**
 * \brief Issue prefetch hints for the start, middle and tail of a buffer.
 *
 * The tail hint targets the byte 24 positions before the end of the buffer.
 * For buffers shorter than that, the hint is clamped to the start of the
 * buffer so that no pointer outside [data, data + length] is ever formed:
 * computing such a pointer is undefined behaviour in C, and the address
 * expression handed to __builtin_prefetch must itself be valid.
 *
 * \param data   Start of the buffer about to be scanned.
 * \param length Length of the buffer in bytes.
 */
static really_inline
void prefetch_data(const char *data, unsigned length) {
    /* distance from the end of the buffer targeted by the final hint */
    const unsigned tail_distance = 24;
    const unsigned tail_offset =
        length > tail_distance ? length - tail_distance : 0;

    __builtin_prefetch(data);
    __builtin_prefetch(data + length / 2);
    __builtin_prefetch(data + tail_offset);
}
/**
 * \brief No-op match callback.
 *
 * Stands in for the user's match handler when none is supplied. Every
 * argument is ignored and the function always returns 0.
 */
static
int null_onEvent(UNUSED unsigned id, UNUSED unsigned long long from,
                 UNUSED unsigned long long to, UNUSED unsigned flags,
                 UNUSED void *ctxt) {
    const int rv = 0;
    return rv;
}
/**
 * \brief Number of history bytes usable at stream offset \p offset.
 *
 * This is the smaller of the engine's historyRequired and \p offset.
 */
static really_inline
u32 getHistoryAmount(const struct RoseEngine *t, u64a offset) {
    if (offset < t->historyRequired) {
        /* fewer bytes have been seen than the engine asks for */
        return (u32)offset;
    }
    return t->historyRequired;
}
/**
 * \brief Pointer to the first usable history byte inside stream state.
 *
 * Computed as (state + stateOffsets.history + historyRequired) moved back by
 * min(historyRequired, offset) bytes.
 */
static really_inline
u8 *getHistory(char *state, const struct RoseEngine *t, u64a offset) {
    const u64a usable = MIN(t->historyRequired, offset);
    u8 *upper = (u8 *)state + t->stateOffsets.history + t->historyRequired;
    return upper - usable;
}
/**
 * \brief Quick validity checks on a scratch region for a given engine.
 *
 * This would fit naturally in scratch.c, but is kept here next to the code
 * that calls it.
 *
 * \return 1 if the scratch passes every check, 0 otherwise.
 */
static really_inline
char validScratch(const struct RoseEngine *t, const struct hs_scratch *s) {
    /* The pointer must be cacheline aligned before any field is trusted. */
    if (!ISALIGNED_CL(s)) {
        DEBUG_PRINTF("bad alignment %p\n", s);
        return 0;
    }

    if (s->magic != SCRATCH_MAGIC) {
        DEBUG_PRINTF("bad magic 0x%x\n", s->magic);
        return 0;
    }

    /* Only block-mode engines are checked against bStateSize. */
    if (t->mode == HS_MODE_BLOCK) {
        if (t->stateOffsets.end > s->bStateSize) {
            DEBUG_PRINTF("bad state size\n");
            return 0;
        }
    }

    /* The scratch must provide at least as many queues as the engine. */
    if (t->queueCount > s->queueCount) {
        DEBUG_PRINTF("bad queue count\n");
        return 0;
    }

    /* TODO: add quick rose sanity checks */
    return 1;
}
/**
 * \brief Fill in scratch->core_info (and a few related scratch fields) ahead
 * of a scan.
 *
 * \param s       Scratch whose core_info is written.
 * \param rose    Engine to scan with; must be non-NULL.
 * \param state   Base of the state region; also used to locate the
 *                exhaustion vector via rose->stateOffsets.exhausted.
 * \param onEvent User match callback; null_onEvent is used when NULL.
 * \param userCtx Opaque pointer stored as the user context.
 * \param data    Buffer to scan, with its \p length.
 * \param history History buffer, with its length \p hlen.
 * \param offset  Stored as core_info.buf_offset.
 * \param status  Initial status bitmask.
 * \param flags   Unused.
 */
static really_inline
void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose,
                      char *state, match_event_handler onEvent, void *userCtx,
                      const char *data, size_t length, const u8 *history,
                      size_t hlen, u64a offset, u8 status,
                      UNUSED unsigned int flags) {
    assert(rose);

    /* user-facing callback plumbing */
    if (onEvent) {
        s->core_info.userCallback = onEvent;
    } else {
        s->core_info.userCallback = null_onEvent;
    }
    s->core_info.userContext = userCtx;

    /* engine and its state */
    s->core_info.rose = rose;
    s->core_info.state = state; /* required for chained queues + evec */
    s->core_info.exhaustionVector = state + rose->stateOffsets.exhausted;
    s->core_info.status = status;

    /* current buffer and the history preceding it */
    s->core_info.buf = (const u8 *)data;
    s->core_info.len = length;
    s->core_info.buf_offset = offset;
    s->core_info.hbuf = history;
    s->core_info.hlen = hlen;

    /* and some stuff not actually in core info */
    s->som_set_now_offset = ~0ULL;
    s->deduper.current_report_offset = ~0ULL;
    s->deduper.som_log_dirty = 1; /* som logs have not been cleared */
}
/** \brief Union of the status flags that the assertions in getStreamStatus()
 * and setStreamStatus() accept in a stream status byte. */
#define STATUS_VALID_BITS \
(STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_DELAY_DIRTY)
/**
 * \brief Read the status bitmask held in the first byte of stream state.
 *
 * Asserts that no bits outside STATUS_VALID_BITS are set.
 */
static really_inline
u8 getStreamStatus(const char *state) {
    const u8 *status_byte = (const u8 *)state;
    const u8 current = *status_byte;
    assert(!(current & ~STATUS_VALID_BITS));
    return current;
}
/**
 * \brief Write the status bitmask into the first byte of stream state.
 *
 * Asserts that \p status has no bits outside STATUS_VALID_BITS.
 */
static really_inline
void setStreamStatus(char *state, u8 status) {
    u8 *status_byte = (u8 *)state;
    assert(!(status & ~STATUS_VALID_BITS));
    *status_byte = status;
}
/** \brief Rose callback: roseAdaptor_i with both trailing flags clear; the
 * variant selectAdaptor() returns when neither simpleCallback nor hasSom is
 * set. */
static
int roseAdaptor(u64a offset, ReportID id, struct hs_scratch *scratch) {
    const char is_simple = 0;
    const char do_som = 0;
    return roseAdaptor_i(offset, id, scratch, is_simple, do_som);
}
/** \brief Rose callback: roseAdaptor_i with the fourth argument set and the
 * fifth clear; the variant selectAdaptor() returns for simpleCallback without
 * hasSom. */
static
int roseSimpleAdaptor(u64a offset, ReportID id, struct hs_scratch *scratch) {
    const char is_simple = 1;
    const char do_som = 0;
    return roseAdaptor_i(offset, id, scratch, is_simple, do_som);
}
/** \brief Rose callback: roseAdaptor_i with the fourth argument clear and
 * the fifth set; the variant selectAdaptor() returns for hasSom without
 * simpleCallback. */
static
int roseSomAdaptor(u64a offset, ReportID id, struct hs_scratch *scratch) {
    const char is_simple = 0;
    const char do_som = 1;
    return roseAdaptor_i(offset, id, scratch, is_simple, do_som);
}
/** \brief Rose callback: roseAdaptor_i with both trailing flags set; the
 * variant selectAdaptor() returns when simpleCallback and hasSom are both
 * set. */
static
int roseSimpleSomAdaptor(u64a offset, ReportID id, struct hs_scratch *scratch) {
    const char is_simple = 1;
    const char do_som = 1;
    return roseAdaptor_i(offset, id, scratch, is_simple, do_som);
}
/**
 * \brief Pick the Rose match callback variant for this engine.
 *
 * The choice depends on two engine fields: hasSom and simpleCallback.
 */
static really_inline
RoseCallback selectAdaptor(const struct RoseEngine *rose) {
    const char simple = rose->simpleCallback;
    const char som = rose->hasSom;

    if (som && simple) {
        return roseSimpleSomAdaptor;
    }
    if (som) {
        return roseSomAdaptor;
    }
    if (simple) {
        return roseSimpleAdaptor;
    }
    return roseAdaptor;
}
/** \brief Rose SOM callback: roseSomAdaptor_i with its last argument clear;
 * the variant selectSomAdaptor() returns when simpleCallback is not set. */
static
int roseSomSomAdaptor(u64a from_offset, u64a to_offset, ReportID id,
                      struct hs_scratch *scratch) {
    const char is_simple = 0;
    return roseSomAdaptor_i(from_offset, to_offset, id, scratch, is_simple);
}
/** \brief Rose SOM callback: roseSomAdaptor_i with its last argument set;
 * the variant selectSomAdaptor() returns when simpleCallback is set. */
static
int roseSimpleSomSomAdaptor(u64a from_offset, u64a to_offset, ReportID id,
                            struct hs_scratch *scratch) {
    const char is_simple = 1;
    return roseSomAdaptor_i(from_offset, to_offset, id, scratch, is_simple);
}
/** \brief Pick the Rose SOM callback variant according to the engine's
 * simpleCallback field. */
static really_inline
RoseCallbackSom selectSomAdaptor(const struct RoseEngine *rose) {
    const char simple = rose->simpleCallback;
    if (simple) {
        return roseSimpleSomSomAdaptor;
    }
    return roseSomSomAdaptor;
}
/** \brief NFA callback for outfixes: passes \p context on to roseAdaptor_i
 * with both trailing flags set. */
static
int outfixSimpleSomAdaptor(u64a offset, ReportID id, void *context) {
    const char is_simple = 1;
    const char do_som = 1;
    return roseAdaptor_i(offset, id, context, is_simple, do_som);
}
/** \brief NFA callback for outfixes: passes \p context on to roseAdaptor_i
 * with the fourth argument set and the fifth clear. */
static
int outfixSimpleAdaptor(u64a offset, ReportID id, void *context) {
    const char is_simple = 1;
    const char do_som = 0;
    return roseAdaptor_i(offset, id, context, is_simple, do_som);
}
/** \brief NFA callback for outfixes: passes \p context on to roseAdaptor_i
 * with the fourth argument clear and the fifth set. */
static
int outfixSomAdaptor(u64a offset, ReportID id, void *context) {
    const char is_simple = 0;
    const char do_som = 1;
    return roseAdaptor_i(offset, id, context, is_simple, do_som);
}
/** \brief NFA callback for outfixes: passes \p context on to roseAdaptor_i
 * with both trailing flags clear. */
static
int outfixAdaptor(u64a offset, ReportID id, void *context) {
    const char is_simple = 0;
    const char do_som = 0;
    return roseAdaptor_i(offset, id, context, is_simple, do_som);
}
/**
 * \brief Pick the outfix NFA callback variant for this engine.
 *
 * The choice depends on two engine fields: hasSom and simpleCallback.
 */
static really_inline
NfaCallback selectOutfixAdaptor(const struct RoseEngine *rose) {
    const char simple = rose->simpleCallback;
    const char som = rose->hasSom;

    if (som && simple) {
        return outfixSimpleSomAdaptor;
    }
    if (som) {
        return outfixSomAdaptor;
    }
    if (simple) {
        return outfixSimpleAdaptor;
    }
    return outfixAdaptor;
}
/** \brief SOM NFA callback for outfixes: passes \p context on to
 * roseSomAdaptor_i with its last argument set. */
static
int outfixSimpleSomSomAdaptor(u64a from_offset, u64a to_offset, ReportID id,
                              void *context) {
    const char is_simple = 1;
    return roseSomAdaptor_i(from_offset, to_offset, id, context, is_simple);
}
/** \brief SOM NFA callback for outfixes: passes \p context on to
 * roseSomAdaptor_i with its last argument clear. */
static
int outfixSomSomAdaptor(u64a from_offset, u64a to_offset, ReportID id,
                        void *context) {
    const char is_simple = 0;
    return roseSomAdaptor_i(from_offset, to_offset, id, context, is_simple);
}
/** \brief Pick the outfix SOM NFA callback variant according to the engine's
 * simpleCallback field. */
static really_inline
SomNfaCallback selectOutfixSomAdaptor(const struct RoseEngine *rose) {
    const char simple = rose->simpleCallback;
    if (simple) {
        return outfixSimpleSomSomAdaptor;
    }
    return outfixSomSomAdaptor;
}
/** \brief Initialise SOM state. Used in both block and streaming mode. */
static really_inline
void initSomState(const struct RoseEngine *rose, char *state) {
assert(rose && state);
const u32 somCount = rose->somLocationCount;
mmbit_clear((u8 *)state + rose->stateOffsets.somValid, somCount);
mmbit_clear((u8 *)state + rose->stateOffsets.somWritable, somCount);
}
static really_inline
void rawBlockExec(const struct RoseEngine *rose, struct hs_scratch *scratch) {
assert(rose);
assert(scratch);
initSomState(rose, scratch->core_info.state);
DEBUG_PRINTF("blockmode scan len=%zu\n", scratch->core_info.len);
roseBlockExec(rose, scratch, selectAdaptor(rose),
selectSomAdaptor(rose));
}
/**
 * \brief Block-mode scan that only runs the literal matcher.
 *
 * Resets SOM state and then runs hwlmExec over the whole scratch buffer,
 * starting at 0, using the table from getFLiteralMatcher(), the engine's
 * initialGroups and rosePureLiteralCallback.
 */
static really_inline
void pureLiteralBlockExec(const struct RoseEngine *rose,
                          struct hs_scratch *scratch) {
    assert(rose);
    assert(scratch);

    const struct HWLM *literal_table = getFLiteralMatcher(rose);
    initSomState(rose, scratch->core_info.state);

    const u8 *scan_buf = scratch->core_info.buf;
    const size_t scan_len = scratch->core_info.len;

    DEBUG_PRINTF("rose engine %d\n", rose->runtimeImpl);

    hwlmExec(literal_table, scan_buf, scan_len, 0, rosePureLiteralCallback,
             scratch, rose->initialGroups);
}
/**
 * \brief Prepare queue \p q to run the NFA behind queue index \p qi.
 *
 * The queue is emptied (cur == end == 0), pointed at the NFA and at its full
 * and stream state locations, given the current buffer and history from
 * scratch->core_info, and wired to the outfix callbacks with \p scratch as
 * the callback context.
 */
static really_inline
void initOutfixQueue(struct mq *q, u32 qi, const struct RoseEngine *t,
                     struct hs_scratch *scratch) {
    const struct NfaInfo *info = getNfaInfoByQueue(t, qi);

    /* engine and an empty event list */
    q->nfa = getNfaByInfo(t, info);
    q->cur = 0;
    q->end = 0;

    /* where this NFA keeps its state */
    q->state = scratch->fullState + info->fullStateOffset;
    q->streamState = (char *)scratch->core_info.state + info->stateOffset;

    /* data being scanned and the history before it */
    q->offset = scratch->core_info.buf_offset;
    q->buffer = scratch->core_info.buf;
    q->length = scratch->core_info.len;
    q->history = scratch->core_info.hbuf;
    q->hlength = scratch->core_info.hlen;

    /* match reporting */
    q->cb = selectOutfixAdaptor(t);
    q->som_cb = selectOutfixSomAdaptor(t);
    q->context = scratch;
    q->report_current = 0;

    DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
                 "state=%u\n", qi, q->offset, info->fullStateOffset,
                 info->stateOffset, *(u32 *)q->state);
}
/**
 * \brief Block-mode scan for an engine that consists of one outfix NFA.
 *
 * Resets SOM state, asks nfaRevAccelCheck() how much of the buffer needs
 * scanning and, unless that is zero, runs the NFA on queue 0 from the start
 * of the buffer. If the NFA is still alive afterwards, accepts at EOD and the
 * whole buffer was in play, its final state is checked for matches.
 */
static never_inline
void soleOutfixBlockExec(const struct RoseEngine *t,
struct hs_scratch *scratch) {
assert(t);
assert(scratch);
initSomState(t, scratch->core_info.state);
/* This path expects exactly one outfix queue and zero amatcherOffset,
 * ematcherOffset and fmatcherOffset. */
assert(t->outfixEndQueue == 1);
assert(!t->amatcherOffset);
assert(!t->ematcherOffset);
assert(!t->fmatcherOffset);
const struct NFA *nfa = getNfaByQueue(t, 0);
size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf,
scratch->core_info.len);
/* A zero result means there is nothing to scan. */
if (!len) {
return;
}
struct mq *q = scratch->queues;
initOutfixQueue(q, 0, t, scratch);
q->length = len; /* adjust for rev_accel */
nfaQueueInitState(nfa, q);
/* Queue events: start and top at location 0, end at the buffer length. */
pushQueueAt(q, 0, MQE_START, 0);
pushQueueAt(q, 1, MQE_TOP, 0);
pushQueueAt(q, 2, MQE_END, scratch->core_info.len);
char rv = nfaQueueExec(q->nfa, q, scratch->core_info.len);
/* The EOD check is skipped when rev accel shortened the scan. */
if (rv && nfaAcceptsEod(nfa) && len == scratch->core_info.len) {
nfaCheckFinalState(nfa, q->state, q->streamState, q->length,
q->cb, q->som_cb, scratch);
}
}
static rose_inline
void runSmallWriteEngine(const struct SmallWriteEngine *smwr,
struct hs_scratch *scratch) {
assert(smwr);
assert(scratch);
const u8 *buffer = scratch->core_info.buf;
size_t length = scratch->core_info.len;
DEBUG_PRINTF("USING SMALL WRITE\n");
if (length <= smwr->start_offset) {
DEBUG_PRINTF("too short\n");
return;
}
const struct NFA *nfa = getSmwrNfa(smwr);
const struct RoseEngine *rose = scratch->core_info.rose;
size_t local_alen = length - smwr->start_offset;
const u8 *local_buffer = buffer + smwr->start_offset;
assert(isMcClellanType(nfa->type));
if (nfa->type == MCCLELLAN_NFA_8) {
nfaExecMcClellan8_B(nfa, smwr->start_offset, local_buffer,
local_alen, selectOutfixAdaptor(rose), scratch);
} else {
nfaExecMcClellan16_B(nfa, smwr->start_offset, local_buffer,
local_alen, selectOutfixAdaptor(rose), scratch);
}
}
/**
 * \brief Block-mode scan of a single buffer.
 *
 * \param db      Compiled database; must pass validDatabase() and be in
 *                block mode.
 * \param data    Buffer to scan; must be non-NULL.
 * \param length  Number of bytes in \p data.
 * \param flags   Passed through to populateCoreInfo(), which ignores it.
 * \param scratch Scratch space; must be non-NULL and pass validScratch().
 * \param onEvent Match callback; may be NULL.
 * \param userCtx Opaque pointer stored as the callback's user context.
 *
 * \return HS_INVALID for a NULL \p scratch or \p data, misaligned bytecode or
 *         an invalid scratch; the validDatabase() error for a bad database;
 *         HS_DB_MODE_ERROR for a database that is not block mode;
 *         HS_SCAN_TERMINATED if matching was told to stop; HS_SUCCESS
 *         otherwise.
 */
HS_PUBLIC_API
hs_error_t hs_scan(const hs_database_t *db, const char *data, unsigned length,
unsigned flags, hs_scratch_t *scratch,
match_event_handler onEvent, void *userCtx) {
if (unlikely(!scratch || !data)) {
return HS_INVALID;
}
hs_error_t err = validDatabase(db);
if (unlikely(err != HS_SUCCESS)) {
return err;
}
const struct RoseEngine *rose = hs_get_bytecode(db);
if (unlikely(!ISALIGNED_16(rose))) {
return HS_INVALID;
}
if (unlikely(rose->mode != HS_MODE_BLOCK)) {
return HS_DB_MODE_ERROR;
}
if (unlikely(!validScratch(rose, scratch))) {
return HS_INVALID;
}
/* A buffer shorter than the engine's minWidth is not scanned at all. */
if (rose->minWidth > length) {
DEBUG_PRINTF("minwidth=%u > length=%u\n", rose->minWidth, length);
return HS_SUCCESS;
}
prefetch_data(data, length);
/* populate core info in scratch */
populateCoreInfo(scratch, rose, scratch->bstate, onEvent, userCtx, data,
length, NULL, 0, 0, 0, flags);
clearEvec(rose, scratch->core_info.exhaustionVector);
// Rose program execution (used for some report paths) depends on these
// values being initialised.
scratch->tctxt.lastMatchOffset = 0;
scratch->tctxt.minMatchOffset = 0;
/* Empty buffer: only the reportZeroEodOffset boundary program is run. */
if (!length) {
if (rose->boundary.reportZeroEodOffset) {
roseRunBoundaryProgram(rose, rose->boundary.reportZeroEodOffset, 0,
scratch);
}
goto set_retval;
}
/* Boundary program for offset zero; it may ask for matching to halt. */
if (rose->boundary.reportZeroOffset) {
int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset,
0, scratch);
if (rv == MO_HALT_MATCHING) {
goto set_retval;
}
}
/* Too short for the main engines: skip ahead to the post-scan steps. */
if (rose->minWidthExcludingBoundaries > length) {
DEBUG_PRINTF("minWidthExcludingBoundaries=%u > length=%u\n",
rose->minWidthExcludingBoundaries, length);
goto done_scan;
}
// Similarly, we may have a maximum width (for engines constructed entirely
// of bi-anchored patterns).
if (rose->maxBiAnchoredWidth != ROSE_BOUND_INF
&& length > rose->maxBiAnchoredWidth) {
DEBUG_PRINTF("block len=%u longer than maxBAWidth=%u\n", length,
rose->maxBiAnchoredWidth);
goto done_scan;
}
// Is this a small write case?
if (rose->smallWriteOffset) {
const struct SmallWriteEngine *smwr = getSmallWrite(rose);
assert(smwr);
// Apply the small write engine if and only if the block (buffer) is
// small enough. Otherwise, we allow rose &co to deal with it.
if (length < smwr->largestBuffer) {
DEBUG_PRINTF("Attempting small write of block %u bytes long.\n",
length);
runSmallWriteEngine(smwr, scratch);
goto done_scan;
}
}
/* Dispatch on the runtime implementation. An unknown value asserts and,
 * when asserts are compiled out, falls through to the full Rose path. */
switch (rose->runtimeImpl) {
default:
assert(0);
case ROSE_RUNTIME_FULL_ROSE:
rawBlockExec(rose, scratch);
break;
case ROSE_RUNTIME_PURE_LITERAL:
pureLiteralBlockExec(rose, scratch);
break;
case ROSE_RUNTIME_SINGLE_OUTFIX:
soleOutfixBlockExec(rose, scratch);
break;
}
/* Post-scan: flush stored SOM matches, then run the reportEodOffset
 * boundary program, unless matching has already been told to stop. */
done_scan:
if (told_to_stop_matching(scratch)) {
return HS_SCAN_TERMINATED;
}
if (rose->hasSom) {
int halt = flushStoredSomMatches(scratch, ~0ULL);
if (halt) {
return HS_SCAN_TERMINATED;
}
}
if (rose->boundary.reportEodOffset) {
roseRunBoundaryProgram(rose, rose->boundary.reportEodOffset, length,
scratch);
}
/* The return value reflects whether matching was told to stop. */
set_retval:
DEBUG_PRINTF("done. told_to_stop_matching=%d\n",
told_to_stop_matching(scratch));
return told_to_stop_matching(scratch) ? HS_SCAN_TERMINATED : HS_SUCCESS;
}
/**
 * \brief Update the stream's history region after scanning a block.
 *
 * The region lives at state + stateOffsets.history and is treated as
 * historyRequired bytes long. On return its tail holds the last
 * min(historyRequired, length) bytes of \p buffer. If the block was shorter
 * than historyRequired, the newest old bytes are first slid towards the
 * front of the region to make room.
 */
static really_inline
void maintainHistoryBuffer(const struct RoseEngine *rose, char *state,
                           const char *buffer, size_t length) {
    const size_t required = rose->historyRequired;
    if (required == 0) {
        return;
    }

    // Hopefully few of our users are scanning no data.
    if (unlikely(length == 0)) {
        DEBUG_PRINTF("zero-byte scan\n");
        return;
    }

    char *his_state = state + rose->stateOffsets.history;

    size_t amount = required;
    if (length < required) {
        /* keep the trailing (required - length) bytes of the old history */
        const size_t kept = required - length;
        memmove(his_state, his_state + length, kept);
        amount = length;
    }

    /* append the tail of the new block at the end of the region */
    memcpy(his_state + required - amount, buffer + length - amount, amount);

#ifdef DEBUG_HISTORY
    printf("History [%u] : ", rose->historyRequired);
    for (size_t i = 0; i < rose->historyRequired; i++) {
        printf(" %02hhx", his_state[i]);
    }
    printf("\n");
#endif
}
/**
 * \brief Put a stream into its initial state for engine \p rose.
 *
 * Records the engine, zeroes the stream offset and status, then initialises
 * the Rose state, the exhaustion vector and the SOM state held in the
 * stream's state region (as returned by getMultiState()).
 */
static really_inline
void init_stream(struct hs_stream *s, const struct RoseEngine *rose) {
s->rose = rose;
s->offset = 0;
char *state = getMultiState(s);
/* no status flags set */
setStreamStatus(state, 0);
roseInitState(rose, state);
clearEvec(rose, state + rose->stateOffsets.exhausted);
// SOM state multibit structures.
initSomState(rose, state);
}
/**
 * \brief Allocate and initialise a new stream for a streaming database.
 *
 * \param db     Compiled database; must pass validDatabase() and be in
 *               stream mode.
 * \param flags  Unused.
 * \param stream Receives the new stream; set to NULL on every failure path
 *               after the initial NULL check.
 *
 * \return HS_INVALID for a NULL \p stream or misaligned bytecode; the
 *         validDatabase() error for a bad database; HS_DB_MODE_ERROR for a
 *         database that is not stream mode; HS_NOMEM if allocation fails;
 *         HS_SUCCESS otherwise.
 */
HS_PUBLIC_API
hs_error_t hs_open_stream(const hs_database_t *db, UNUSED unsigned flags,
                          hs_stream_t **stream) {
    if (unlikely(!stream)) {
        return HS_INVALID;
    }
    *stream = NULL;

    hs_error_t err = validDatabase(db);
    if (unlikely(err != HS_SUCCESS)) {
        return err;
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    if (unlikely(!ISALIGNED_16(rose))) {
        return HS_INVALID;
    }
    if (unlikely(rose->mode != HS_MODE_STREAM)) {
        return HS_DB_MODE_ERROR;
    }

    /* the stream header is followed directly by the engine's state */
    const size_t state_bytes = rose->stateOffsets.end;
    const size_t alloc_bytes = sizeof(struct hs_stream) + state_bytes;

    struct hs_stream *fresh = hs_stream_alloc(alloc_bytes);
    if (unlikely(!fresh)) {
        return HS_NOMEM;
    }

    init_stream(fresh, rose);
    *stream = fresh;
    return HS_SUCCESS;
}
/**
 * \brief End-of-data processing via roseEodExec.
 *
 * Does nothing if the scratch says matching can stop or if the exhaustion
 * vector reports every key exhausted.
 */
static really_inline
void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
    const struct RoseEngine *rose = id->rose;

    if (can_stop_matching(scratch)) {
        DEBUG_PRINTF("stream already broken\n");
        return;
    }

    if (isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
        DEBUG_PRINTF("stream exhausted\n");
        return;
    }

    RoseCallback match_cb = selectAdaptor(rose);
    RoseCallbackSom som_match_cb = selectSomAdaptor(rose);
    roseEodExec(rose, id->offset, scratch, match_cb, som_match_cb);
}
/**
 * \brief End-of-data processing for an engine that consists of one outfix
 * NFA.
 *
 * Does nothing if matching can stop, if every key is exhausted, or if no
 * data has been scanned yet (buf_offset is zero). Otherwise the NFA's stream
 * state is expanded and its final state is checked for matches.
 */
static never_inline
void soleOutfixEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
const struct RoseEngine *t = id->rose;
if (can_stop_matching(scratch)) {
DEBUG_PRINTF("stream already broken\n");
return;
}
if (isAllExhausted(t, scratch->core_info.exhaustionVector)) {
DEBUG_PRINTF("stream exhausted\n");
return;
}
/* This path expects exactly one outfix queue and zero amatcherOffset,
 * ematcherOffset and fmatcherOffset. */
assert(t->outfixEndQueue == 1);
assert(!t->amatcherOffset);
assert(!t->ematcherOffset);
assert(!t->fmatcherOffset);
const struct NFA *nfa = getNfaByQueue(t, 0);
struct mq *q = scratch->queues;
initOutfixQueue(q, 0, t, scratch);
if (!scratch->core_info.buf_offset) {
DEBUG_PRINTF("buf_offset is zero\n");
return; /* no vacuous engines */
}
/* Expand the stored stream state into q->state before inspecting it. */
nfaExpandState(nfa, q->state, q->streamState, q->offset,
queue_prev_byte(q, 0));
assert(nfaAcceptsEod(nfa));
nfaCheckFinalState(nfa, q->state, q->streamState, q->offset, q->cb,
q->som_cb, scratch);
}
/**
 * \brief Run end-of-data processing for a stream and report any matches.
 *
 * Returns immediately if the stream's stored status has STATUS_TERMINATED or
 * STATUS_EXHAUSTED set. Otherwise scratch is populated with an empty buffer
 * at the stream's current offset and the EOD boundary programs, the
 * engine-specific EOD execution and the SOM flush are run.
 *
 * \param id      Stream to finish.
 * \param scratch Scratch space to use.
 * \param onEvent Match callback; must be non-NULL.
 * \param context Opaque pointer stored as the callback's user context.
 */
static really_inline
void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
match_event_handler onEvent, void *context) {
DEBUG_PRINTF("--- report eod matches at offset %llu\n", id->offset);
assert(onEvent);
const struct RoseEngine *rose = id->rose;
char *state = getMultiState(id);
u8 status = getStreamStatus(state);
if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED)) {
DEBUG_PRINTF("stream is broken, just freeing storage\n");
return;
}
/* No new data: NULL buffer of length 0, with the stream's history. */
populateCoreInfo(scratch, rose, state, onEvent, context, NULL, 0,
getHistory(state, rose, id->offset),
getHistoryAmount(rose, id->offset), id->offset, status, 0);
// Rose program execution (used for some report paths) depends on these
// values being initialised.
scratch->tctxt.lastMatchOffset = 0;
scratch->tctxt.minMatchOffset = id->offset;
if (rose->somLocationCount) {
loadSomFromStream(scratch, id->offset);
}
/* A stream at offset zero only runs the reportZeroEodOffset program. */
if (!id->offset) {
if (rose->boundary.reportZeroEodOffset) {
int rv = roseRunBoundaryProgram(
rose, rose->boundary.reportZeroEodOffset, 0, scratch);
if (rv == MO_HALT_MATCHING) {
return;
}
}
} else {
if (rose->boundary.reportEodOffset) {
int rv = roseRunBoundaryProgram(
rose, rose->boundary.reportEodOffset, id->offset, scratch);
if (rv == MO_HALT_MATCHING) {
return;
}
}
if (rose->requiresEodCheck) {
/* Unknown and pure-literal values assert; when asserts are compiled out
 * they fall through to the full Rose path. */
switch (rose->runtimeImpl) {
default:
case ROSE_RUNTIME_PURE_LITERAL:
assert(0);
case ROSE_RUNTIME_FULL_ROSE:
rawEodExec(id, scratch);
break;
case ROSE_RUNTIME_SINGLE_OUTFIX:
soleOutfixEodExec(id, scratch);
break;
}
}
}
/* Flush stored SOM matches; a halt request is recorded in the scratch
 * status. */
if (rose->hasSom && !told_to_stop_matching(scratch)) {
int halt = flushStoredSomMatches(scratch, ~0ULL);
if (halt) {
DEBUG_PRINTF("told to stop matching\n");
scratch->core_info.status |= STATUS_TERMINATED;
}
}
}
/**
 * \brief Duplicate a stream into newly allocated storage.
 *
 * \param to_id   Receives the copy; set to NULL on failure once it has been
 *                found non-NULL itself.
 * \param from_id Stream to copy; must be non-NULL with a non-NULL engine.
 *
 * \return HS_INVALID on bad arguments, HS_NOMEM if allocation fails,
 *         HS_SUCCESS otherwise.
 */
HS_PUBLIC_API
hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id) {
    if (!to_id) {
        return HS_INVALID;
    }
    *to_id = NULL;

    if (!from_id || !from_id->rose) {
        return HS_INVALID;
    }

    /* stream header plus the engine's state, copied as a single unit */
    const struct RoseEngine *rose = from_id->rose;
    const size_t total_bytes =
        sizeof(struct hs_stream) + rose->stateOffsets.end;

    struct hs_stream *copy = hs_stream_alloc(total_bytes);
    if (!copy) {
        return HS_NOMEM;
    }

    memcpy(copy, from_id, total_bytes);
    *to_id = copy;
    return HS_SUCCESS;
}
/**
 * \brief Overwrite stream \p to_id with a copy of stream \p from_id.
 *
 * If \p onEvent is given, end-of-data matches for the old contents of
 * \p to_id are reported first, which requires a valid \p scratch.
 *
 * \return HS_INVALID if either stream is NULL, \p from_id has no engine, the
 *         streams use different engines, they are the same stream, or
 *         \p onEvent is set without a valid scratch; HS_SUCCESS otherwise.
 */
HS_PUBLIC_API
hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id,
                                    const hs_stream_t *from_id,
                                    hs_scratch_t *scratch,
                                    match_event_handler onEvent,
                                    void *context) {
    /* argument validation, evaluated strictly left to right */
    if (!from_id || !from_id->rose
        || !to_id || to_id->rose != from_id->rose
        || to_id == from_id) {
        return HS_INVALID;
    }

    if (onEvent) {
        const char scratch_ok = scratch && validScratch(to_id->rose, scratch);
        if (!scratch_ok) {
            return HS_INVALID;
        }
        report_eod_matches(to_id, scratch, onEvent, context);
    }

    /* stream header plus the engine's state, copied as a single unit */
    const size_t total_bytes =
        sizeof(struct hs_stream) + from_id->rose->stateOffsets.end;
    memcpy(to_id, from_id, total_bytes);

    return HS_SUCCESS;
}
static really_inline
void rawStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) {
assert(stream_state);
assert(scratch);
assert(!can_stop_matching(scratch));
DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
stream_state->offset, scratch->core_info.len);
const struct RoseEngine *rose = stream_state->rose;
assert(rose);
roseStreamExec(rose, scratch, selectAdaptor(rose), selectSomAdaptor(rose));
if (!told_to_stop_matching(scratch) &&
isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
DEBUG_PRINTF("stream exhausted\n");
scratch->core_info.status |= STATUS_EXHAUSTED;
}
}
/**
 * \brief Streaming scan of the current block using only the literal matcher.
 *
 * Runs hwlmExecStreaming over the block, passing the floating matcher's
 * stream state when the engine has any (NULL otherwise). Afterwards, unless
 * matching was told to stop, STATUS_EXHAUSTED is set in the scratch status if
 * every key is exhausted.
 */
static really_inline
void pureLiteralStreamExec(struct hs_stream *stream_state,
                           struct hs_scratch *scratch) {
    assert(stream_state);
    assert(scratch);
    assert(!can_stop_matching(scratch));

    char *state = getMultiState(stream_state);
    const struct RoseEngine *rose = stream_state->rose;
    const struct HWLM *literal_table = getFLiteralMatcher(rose);
    const size_t block_len = scratch->core_info.len;

    u8 *matcher_state = NULL;
    if (rose->floatingStreamState) {
        matcher_state = getFloatingMatcherState(rose, state);
    }

    DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
                 stream_state->offset, scratch->core_info.len);

    // Pure literal cases don't have floatingMinDistance set, so we always
    // start the match region at zero.
    const size_t region_start = 0;

    hwlmExecStreaming(literal_table, scratch, block_len, region_start,
                      rosePureLiteralCallback, scratch, rose->initialGroups,
                      matcher_state);

    if (told_to_stop_matching(scratch)) {
        return;
    }
    if (isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
        DEBUG_PRINTF("stream exhausted\n");
        scratch->core_info.status |= STATUS_EXHAUSTED;
    }
}
/**
 * \brief Streaming scan of the current block for an engine that consists of
 * one outfix NFA.
 *
 * On the first block (buf_offset zero) the NFA state is initialised and a
 * top event is queued; on later blocks the stored stream state is expanded
 * instead. After execution the state is compressed back if the NFA is still
 * alive. Otherwise the stream is marked exhausted, unless matching was told
 * to stop.
 */
static never_inline
void soleOutfixStreamExec(struct hs_stream *stream_state,
struct hs_scratch *scratch) {
assert(stream_state);
assert(scratch);
assert(!can_stop_matching(scratch));
const struct RoseEngine *t = stream_state->rose;
/* This path expects exactly one outfix queue and zero amatcherOffset,
 * ematcherOffset and fmatcherOffset. */
assert(t->outfixEndQueue == 1);
assert(!t->amatcherOffset);
assert(!t->ematcherOffset);
assert(!t->fmatcherOffset);
const struct NFA *nfa = getNfaByQueue(t, 0);
struct mq *q = scratch->queues;
initOutfixQueue(q, 0, t, scratch);
if (!scratch->core_info.buf_offset) {
/* First block of the stream: fresh state, start + top + end events. */
nfaQueueInitState(nfa, q);
pushQueueAt(q, 0, MQE_START, 0);
pushQueueAt(q, 1, MQE_TOP, 0);
pushQueueAt(q, 2, MQE_END, scratch->core_info.len);
} else {
/* Later block: restore state from the stream, start + end events only. */
nfaExpandState(nfa, q->state, q->streamState, q->offset,
queue_prev_byte(q, 0));
pushQueueAt(q, 0, MQE_START, 0);
pushQueueAt(q, 1, MQE_END, scratch->core_info.len);
}
/* A non-zero result keeps the NFA alive, so its state is written back. */
if (nfaQueueExec(q->nfa, q, scratch->core_info.len)) {
nfaQueueCompressState(nfa, q, scratch->core_info.len);
} else if (!told_to_stop_matching(scratch)) {
scratch->core_info.status |= STATUS_EXHAUSTED;
}
}
/**
 * \brief Scan one block of data as part of stream \p id.
 *
 * Shared by hs_scan_stream() and hs_scan_vector().
 *
 * \param id      Stream being scanned; must be non-NULL.
 * \param data    Block to scan; must be non-NULL.
 * \param length  Number of bytes in \p data; zero returns HS_SUCCESS without
 *                scanning.
 * \param flags   Passed through to populateCoreInfo(), which ignores it.
 * \param scratch Scratch space; must be non-NULL and pass validScratch().
 * \param onEvent Match callback; may be NULL.
 * \param context Opaque pointer stored as the callback's user context.
 *
 * \return HS_INVALID on bad arguments; HS_SCAN_TERMINATED if the stream was
 *         already terminated or matching was told to stop during this call;
 *         HS_SUCCESS otherwise (including for an already exhausted stream).
 */
static inline
hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
unsigned length, UNUSED unsigned flags,
hs_scratch_t *scratch,
match_event_handler onEvent, void *context) {
if (unlikely(!id || !scratch || !data || !validScratch(id->rose, scratch))) {
return HS_INVALID;
}
const struct RoseEngine *rose = id->rose;
char *state = getMultiState(id);
u8 status = getStreamStatus(state);
/* A terminated or exhausted stream does no further work. */
if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED)) {
DEBUG_PRINTF("stream is broken, halting scan\n");
if (status & STATUS_TERMINATED) {
return HS_SCAN_TERMINATED;
} else {
return HS_SUCCESS;
}
}
// We avoid doing any work if the user has given us zero bytes of data to
// scan. Arguably we should define some semantics for how we treat vacuous
// cases here.
if (unlikely(length == 0)) {
DEBUG_PRINTF("zero length block\n");
return HS_SUCCESS;
}
u32 historyAmount = getHistoryAmount(rose, id->offset);
populateCoreInfo(scratch, rose, state, onEvent, context, data, length,
getHistory(state, rose, id->offset), historyAmount,
id->offset, status, flags);
assert(scratch->core_info.hlen <= id->offset
&& scratch->core_info.hlen <= rose->historyRequired);
// Rose program execution (used for some report paths) depends on these
// values being initialised.
scratch->tctxt.lastMatchOffset = 0;
scratch->tctxt.minMatchOffset = id->offset;
prefetch_data(data, length);
if (rose->somLocationCount) {
loadSomFromStream(scratch, id->offset);
}
/* The offset-zero boundary program runs only on the stream's first block.
 * If it halts matching, the status is stored before returning. */
if (!id->offset && rose->boundary.reportZeroOffset) {
DEBUG_PRINTF("zero reports\n");
int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset,
0, scratch);
if (rv == MO_HALT_MATCHING) {
DEBUG_PRINTF("halting scan\n");
setStreamStatus(state, scratch->core_info.status);
if (told_to_stop_matching(scratch)) {
return HS_SCAN_TERMINATED;
} else {
assert(scratch->core_info.status & STATUS_EXHAUSTED);
return HS_SUCCESS;
}
}
}
/* Dispatch on the runtime implementation. An unknown value asserts and,
 * when asserts are compiled out, falls through to the full Rose path. The
 * final case has no break because nothing follows it in the switch. */
switch (rose->runtimeImpl) {
default:
assert(0);
case ROSE_RUNTIME_FULL_ROSE:
rawStreamExec(id, scratch);
break;
case ROSE_RUNTIME_PURE_LITERAL:
pureLiteralStreamExec(id, scratch);
break;
case ROSE_RUNTIME_SINGLE_OUTFIX:
soleOutfixStreamExec(id, scratch);
}
if (rose->hasSom && !told_to_stop_matching(scratch)) {
int halt = flushStoredSomMatches(scratch, ~0ULL);
if (halt) {
scratch->core_info.status |= STATUS_TERMINATED;
}
}
/* Store the status accumulated during this block back into the stream. */
setStreamStatus(state, scratch->core_info.status);
/* History, offset and SOM state only advance while the stream is live. */
if (likely(!can_stop_matching(scratch))) {
maintainHistoryBuffer(rose, state, data, length);
id->offset += length; /* maintain offset */
if (rose->somLocationCount) {
storeSomToStream(scratch, id->offset);
}
} else if (told_to_stop_matching(scratch)) {
return HS_SCAN_TERMINATED;
}
return HS_SUCCESS;
}
/**
 * \brief Public entry point for scanning one block of a stream.
 *
 * Thin wrapper: every argument is forwarded unchanged to
 * hs_scan_stream_internal() and its result is returned.
 */
HS_PUBLIC_API
hs_error_t hs_scan_stream(hs_stream_t *id, const char *data, unsigned length,
                          unsigned flags, hs_scratch_t *scratch,
                          match_event_handler onEvent, void *context) {
    const hs_error_t result = hs_scan_stream_internal(
        id, data, length, flags, scratch, onEvent, context);
    return result;
}
/**
 * \brief Close a stream and release its storage.
 *
 * If \p onEvent is given, end-of-data matches are reported first, which
 * requires a valid \p scratch.
 *
 * \return HS_INVALID if \p id is NULL or \p onEvent is set without a valid
 *         scratch (the stream is not freed in that case); HS_SUCCESS
 *         otherwise.
 */
HS_PUBLIC_API
hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
                           match_event_handler onEvent, void *context) {
    if (!id) {
        return HS_INVALID;
    }

    if (onEvent) {
        const char scratch_ok = scratch && validScratch(id->rose, scratch);
        if (!scratch_ok) {
            return HS_INVALID;
        }
        report_eod_matches(id, scratch, onEvent, context);
    }

    hs_stream_free(id);
    return HS_SUCCESS;
}
/**
 * \brief Return a stream to its initial state, keeping its engine.
 *
 * If \p onEvent is given, end-of-data matches are reported first, which
 * requires a valid \p scratch. \p flags is unused.
 *
 * \return HS_INVALID if \p id is NULL or \p onEvent is set without a valid
 *         scratch; HS_SUCCESS otherwise.
 */
HS_PUBLIC_API
hs_error_t hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
                           hs_scratch_t *scratch, match_event_handler onEvent,
                           void *context) {
    if (!id) {
        return HS_INVALID;
    }

    if (onEvent) {
        const char scratch_ok = scratch && validScratch(id->rose, scratch);
        if (!scratch_ok) {
            return HS_INVALID;
        }
        report_eod_matches(id, scratch, onEvent, context);
    }

    /* re-initialise in place against the same engine */
    init_stream(id, id->rose);
    return HS_SUCCESS;
}
/**
 * \brief Report how many bytes a stream for this database occupies.
 *
 * \param db          Compiled database; must pass validDatabase() and be in
 *                    stream mode.
 * \param stream_size Receives the engine's state size plus
 *                    sizeof(struct hs_stream).
 *
 * \return HS_INVALID for a NULL \p stream_size or misaligned bytecode; the
 *         validDatabase() error for a bad database; HS_DB_MODE_ERROR for a
 *         database that is not stream mode; HS_SUCCESS otherwise.
 */
HS_PUBLIC_API
hs_error_t hs_stream_size(const hs_database_t *db, size_t *stream_size) {
    if (!stream_size) {
        return HS_INVALID;
    }

    const hs_error_t db_status = validDatabase(db);
    if (db_status != HS_SUCCESS) {
        return db_status;
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    if (!ISALIGNED_16(rose)) {
        return HS_INVALID;
    }
    if (rose->mode != HS_MODE_STREAM) {
        return HS_DB_MODE_ERROR;
    }

    // stream state plus the hs_stream struct itself
    const u32 state_bytes = rose->stateOffsets.end;
    *stream_size = state_bytes + sizeof(struct hs_stream);
    return HS_SUCCESS;
}
#if defined(DEBUG) || defined(DUMP_SUPPORT)
#include "util/compare.h"
/**
 * \brief Debugging aid: print a buffer with unprintable bytes hex-escaped.
 *
 * Bytes for which ourisprint() is true are printed as-is, except the single
 * quote; everything else is written as \xNN. The output ends with a newline.
 */
static UNUSED
void dumpData(const char *data, size_t len) {
    DEBUG_PRINTF("BUFFER:");

    const char *const end = data + len;
    for (const char *cursor = data; cursor != end; cursor++) {
        const u8 c = *cursor;
        if (!ourisprint(c) || c == '\'') {
            printf("\\x%02x", c);
            continue;
        }
        printf("%c", c);
    }

    printf("\n");
}
#endif
/**
 * \brief Vectored-mode scan: scan \p count buffers as one logical stream.
 *
 * A temporary stream is set up in scratch->bstate, each buffer is passed to
 * hs_scan_stream_internal() in order, and end-of-data matches are reported
 * at the end if \p onEvent is given.
 *
 * \param db      Compiled database; must pass validDatabase() and be in
 *                vectored mode.
 * \param data    Array of \p count buffer pointers; must be non-NULL.
 * \param length  Array of \p count buffer lengths; must be non-NULL.
 * \param count   Number of buffers.
 * \param flags   Unused.
 * \param scratch Scratch space; must be non-NULL and pass validScratch().
 * \param onEvent Match callback; may be NULL.
 * \param context Opaque pointer stored as the callback's user context.
 *
 * \return HS_INVALID for a NULL \p scratch, \p data or \p length, misaligned
 *         bytecode or an invalid scratch; the validDatabase() error for a bad
 *         database; HS_DB_MODE_ERROR for a database that is not vectored
 *         mode; the first non-success result from scanning a buffer;
 *         HS_SCAN_TERMINATED if matching was told to stop during
 *         end-of-data reporting; HS_SUCCESS otherwise.
 */
HS_PUBLIC_API
hs_error_t hs_scan_vector(const hs_database_t *db, const char * const * data,
const unsigned int *length, unsigned int count,
UNUSED unsigned int flags, hs_scratch_t *scratch,
match_event_handler onEvent, void *context) {
if (unlikely(!scratch || !data || !length)) {
return HS_INVALID;
}
hs_error_t err = validDatabase(db);
if (unlikely(err != HS_SUCCESS)) {
return err;
}
const struct RoseEngine *rose = hs_get_bytecode(db);
if (unlikely(!ISALIGNED_16(rose))) {
return HS_INVALID;
}
if (unlikely(rose->mode != HS_MODE_VECTORED)) {
return HS_DB_MODE_ERROR;
}
if (unlikely(!validScratch(rose, scratch))) {
return HS_INVALID;
}
/* The temporary stream lives inside the scratch's bstate region. */
hs_stream_t *id = (hs_stream_t *)(scratch->bstate);
init_stream(id, rose); /* open stream */
/* Scan each buffer in turn; any non-success result ends the scan early. */
for (u32 i = 0; i < count; i++) {
DEBUG_PRINTF("block %u/%u offset=%llu len=%u\n", i, count, id->offset,
length[i]);
#ifdef DEBUG
dumpData(data[i], length[i]);
#endif
hs_error_t ret
= hs_scan_stream_internal(id, data[i], length[i], 0, scratch,
onEvent, context);
if (ret != HS_SUCCESS) {
return ret;
}
}
/* close stream */
if (onEvent) {
report_eod_matches(id, scratch, onEvent, context);
if (told_to_stop_matching(scratch)) {
return HS_SCAN_TERMINATED;
}
}
return HS_SUCCESS;
}