Rose: use program for all literal matches

Unifies all literal match paths so that the Rose program is used for all
of them. This removes the previous specialised "direct report" and
"multi direct report" paths. Some additional REPORT instruction work was
necessary for this.

Reworks the literal construction path at compile time in preparation for
using program offsets as literal IDs.

Removes the anchored log runtime entirely, as it is no longer worth the
extra complexity.
Author: Justin Viiret, 2016-02-18 09:45:37 +11:00
Committed by: Matthew Barr
Parent: b58d05dfec
Commit: 67b9784dae
26 changed files with 681 additions and 1176 deletions
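
As a rough illustration of the change described in the commit message, the
unified path means a literal match no longer branches into specialised
"direct report" handling: every match resolves to a Rose program and runs it,
which is what lets a program offset serve as the literal ID. The sketch below
is hypothetical; runRoseProgram, onLiteralMatch and the callback shape are
illustrative names, not the actual Hyperscan API.

/* Hypothetical sketch of a unified literal-match path; not the real API. */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;
typedef uint64_t u64a;

/* Interpret the Rose program that starts at the given byte offset within the
 * engine bytecode. Here it just logs; a real interpreter would execute REPORT
 * and other instructions. */
static int runRoseProgram(const unsigned char *engine, u32 prog_offset,
                          u64a end) {
    (void)engine;
    printf("running program at offset %u for match ending at %llu\n",
           (unsigned)prog_offset, (unsigned long long)end);
    return 0; /* continue matching */
}

/* Single literal-match callback: no "direct report" or "multi direct report"
 * special cases. The literal ID is simply the offset of its program. */
static int onLiteralMatch(const unsigned char *engine, u32 literal_id,
                          u64a end) {
    return runRoseProgram(engine, literal_id, end);
}

int main(void) {
    unsigned char fake_engine[64] = {0};
    onLiteralMatch(fake_engine, 16, 100); /* literal whose program sits at offset 16 */
    return 0;
}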

src/rose/catchup.h

@@ -26,6 +26,25 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \file
* \brief Rose runtime: code for catching up output-exposed engines.
*
* Rose has several components which run behind the main (floating table) clock
* and need to be caught up before we report matches.
*
* Currently we have to deal with:
* 1. Suffix/Outfix NFAs
* 2. A single MPV NFA (chained), which may also be triggered by (1).
*
* The approach is to:
* - (A) build a priority queue of the suffix/outfixes based on their first
* match location;
* - (B) process the matches from the priority queue in order;
* - (C) As we report matches from (B) we interleave matches from the MPV if it
* exists.
*/
#ifndef ROSE_CATCHUP_H
#define ROSE_CATCHUP_H
@@ -35,43 +54,16 @@
#include "rose_common.h"
#include "rose_internal.h"
#include "ue2common.h"
#include "nfa/nfa_internal.h"
#include "util/bitutils.h"
#include "util/multibit.h"
/*
* Rose has several components which run behind the main (floating table) clock
* and need to be caught up before we report matches.
*
* Currently we have to deal with:
* 1) Stored matches from the anchored matcher
* 2) Suffix/Outfix nfas
* 3) a single MPV nfa (chained) (which may also be triggered by (1) and (2)).
*
* The approach is to:
* A) build a priority queue of the suffix/outfixes based on their first match
* location
* B) process the matches from the anchored matches in order
* C) As we report a match from (B) we interleave matches from the suffixes
* D) As we report matches from (B) and (C) we interleave matches from the
* mpv if it exists.
*/
/* Callbacks, defined in catchup.c */
hwlmcb_rv_t roseCatchUpSufAndChains(s64a loc, struct hs_scratch *scratch);
hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch);
hwlmcb_rv_t roseCatchUpAnchoredOnly(s64a loc, struct hs_scratch *scratch);
/* will only catch mpv upto last reported external match */
/* will only catch mpv up to last reported external match */
hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch);
/* will only catch mpv upto last reported external match */
hwlmcb_rv_t roseCatchUpAnchoredAndSuf(s64a loc, struct hs_scratch *scratch);
hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc,
struct hs_scratch *scratch);
@@ -81,44 +73,42 @@ void streamInitSufPQ(const struct RoseEngine *t, char *state,
struct hs_scratch *scratch);
static really_inline
hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc,
struct hs_scratch *scratch) {
u64a cur_offset = loc + scratch->core_info.buf_offset;
assert(cur_offset >= scratch->tctxt.minMatchOffset);
if (0) {
quick_exit:
updateMinMatchOffsetFromMpv(&scratch->tctxt, cur_offset);
return HWLM_CONTINUE_MATCHING;
}
int canSkipCatchUpMPV(const struct RoseEngine *t, struct hs_scratch *scratch,
u64a cur_offset) {
if (!has_chained_nfas(t)) {
goto quick_exit;
return 1;
}
/* note: we may have to run at less than tctxt.minMatchOffset as we may
* have a full queue of postponed events that we need to flush */
if (cur_offset < scratch->tctxt.next_mpv_offset) {
DEBUG_PRINTF("skipping cur_offset %lld min %lld, mpv %lld\n",
DEBUG_PRINTF("skipping cur_offset %llu min %llu, mpv %llu\n",
cur_offset, scratch->tctxt.minMatchOffset,
scratch->tctxt.next_mpv_offset);
goto quick_exit;
return 1;
}
assert(t->activeArrayCount);
DEBUG_PRINTF("cur offset offset: %lld\n", cur_offset);
DEBUG_PRINTF("cur offset offset: %llu\n", cur_offset);
DEBUG_PRINTF("min match offset %llu\n", scratch->tctxt.minMatchOffset);
DEBUG_PRINTF("roseCatchUpMPV to %lld\n", loc);
assert(t->outfixBeginQueue == 1); /* if it exists mpv is queue 0 */
u8 *aa = getActiveLeafArray(t, scratch->core_info.state);
u32 aaCount = t->activeArrayCount;
const u8 *aa = getActiveLeafArray(t, scratch->core_info.state);
return !mmbit_isset(aa, t->activeArrayCount, 0);
}
if (!mmbit_isset(aa, aaCount, 0)){
goto quick_exit;
/** \brief Catches up the MPV. */
static really_inline
hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc,
struct hs_scratch *scratch) {
u64a cur_offset = loc + scratch->core_info.buf_offset;
assert(cur_offset >= scratch->tctxt.minMatchOffset);
if (canSkipCatchUpMPV(t, scratch, cur_offset)) {
updateMinMatchOffsetFromMpv(&scratch->tctxt, cur_offset);
return HWLM_CONTINUE_MATCHING;
}
/* Note: chained tails MUST not participate in the priority queue as
@@ -128,20 +118,10 @@ hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc,
return roseCatchUpMPV_i(t, loc, scratch);
}
static really_inline
u64a currentAnchoredEnd(const struct RoseEngine *t, struct RoseContext *tctxt) {
if (tctxt->curr_anchored_loc == MMB_INVALID) {
return ANCHORED_MATCH_SENTINEL;
} else {
return tctxt->curr_anchored_loc + t->maxSafeAnchoredDROffset + 1;
}
}
/* catches up nfas, anchored matches and the mpv */
/** \brief Catches up NFAs and the MPV. */
static rose_inline
hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a end,
char in_anchored) {
struct hs_scratch *scratch, u64a end) {
/* no need to catch up if we are at the same offset as last time */
if (end <= scratch->tctxt.minMatchOffset) {
/* we must already be up to date */
@@ -158,24 +138,13 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t,
}
assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset);
u64a curr_anchored_end = currentAnchoredEnd(t, &scratch->tctxt);
hwlmcb_rv_t rv;
if (in_anchored
|| curr_anchored_end == ANCHORED_MATCH_SENTINEL
|| curr_anchored_end > end) {
if (!t->activeArrayCount
|| !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
updateMinMatchOffset(&scratch->tctxt, end);
rv = HWLM_CONTINUE_MATCHING;
} else {
rv = roseCatchUpSufAndChains(loc, scratch);
}
if (!t->activeArrayCount
|| !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
updateMinMatchOffset(&scratch->tctxt, end);
rv = HWLM_CONTINUE_MATCHING;
} else {
if (!t->activeArrayCount) {
rv = roseCatchUpAnchoredOnly(loc, scratch);
} else {
rv = roseCatchUpAll(loc, scratch);
}
rv = roseCatchUpAll(loc, scratch);
}
assert(rv != HWLM_CONTINUE_MATCHING
@@ -185,13 +154,16 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t,
return rv;
}
/* Catches up anything which may add triggers on the mpv: anchored matches
* and suf/outfixes. The MPV will be run only to intersperse matches in
* the output match stream if external matches are raised. */
/**
* \brief Catches up anything which may add triggers on the MPV (suffixes and
* outfixes).
*
* The MPV will be run only to intersperse matches in the output match stream
* if external matches are raised.
*/
static rose_inline
hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a end,
char in_anchored) {
struct hs_scratch *scratch, u64a end) {
/* no need to catch up if we are at the same offset as last time */
if (end <= scratch->tctxt.minNonMpvMatchOffset) {
/* we must already be up to date */
@@ -203,27 +175,21 @@ hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t,
assert(t->activeArrayCount); /* mpv is in active array */
assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset);
u64a curr_anchored_end = currentAnchoredEnd(t, &scratch->tctxt);
if (in_anchored
|| curr_anchored_end == ANCHORED_MATCH_SENTINEL
|| curr_anchored_end > end) {
if (!t->mpvTriggeredByLeaf) {
/* no need to check as they never put triggers onto the mpv */
return HWLM_CONTINUE_MATCHING;
}
/* sadly, this branch rarely gets taken as the mpv itself is usually
* alive. */
char *state = scratch->core_info.state;
if (!mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
scratch->tctxt.minNonMpvMatchOffset = end;
return HWLM_CONTINUE_MATCHING;
}
return roseCatchUpSuf(loc, scratch);
} else {
return roseCatchUpAnchoredAndSuf(loc, scratch);
if (!t->mpvTriggeredByLeaf) {
/* no need to check as they never put triggers onto the mpv */
return HWLM_CONTINUE_MATCHING;
}
/* sadly, this branch rarely gets taken as the mpv itself is usually
* alive. */
char *state = scratch->core_info.state;
if (!mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
scratch->tctxt.minNonMpvMatchOffset = end;
return HWLM_CONTINUE_MATCHING;
}
return roseCatchUpSuf(loc, scratch);
}
#endif
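
The new header comment in catchup.h describes the catch-up approach in steps
(A)-(C). The following is only a sketch of that idea under assumed names (the
toy_engine struct and the linear-scan stand-in for the priority queue are
illustrative, not the Rose runtime): suffix/outfix engines are ordered by the
offset of their next match, matches are drained in order, and the MPV is
caught up before each report is emitted.

/* Illustrative model of catch-up steps (A)-(C); not the real Rose runtime. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64a;
#define NO_MATCH (~(u64a)0)

struct toy_engine {
    const char *name;
    u64a next_match; /* offset of this engine's earliest pending match */
};

/* (A) find the engine with the earliest pending match; a linear scan stands
 * in for the real priority queue. */
static struct toy_engine *earliest(struct toy_engine *e, size_t n) {
    struct toy_engine *best = NULL;
    for (size_t i = 0; i < n; i++) {
        if (e[i].next_match != NO_MATCH &&
            (!best || e[i].next_match < best->next_match)) {
            best = &e[i];
        }
    }
    return best;
}

int main(void) {
    struct toy_engine engines[] = {
        { "suffix0", 5 }, { "outfix1", 3 }, { "suffix2", 9 },
    };
    size_t n = sizeof(engines) / sizeof(engines[0]);
    u64a mpv_offset = 0;

    /* (B) process matches in offset order. */
    for (struct toy_engine *top; (top = earliest(engines, n)) != NULL;) {
        /* (C) interleave the MPV: catch it up before reporting at this offset. */
        if (top->next_match > mpv_offset) {
            printf("catch up MPV to offset %llu\n",
                   (unsigned long long)top->next_match);
            mpv_offset = top->next_match;
        }
        printf("report match from %s at offset %llu\n", top->name,
               (unsigned long long)top->next_match);
        top->next_match = NO_MATCH; /* a real engine would advance to its next match */
    }
    return 0;
}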