mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 19:24:25 +03:00
Initial commit of Hyperscan
This commit is contained in:
293
src/rose/block.c
Normal file
293
src/rose/block.c
Normal file
@@ -0,0 +1,293 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "catchup.h"
|
||||
#include "init.h"
|
||||
#include "match.h"
|
||||
#include "nfa/nfa_api.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "nfa/nfa_rev_api.h"
|
||||
#include "nfa/mcclellan.h"
|
||||
#include "util/fatbit.h"
|
||||
#include "rose_sidecar_runtime.h"
|
||||
#include "rose.h"
|
||||
#include "rose_common.h"
|
||||
|
||||
static rose_inline
|
||||
void runAnchoredTableBlock(const struct RoseEngine *t, const void *atable,
|
||||
struct hs_scratch *scratch) {
|
||||
const u8 *buffer = scratch->core_info.buf;
|
||||
size_t length = scratch->core_info.len;
|
||||
size_t alen = MIN(length, t->anchoredDistance);
|
||||
const struct anchored_matcher_info *curr = atable;
|
||||
|
||||
DEBUG_PRINTF("BEGIN ANCHORED (over %zu/%zu)\n", alen, length);
|
||||
|
||||
do {
|
||||
const struct NFA *nfa
|
||||
= (const struct NFA *)((const char *)curr + sizeof(*curr));
|
||||
|
||||
assert(t->anchoredDistance > curr->anchoredMinDistance);
|
||||
if (length >= curr->anchoredMinDistance) {
|
||||
size_t local_alen = alen - curr->anchoredMinDistance;
|
||||
const u8 *local_buffer = buffer + curr->anchoredMinDistance;
|
||||
|
||||
DEBUG_PRINTF("--anchored nfa (+%u)\n", curr->anchoredMinDistance);
|
||||
assert(isMcClellanType(nfa->type));
|
||||
if (nfa->type == MCCLELLAN_NFA_8) {
|
||||
nfaExecMcClellan8_B(nfa, curr->anchoredMinDistance,
|
||||
local_buffer, local_alen,
|
||||
roseAnchoredCallback, &scratch->tctxt);
|
||||
} else {
|
||||
nfaExecMcClellan16_B(nfa, curr->anchoredMinDistance,
|
||||
local_buffer, local_alen,
|
||||
roseAnchoredCallback, &scratch->tctxt);
|
||||
}
|
||||
}
|
||||
|
||||
if (!curr->next_offset) {
|
||||
break;
|
||||
}
|
||||
|
||||
curr = (const void *)((const char *)curr + curr->next_offset);
|
||||
} while (1);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void init_sidecar(const struct RoseEngine *t, struct hs_scratch *scratch) {
|
||||
if (!t->smatcherOffset) {
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("welcome to the sidecar\n");
|
||||
assert(t->initSideEnableOffset);
|
||||
// We have to enable some sidecar literals
|
||||
const char *template = (const char *)t + t->initSideEnableOffset;
|
||||
|
||||
memcpy(&scratch->side_enabled, template, t->stateOffsets.sidecar_size);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void init_state_for_block(const struct RoseEngine *t, u8 *state) {
|
||||
assert(t);
|
||||
assert(state);
|
||||
|
||||
DEBUG_PRINTF("init for Rose %p with %u roles (%u with state indices)\n",
|
||||
t, t->roleCount, t->rolesWithStateCount);
|
||||
|
||||
// Rose is guaranteed 8-aligned state
|
||||
assert(ISALIGNED_N(state, 8));
|
||||
|
||||
init_state(t, state);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void init_outfixes_for_block(const struct RoseEngine *t,
|
||||
struct hs_scratch *scratch, u8 *state,
|
||||
char is_small_block) {
|
||||
/* active leaf array has been cleared by the init scatter */
|
||||
|
||||
if (t->initMpvNfa != MO_INVALID_IDX) {
|
||||
assert(t->initMpvNfa == 0);
|
||||
const struct NFA *nfa = getNfaByQueue(t, 0);
|
||||
DEBUG_PRINTF("testing minwidth %u > len %zu\n", nfa->minWidth,
|
||||
scratch->core_info.len);
|
||||
size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf,
|
||||
scratch->core_info.len);
|
||||
if (len) {
|
||||
struct RoseContext *tctxt = &scratch->tctxt;
|
||||
u8 *activeArray = getActiveLeafArray(t, state);
|
||||
const u32 activeArraySize = t->activeArrayCount;
|
||||
const u32 qCount = t->queueCount;
|
||||
|
||||
mmbit_set(activeArray, activeArraySize, 0);
|
||||
fatbit_set(scratch->aqa, qCount, 0);
|
||||
|
||||
struct mq *q = scratch->queues;
|
||||
initQueue(q, 0, t, tctxt);
|
||||
q->length = len; /* adjust for rev_accel */
|
||||
nfaQueueInitState(nfa, q);
|
||||
pushQueueAt(q, 0, MQE_START, 0);
|
||||
pushQueueAt(q, 1, MQE_TOP, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_small_block && !t->hasOutfixesInSmallBlock) {
|
||||
DEBUG_PRINTF("all outfixes in small block table\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (t->outfixBeginQueue != t->outfixEndQueue) {
|
||||
blockInitSufPQ(t, state, scratch, is_small_block);
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch,
|
||||
RoseCallback callback, RoseCallbackSom som_callback,
|
||||
void *ctxt, u8 *state, char is_small_block) {
|
||||
init_state_for_block(t, state);
|
||||
|
||||
struct RoseContext *tctxt = &scratch->tctxt;
|
||||
|
||||
tctxt->t = t;
|
||||
tctxt->depth = 1;
|
||||
tctxt->groups = t->initialGroups;
|
||||
tctxt->lit_offset_adjust = 1; // index after last byte
|
||||
tctxt->delayLastEndOffset = 0;
|
||||
tctxt->lastEndOffset = 0;
|
||||
tctxt->filledDelayedSlots = 0;
|
||||
tctxt->state = state;
|
||||
tctxt->cb = callback;
|
||||
tctxt->cb_som = som_callback;
|
||||
tctxt->userCtx = ctxt;
|
||||
tctxt->lastMatchOffset = 0;
|
||||
tctxt->minMatchOffset = 0;
|
||||
tctxt->minNonMpvMatchOffset = 0;
|
||||
tctxt->next_mpv_offset = 0;
|
||||
tctxt->curr_anchored_loc = MMB_INVALID;
|
||||
tctxt->curr_row_offset = 0;
|
||||
tctxt->side_curr = 0;
|
||||
|
||||
scratch->am_log_sum = 0; /* clear the anchored logs */
|
||||
scratch->al_log_sum = 0;
|
||||
|
||||
fatbit_clear(scratch->aqa);
|
||||
|
||||
init_sidecar(t, scratch); /* Init the sidecar enabled state */
|
||||
|
||||
scratch->catchup_pq.qm_size = 0;
|
||||
|
||||
init_outfixes_for_block(t, scratch, state, is_small_block);
|
||||
}
|
||||
|
||||
void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch,
|
||||
RoseCallback callback, RoseCallbackSom som_callback,
|
||||
void *ctx) {
|
||||
assert(t);
|
||||
assert(scratch);
|
||||
assert(scratch->core_info.buf);
|
||||
assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount)
|
||||
< MAX_SPARSE_ITER_STATES);
|
||||
|
||||
const size_t length = scratch->core_info.len;
|
||||
|
||||
// We have optimizations for small block scans: we run a single coalesced
|
||||
// HWLM scan instead of running the anchored and floating matchers. Some
|
||||
// outfixes are disabled as well (for SEP scans of single-byte literals,
|
||||
// which are also run in the HWLM scan).
|
||||
const char is_small_block =
|
||||
(length < ROSE_SMALL_BLOCK_LEN && t->sbmatcherOffset);
|
||||
|
||||
u8 *state = (u8 *)scratch->core_info.state;
|
||||
|
||||
init_for_block(t, scratch, callback, som_callback, ctx, state,
|
||||
is_small_block);
|
||||
|
||||
struct RoseContext *tctxt = &scratch->tctxt;
|
||||
|
||||
if (is_small_block) {
|
||||
const void *sbtable = getSBLiteralMatcher(t);
|
||||
assert(sbtable);
|
||||
|
||||
size_t sblen = MIN(length, t->smallBlockDistance);
|
||||
|
||||
DEBUG_PRINTF("BEGIN SMALL BLOCK (over %zu/%zu)\n", sblen, length);
|
||||
DEBUG_PRINTF("-- %016llx\n", tctxt->groups);
|
||||
hwlmExec(sbtable, scratch->core_info.buf, sblen, 0, roseCallback,
|
||||
tctxt, tctxt->groups);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
const void *atable = getALiteralMatcher(t);
|
||||
|
||||
if (atable) {
|
||||
if (t->amatcherMaxBiAnchoredWidth != ROSE_BOUND_INF
|
||||
&& length > t->amatcherMaxBiAnchoredWidth) {
|
||||
goto skip_atable;
|
||||
}
|
||||
|
||||
if (length < t->amatcherMinWidth) {
|
||||
goto skip_atable;
|
||||
}
|
||||
|
||||
|
||||
runAnchoredTableBlock(t, atable, scratch);
|
||||
|
||||
if (can_stop_matching(scratch)) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
resetAnchoredLog(t, scratch);
|
||||
skip_atable:;
|
||||
}
|
||||
|
||||
const struct HWLM *ftable = getFLiteralMatcher(t);
|
||||
if (ftable) {
|
||||
DEBUG_PRINTF("ftable fd=%u fmd %u\n", t->floatingDistance,
|
||||
t->floatingMinDistance);
|
||||
if (t->noFloatingRoots && tctxt->depth == 1) {
|
||||
DEBUG_PRINTF("skip FLOATING: no inflight matches\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (t->fmatcherMaxBiAnchoredWidth != ROSE_BOUND_INF
|
||||
&& length > t->fmatcherMaxBiAnchoredWidth) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (length < t->fmatcherMinWidth) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
const u8 *buffer = scratch->core_info.buf;
|
||||
size_t flen = length;
|
||||
if (t->floatingDistance != ROSE_BOUND_INF) {
|
||||
flen = MIN(t->floatingDistance, length);
|
||||
}
|
||||
if (flen <= t->floatingMinDistance) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length);
|
||||
DEBUG_PRINTF("-- %016llx\n", tctxt->groups);
|
||||
hwlmExec(ftable, buffer, flen, t->floatingMinDistance,
|
||||
roseCallback, tctxt, tctxt->groups);
|
||||
}
|
||||
|
||||
exit:;
|
||||
u8 dummy_delay_mask = 0;
|
||||
if (cleanUpDelayed(length, 0, tctxt, &dummy_delay_mask)
|
||||
== HWLM_TERMINATE_MATCHING) {
|
||||
return;
|
||||
}
|
||||
|
||||
assert(!can_stop_matching(scratch));
|
||||
|
||||
roseCatchUpTo(t, state, length, scratch, 0);
|
||||
}
|
1281
src/rose/catchup.c
Normal file
1281
src/rose/catchup.c
Normal file
File diff suppressed because it is too large
Load Diff
229
src/rose/catchup.h
Normal file
229
src/rose/catchup.h
Normal file
@@ -0,0 +1,229 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_CATCHUP_H
|
||||
#define ROSE_CATCHUP_H
|
||||
|
||||
#include "hwlm/hwlm.h"
|
||||
#include "runtime.h"
|
||||
#include "scratch.h"
|
||||
#include "rose_common.h"
|
||||
#include "rose_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/multibit.h"
|
||||
|
||||
/*
|
||||
* Rose has several components which run behind the main (floating table) clock
|
||||
* and need to be caught up before we report matches.
|
||||
*
|
||||
* Currently we have to deal with:
|
||||
* 1) Stored matches from the anchored matcher
|
||||
* 2) Suffix/Outfix nfas
|
||||
* 3) a single MPV nfa (chained) (which may also be triggered by (1) and (2)).
|
||||
*
|
||||
* The approach is to:
|
||||
* A) build a priority queue of the suffix/outfixes based on their first match
|
||||
* location
|
||||
* B) process the matches from the anchored matches in order
|
||||
* C) As we report a match from (B) we interleave matches from the suffixes
|
||||
* D) As we report matches from (B) and (C) we interleave matches from the
|
||||
* mpv if it exists.
|
||||
*/
|
||||
|
||||
/* Callbacks, defined in catchup.c */
|
||||
|
||||
hwlmcb_rv_t roseCatchUpSufAndChains(s64a loc, struct hs_scratch *scratch);
|
||||
|
||||
hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch);
|
||||
|
||||
hwlmcb_rv_t roseCatchUpAnchoredOnly(s64a loc, struct hs_scratch *scratch);
|
||||
|
||||
|
||||
/* will only catch mpv upto last reported external match */
|
||||
hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch);
|
||||
|
||||
/* will only catch mpv upto last reported external match */
|
||||
hwlmcb_rv_t roseCatchUpAnchoredAndSuf(s64a loc, struct hs_scratch *scratch);
|
||||
|
||||
|
||||
hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, u8 *state, s64a loc,
|
||||
struct hs_scratch *scratch);
|
||||
|
||||
void blockInitSufPQ(const struct RoseEngine *t, u8 *state,
|
||||
struct hs_scratch *scratch, char is_small_block);
|
||||
void streamInitSufPQ(const struct RoseEngine *t, u8 *state,
|
||||
struct hs_scratch *scratch);
|
||||
|
||||
static really_inline
|
||||
hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, u8 *state,
|
||||
s64a loc, struct hs_scratch *scratch) {
|
||||
u64a cur_offset = loc + scratch->core_info.buf_offset;
|
||||
assert(cur_offset >= scratch->tctxt.minMatchOffset);
|
||||
|
||||
if (0) {
|
||||
quick_exit:
|
||||
updateMinMatchOffsetFromMpv(&scratch->tctxt, cur_offset);
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
if (!has_chained_nfas(t)) {
|
||||
goto quick_exit;
|
||||
}
|
||||
|
||||
/* note: we may have to run at less than tctxt.minMatchOffset as we may
|
||||
* have a full queue of postponed events that we need to flush */
|
||||
if (cur_offset < scratch->tctxt.next_mpv_offset) {
|
||||
DEBUG_PRINTF("skipping cur_offset %lld min %lld, mpv %lld\n",
|
||||
cur_offset, scratch->tctxt.minMatchOffset,
|
||||
scratch->tctxt.next_mpv_offset);
|
||||
goto quick_exit;
|
||||
}
|
||||
|
||||
assert(t->activeArrayCount);
|
||||
|
||||
DEBUG_PRINTF("cur offset offset: %lld\n", cur_offset);
|
||||
DEBUG_PRINTF("min match offset %llu\n", scratch->tctxt.minMatchOffset);
|
||||
|
||||
DEBUG_PRINTF("roseCatchUpMPV to %lld\n", loc);
|
||||
|
||||
assert(t->outfixBeginQueue == 1); /* if it exists mpv is queue 0 */
|
||||
|
||||
u8 *aa = getActiveLeafArray(t, state);
|
||||
u32 aaCount = t->activeArrayCount;
|
||||
|
||||
if (!mmbit_isset(aa, aaCount, 0)){
|
||||
goto quick_exit;
|
||||
}
|
||||
|
||||
/* Note: chained tails MUST not participate in the priority queue as
|
||||
* they may have events pushed on during this process which may be before
|
||||
* the catch up point */
|
||||
|
||||
return roseCatchUpMPV_i(t, state, loc, scratch);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u64a currentAnchoredEnd(const struct RoseEngine *t, struct RoseContext *tctxt) {
|
||||
if (tctxt->curr_anchored_loc == MMB_INVALID) {
|
||||
return ANCHORED_MATCH_SENTINEL;
|
||||
} else {
|
||||
return tctxt->curr_anchored_loc + t->maxSafeAnchoredDROffset + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* catches up nfas, anchored matches and the mpv */
|
||||
static rose_inline
|
||||
hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, u8 *state, u64a end,
|
||||
struct hs_scratch *scratch, char in_anchored) {
|
||||
/* no need to catch up if we are at the same offset as last time */
|
||||
if (end <= scratch->tctxt.minMatchOffset) {
|
||||
/* we must already be up to date */
|
||||
DEBUG_PRINTF("skip\n");
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
s64a loc = end - scratch->core_info.buf_offset;
|
||||
|
||||
if (end <= scratch->tctxt.minNonMpvMatchOffset) {
|
||||
/* only need to catch up the mpv */
|
||||
return roseCatchUpMPV(t, state, loc, scratch);
|
||||
}
|
||||
|
||||
assert(t == scratch->tctxt.t);
|
||||
assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset);
|
||||
u64a curr_anchored_end = currentAnchoredEnd(t, &scratch->tctxt);
|
||||
hwlmcb_rv_t rv;
|
||||
if (in_anchored
|
||||
|| curr_anchored_end == ANCHORED_MATCH_SENTINEL
|
||||
|| curr_anchored_end > end) {
|
||||
if (!t->activeArrayCount
|
||||
|| !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
|
||||
updateMinMatchOffset(&scratch->tctxt, end);
|
||||
rv = HWLM_CONTINUE_MATCHING;
|
||||
} else {
|
||||
rv = roseCatchUpSufAndChains(loc, scratch);
|
||||
}
|
||||
} else {
|
||||
if (!t->activeArrayCount) {
|
||||
rv = roseCatchUpAnchoredOnly(loc, scratch);
|
||||
} else {
|
||||
rv = roseCatchUpAll(loc, scratch);
|
||||
}
|
||||
}
|
||||
|
||||
assert(rv != HWLM_CONTINUE_MATCHING
|
||||
|| scratch->tctxt.minMatchOffset == end);
|
||||
assert(rv != HWLM_CONTINUE_MATCHING
|
||||
|| scratch->tctxt.minNonMpvMatchOffset == end);
|
||||
return rv;
|
||||
}
|
||||
|
||||
/* Catches up anything which may add triggers on the mpv: anchored matches
|
||||
* and suf/outfixes. The MPV will be run only to intersperse matches in
|
||||
* the output match stream if external matches are raised. */
|
||||
static rose_inline
|
||||
hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t, u8 *state,
|
||||
u64a end, struct hs_scratch *scratch,
|
||||
char in_anchored) {
|
||||
/* no need to catch up if we are at the same offset as last time */
|
||||
if (end <= scratch->tctxt.minNonMpvMatchOffset) {
|
||||
/* we must already be up to date */
|
||||
DEBUG_PRINTF("skip\n");
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
s64a loc = end - scratch->core_info.buf_offset;
|
||||
|
||||
assert(t == scratch->tctxt.t);
|
||||
assert(t->activeArrayCount); /* mpv is in active array */
|
||||
assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset);
|
||||
u64a curr_anchored_end = currentAnchoredEnd(t, &scratch->tctxt);
|
||||
if (in_anchored
|
||||
|| curr_anchored_end == ANCHORED_MATCH_SENTINEL
|
||||
|| curr_anchored_end > end) {
|
||||
if (!t->mpvTriggeredByLeaf) {
|
||||
/* no need to check as they never put triggers onto the mpv */
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
/* sadly, this branch rarely gets taken as the mpv itself is usually
|
||||
* alive. */
|
||||
if (!mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
|
||||
scratch->tctxt.minNonMpvMatchOffset = end;
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
return roseCatchUpSuf(loc, scratch);
|
||||
} else {
|
||||
return roseCatchUpAnchoredAndSuf(loc, scratch);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
264
src/rose/counting_miracle.h
Normal file
264
src/rose/counting_miracle.h
Normal file
@@ -0,0 +1,264 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_COUNTING_MIRACLE_H
|
||||
#define ROSE_COUNTING_MIRACLE_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "runtime.h"
|
||||
#include "rose_internal.h"
|
||||
#include "nfa/nfa_api_queue.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/simd_utils_ssse3.h"
|
||||
|
||||
/** \brief Maximum number of bytes to scan when looking for a "counting miracle"
|
||||
* stop character. */
|
||||
#define COUNTING_MIRACLE_LEN_MAX 256
|
||||
|
||||
static really_inline
|
||||
char roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end,
|
||||
u32 target_count, u32 *count_inout,
|
||||
const u8 **d_out) {
|
||||
assert(d <= d_end);
|
||||
|
||||
u32 count = *count_inout;
|
||||
|
||||
m128 chars = set16x8(c);
|
||||
|
||||
for (; d + 16 <= d_end; d_end -= 16) {
|
||||
m128 data = loadu128(d_end - 16);
|
||||
u32 z1 = movemask128(eq128(chars, data));
|
||||
count += popcount32(z1);
|
||||
|
||||
if (count >= target_count) {
|
||||
*d_out = d_end - 16;
|
||||
*count_inout = count;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (d != d_end) {
|
||||
char temp[sizeof(m128)];
|
||||
assert(d + sizeof(temp) > d_end);
|
||||
memset(temp, c + 1, sizeof(temp));
|
||||
memcpy(temp, d, d_end - d);
|
||||
m128 data = loadu128(temp);
|
||||
u32 z1 = movemask128(eq128(chars, data));
|
||||
count += popcount32(z1);
|
||||
|
||||
if (count >= target_count) {
|
||||
*d_out = d;
|
||||
*count_inout = count;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
*count_inout = count;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define GET_LO_4(chars) and128(chars, low4bits)
|
||||
#define GET_HI_4(chars) rshift2x64(andnot128(low4bits, chars), 4)
|
||||
|
||||
static really_inline
|
||||
u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison,
|
||||
const u8 *d, const u8 *d_end,
|
||||
u32 target_count, u32 *count_inout,
|
||||
const u8 **d_out) {
|
||||
assert(d <= d_end);
|
||||
|
||||
u32 count = *count_inout;
|
||||
|
||||
const m128 zeroes = zeroes128();
|
||||
const m128 low4bits = _mm_set1_epi8(0xf);
|
||||
|
||||
for (; d + 16 <= d_end; d_end -= 16) {
|
||||
m128 data = loadu128(d_end - 16);
|
||||
m128 c_lo = pshufb(mask_lo, GET_LO_4(data));
|
||||
m128 c_hi = pshufb(mask_hi, GET_HI_4(data));
|
||||
m128 t = and128(c_lo, c_hi);
|
||||
u32 z1 = movemask128(eq128(t, zeroes));
|
||||
count += popcount32(z1 ^ 0xffff);
|
||||
|
||||
if (count >= target_count) {
|
||||
*d_out = d_end - 16;
|
||||
*count_inout = count;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (d != d_end) {
|
||||
char temp[sizeof(m128)];
|
||||
assert(d + sizeof(temp) > d_end);
|
||||
memset(temp, poison, sizeof(temp));
|
||||
memcpy(temp, d, d_end - d);
|
||||
m128 data = loadu128(temp);
|
||||
m128 c_lo = pshufb(mask_lo, GET_LO_4(data));
|
||||
m128 c_hi = pshufb(mask_hi, GET_HI_4(data));
|
||||
m128 t = and128(c_lo, c_hi);
|
||||
u32 z1 = movemask128(eq128(t, zeroes));
|
||||
count += popcount32(z1 ^ 0xffff);
|
||||
|
||||
if (count >= target_count) {
|
||||
*d_out = d;
|
||||
*count_inout = count;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
*count_inout = count;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief "Counting Miracle" scan: If we see more than N instances of a
|
||||
* particular character class we know that the engine must be dead.
|
||||
*
|
||||
* Scans the buffer/history between relative locations \a begin_loc and \a
|
||||
* end_loc, and returns a miracle location (if any) that appears in the stream
|
||||
* after \a begin_loc.
|
||||
*
|
||||
* Returns 1 if some bytes can be skipped and sets \a miracle_loc
|
||||
* appropriately, 0 otherwise.
|
||||
*/
|
||||
static never_inline
|
||||
int roseCountingMiracleOccurs(const struct RoseEngine *t,
|
||||
const struct LeftNfaInfo *left,
|
||||
const struct core_info *ci, s64a begin_loc,
|
||||
const s64a end_loc, s64a *miracle_loc) {
|
||||
if (!left->countingMiracleOffset) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct RoseCountingMiracle *cm
|
||||
= (const void *)((const char *)t + left->countingMiracleOffset);
|
||||
|
||||
assert(!left->transient);
|
||||
assert(cm->count > 1); /* should be a normal miracle then */
|
||||
|
||||
DEBUG_PRINTF("looking for counting miracle over [%lld,%lld], maxLag=%u\n",
|
||||
begin_loc, end_loc, left->maxLag);
|
||||
DEBUG_PRINTF("ci->len=%zu, ci->hlen=%zu\n", ci->len, ci->hlen);
|
||||
|
||||
assert(begin_loc <= end_loc);
|
||||
assert(begin_loc >= -(s64a)ci->hlen);
|
||||
assert(end_loc <= (s64a)ci->len);
|
||||
|
||||
const s64a scan_end_loc = end_loc - left->maxLag;
|
||||
if (scan_end_loc <= begin_loc) {
|
||||
DEBUG_PRINTF("nothing to scan\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
const s64a start = MAX(begin_loc, scan_end_loc - COUNTING_MIRACLE_LEN_MAX);
|
||||
DEBUG_PRINTF("scan [%lld..%lld]\n", start, scan_end_loc);
|
||||
|
||||
u32 count = 0;
|
||||
|
||||
s64a m_loc = start;
|
||||
|
||||
if (!cm->shufti) {
|
||||
u8 c = cm->c;
|
||||
|
||||
// Scan buffer.
|
||||
const s64a buf_scan_start = MAX(0, start);
|
||||
if (scan_end_loc > buf_scan_start) {
|
||||
const u8 *buf = ci->buf;
|
||||
const u8 *d = buf + scan_end_loc;
|
||||
const u8 *d_start = buf + buf_scan_start;
|
||||
const u8 *d_out;
|
||||
if (roseCountingMiracleScan(c, d_start, d, cm->count, &count,
|
||||
&d_out)) {
|
||||
assert(d_out >= d_start);
|
||||
m_loc = (d_out - d_start) + buf_scan_start;
|
||||
goto success;
|
||||
}
|
||||
}
|
||||
|
||||
// Scan history.
|
||||
if (start < 0) {
|
||||
const u8 *hbuf_end = ci->hbuf + ci->hlen;
|
||||
const u8 *d = hbuf_end + MIN(0, scan_end_loc);
|
||||
const u8 *d_start = hbuf_end + start;
|
||||
const u8 *d_out;
|
||||
if (roseCountingMiracleScan(c, d_start, d, cm->count, &count,
|
||||
&d_out)) {
|
||||
assert(d_out >= d_start);
|
||||
m_loc = (d_out - d_start) + start;
|
||||
goto success;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
m128 lo = cm->lo;
|
||||
m128 hi = cm->hi;
|
||||
u8 poison = cm->poison;
|
||||
|
||||
// Scan buffer.
|
||||
const s64a buf_scan_start = MAX(0, start);
|
||||
if (scan_end_loc > buf_scan_start) {
|
||||
const u8 *buf = ci->buf;
|
||||
const u8 *d = buf + scan_end_loc;
|
||||
const u8 *d_start = buf + buf_scan_start;
|
||||
const u8 *d_out;
|
||||
if (roseCountingMiracleScanShufti(lo, hi, poison, d_start, d,
|
||||
cm->count, &count, &d_out)) {
|
||||
assert(d_out >= d_start);
|
||||
m_loc = (d_out - d_start) + buf_scan_start;
|
||||
goto success;
|
||||
}
|
||||
}
|
||||
|
||||
// Scan history.
|
||||
if (start < 0) {
|
||||
const u8 *hbuf_end = ci->hbuf + ci->hlen;
|
||||
const u8 *d = hbuf_end + MIN(0, scan_end_loc);
|
||||
const u8 *d_start = hbuf_end + start;
|
||||
const u8 *d_out;
|
||||
if (roseCountingMiracleScanShufti(lo, hi, poison, d_start, d,
|
||||
cm->count, &count, &d_out)) {
|
||||
assert(d_out >= d_start);
|
||||
m_loc = (d_out - d_start) + start;
|
||||
goto success;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("found %u/%u\n", count, cm->count);
|
||||
return 0;
|
||||
|
||||
success:
|
||||
DEBUG_PRINTF("found %u/%u\n", count, cm->count);
|
||||
assert(count >= cm->count);
|
||||
assert(m_loc < scan_end_loc);
|
||||
assert(m_loc >= start);
|
||||
|
||||
*miracle_loc = m_loc;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif
|
390
src/rose/eod.c
Normal file
390
src/rose/eod.c
Normal file
@@ -0,0 +1,390 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "catchup.h"
|
||||
#include "match.h"
|
||||
#include "rose_sidecar_runtime.h"
|
||||
#include "rose.h"
|
||||
#include "util/fatbit.h"
|
||||
|
||||
/**
 * Populate scratch's RoseContext and per-scan scratch fields for a scan
 * beginning at \p offset. Callers (e.g. roseEodExec) invoke this before
 * running the EOD path so that match callbacks and offsets are set up.
 */
static really_inline
void initContext(const struct RoseEngine *t, u8 *state, u64a offset,
                 struct hs_scratch *scratch, RoseCallback callback,
                 RoseCallbackSom som_callback, void *ctx) {
    struct RoseRuntimeState *rstate = getRuntimeState(state);
    struct RoseContext *tctxt = &scratch->tctxt;
    tctxt->t = t;
    tctxt->depth = rstate->stored_depth; // resume depth stored in stream state
    tctxt->groups = loadGroups(t, state); /* TODO: diff groups for eod */
    tctxt->lit_offset_adjust = scratch->core_info.buf_offset
                             - scratch->core_info.hlen
                             + 1; // index after last byte
    tctxt->delayLastEndOffset = offset;
    tctxt->lastEndOffset = offset;
    tctxt->filledDelayedSlots = 0;
    tctxt->state = state;
    tctxt->cb = callback;
    tctxt->cb_som = som_callback;
    tctxt->userCtx = ctx;
    // No matches have been seen yet in this scan.
    tctxt->lastMatchOffset = 0;
    tctxt->minMatchOffset = 0;
    tctxt->minNonMpvMatchOffset = 0;
    tctxt->next_mpv_offset = 0;
    tctxt->curr_anchored_loc = MMB_INVALID;
    tctxt->curr_row_offset = 0;

    scratch->catchup_pq.qm_size = 0; // empty the catchup priority queue
    scratch->al_log_sum = 0; /* clear the anchored logs */

    fatbit_clear(scratch->aqa); // no queues are active yet
}
|
||||
|
||||
/**
 * Run the EOD-anchored literal matcher (etable) over the tail of the
 * available data and then fire any delayed matches.
 *
 * In block mode the scan buffer is used; in streaming mode the history
 * buffer is used. Returns the result of cleanUpDelayed(), which may be
 * HWLM_TERMINATE_MATCHING if the user callback halted matching.
 */
static rose_inline
hwlmcb_rv_t roseEodRunMatcher(const struct RoseEngine *t, u64a offset,
                              struct hs_scratch *scratch,
                              const char is_streaming) {
    assert(t->ematcherOffset);

    size_t eod_len;
    const u8 *eod_data;
    if (!is_streaming) { /* Block */
        eod_data = scratch->core_info.buf;
        eod_len = scratch->core_info.len;
    } else { /* Streaming */
        eod_len = scratch->core_info.hlen;
        eod_data = scratch->core_info.hbuf;
    }

    assert(eod_data);
    assert(eod_len);

    // If we don't have enough bytes to produce a match from an EOD table scan,
    // there's no point scanning.
    if (eod_len < t->eodmatcherMinWidth) {
        DEBUG_PRINTF("len=%zu < eodmatcherMinWidth=%u\n", eod_len,
                     t->eodmatcherMinWidth);
        return MO_CONTINUE_MATCHING;
    }

    // Ensure that we only need scan the last N bytes, where N is the length of
    // the eod-anchored matcher region.
    size_t adj = eod_len - MIN(eod_len, t->ematcherRegionSize);

    DEBUG_PRINTF("eod offset=%llu, eod length=%zu\n", offset, eod_len);

    struct RoseContext *tctxt = &scratch->tctxt;

    /* update side_curr for eod_len */
    tctxt->side_curr = offset - eod_len;

    /* no need to enable any sidecar groups as they are for .*A.* constructs
     * not allowed in the eod table */

    const struct HWLM *etable = getELiteralMatcher(t);

    // Matches are delivered through roseCallback; the return value is not
    // checked here as delayed-match cleanup below reports termination.
    hwlmExec(etable, eod_data, eod_len, adj, roseCallback, tctxt, tctxt->groups);

    // We may need to fire delayed matches
    u8 dummy_delay_mask = 0;
    return cleanUpDelayed(0, offset, tctxt, &dummy_delay_mask);
}
|
||||
|
||||
/**
 * Walk the EOD sparse iterator over the on role-state bits and fire reports
 * for each eligible role.
 *
 * Returns MO_HALT_MATCHING if the user callback requested a halt, otherwise
 * MO_CONTINUE_MATCHING. A no-op when the engine has no EOD iterator.
 */
static rose_inline
int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset,
                       struct hs_scratch *scratch) {
    if (!t->eodIterOffset) {
        return MO_CONTINUE_MATCHING;
    }

    const struct RoseRole *roleTable = getRoleTable(t);
    const struct RosePred *predTable = getPredTable(t);
    const struct RoseIterMapping *iterMapBase
        = getByOffset(t, t->eodIterMapOffset);
    const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset);
    assert(ISALIGNED(iterMapBase));
    assert(ISALIGNED(it));

    // Sparse iterator state was allocated earlier
    struct mmbit_sparse_state *s = scratch->sparse_iter_state;
    struct fatbit *handled_roles = scratch->handled_roles;

    const u32 numStates = t->rolesWithStateCount;

    void *role_state = getRoleState(state);
    u32 idx = 0;
    u32 i = mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s);

    fatbit_clear(handled_roles); // nothing has been reported in this walk yet

    // Outer loop: each set pred-state bit reached by the sparse iterator.
    for (; i != MMB_INVALID;
         i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) {
        DEBUG_PRINTF("pred state %u (iter idx=%u) is on\n", i, idx);
        const struct RoseIterMapping *iterMap = iterMapBase + idx;
        const struct RoseIterRole *roles = getByOffset(t, iterMap->offset);
        assert(ISALIGNED(roles));

        DEBUG_PRINTF("%u roles to consider\n", iterMap->count);
        // Inner loop: each candidate role hanging off this pred state.
        for (u32 j = 0; j != iterMap->count; j++) {
            u32 role = roles[j].role;
            assert(role < t->roleCount);
            DEBUG_PRINTF("checking role %u, pred %u:\n", role, roles[j].pred);
            const struct RoseRole *tr = roleTable + role;

            if (fatbit_isset(handled_roles, t->roleCount, role)) {
                DEBUG_PRINTF("role %u already handled by the walk, skip\n",
                             role);
                continue;
            }

            // Special case: if this role is a trivial case (pred type simple)
            // we don't need to check any history and we already know the pred
            // role is on.
            if (tr->flags & ROSE_ROLE_PRED_SIMPLE) {
                DEBUG_PRINTF("pred type is simple, no need for checks\n");
            } else {
                assert(roles[j].pred < t->predCount);
                const struct RosePred *tp = predTable + roles[j].pred;
                if (!roseCheckPredHistory(tp, offset)) {
                    continue;
                }
            }

            /* mark role as handled so we don't touch it again in this walk */
            fatbit_set(handled_roles, t->roleCount, role);

            DEBUG_PRINTF("fire report for role %u, report=%u\n", role,
                         tr->reportId);
            int rv = scratch->tctxt.cb(offset, tr->reportId,
                                       scratch->tctxt.userCtx);
            if (rv == MO_HALT_MATCHING) {
                return MO_HALT_MATCHING;
            }
        }
    }

    return MO_CONTINUE_MATCHING;
}
|
||||
|
||||
/**
 * For every active outfix/suffix engine that accepts at EOD, check its final
 * state at \p offset and fire any accept-at-EOD reports via the context
 * callbacks.
 *
 * In streaming mode the compressed stream state is first expanded into
 * scratch full state; in block mode the full state is assumed current.
 */
static rose_inline
void roseCheckNfaEod(const struct RoseEngine *t, u8 *state,
                     struct hs_scratch *scratch, u64a offset,
                     const char is_streaming) {
    /* data, len is used for state decompress, should be full available data */
    const u8 *aa = getActiveLeafArray(t, state);
    const u32 aaCount = t->activeArrayCount;

    // Decompression key: last history byte (streaming only), else 0.
    u8 key = 0;

    if (is_streaming) {
        const u8 *eod_data = scratch->core_info.hbuf;
        size_t eod_len = scratch->core_info.hlen;
        key = eod_len ? eod_data[eod_len - 1] : 0;
    }

    // Iterate over all queues flagged active in the leaf array.
    for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID;
         qi = mmbit_iterate(aa, aaCount, qi)) {
        const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
        const struct NFA *nfa = getNfaByInfo(t, info);

        if (!nfaAcceptsEod(nfa)) {
            DEBUG_PRINTF("nfa %u does not accept eod\n", qi);
            continue;
        }

        DEBUG_PRINTF("checking nfa %u\n", qi);

        char *fstate = scratch->fullState + info->fullStateOffset;
        const char *sstate = (const char *)state + info->stateOffset;

        if (is_streaming) {
            // Decompress stream state.
            nfaExpandState(nfa, fstate, sstate, offset, key);
        }

        nfaCheckFinalState(nfa, fstate, sstate, offset, scratch->tctxt.cb,
                           scratch->tctxt.cb_som, scratch->tctxt.userCtx);
    }
}
|
||||
|
||||
static rose_inline
|
||||
void cleanupAfterEodMatcher(const struct RoseEngine *t, u8 *state, u64a offset,
|
||||
struct hs_scratch *scratch) {
|
||||
struct RoseContext *tctxt = &scratch->tctxt;
|
||||
|
||||
// Flush history to make sure it's consistent.
|
||||
roseFlushLastByteHistory(t, state, offset, tctxt);
|
||||
|
||||
// Catch up the sidecar to cope with matches raised in the etable.
|
||||
catchup_sidecar(tctxt, offset);
|
||||
}
|
||||
|
||||
/**
 * After the EOD-anchored matcher has run, push an MQE_END event through each
 * active suffix engine and, if the engine survives, check its final state for
 * accept-at-EOD reports.
 */
static rose_inline
void roseCheckEodSuffixes(const struct RoseEngine *t, u8 *state, u64a offset,
                          struct hs_scratch *scratch) {
    const u8 *aa = getActiveLeafArray(t, state);
    const u32 aaCount = t->activeArrayCount;
    UNUSED u32 qCount = t->queueCount; // only used in the assertion below

    for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID;
         qi = mmbit_iterate(aa, aaCount, qi)) {
        const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
        const struct NFA *nfa = getNfaByInfo(t, info);

        assert(nfaAcceptsEod(nfa));

        DEBUG_PRINTF("checking nfa %u\n", qi);

        assert(fatbit_isset(scratch->aqa, qCount, qi)); /* we have just been
                                                           triggered */

        char *fstate = scratch->fullState + info->fullStateOffset;
        const char *sstate = (const char *)state + info->stateOffset;

        struct mq *q = scratch->queues + qi;

        // Terminate the queue at the end of the scanned block.
        pushQueueNoMerge(q, MQE_END, scratch->core_info.len);

        q->context = NULL;
        /* rose exec is used as we don't want to / can't raise matches in the
         * history buffer. */
        char rv = nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX);
        if (rv) { /* nfa is still alive */
            nfaCheckFinalState(nfa, fstate, sstate, offset, scratch->tctxt.cb,
                               scratch->tctxt.cb_som, scratch->tctxt.userCtx);
        }
    }
}
|
||||
|
||||
/**
 * Core EOD processing, shared by the block and streaming entry points.
 *
 * Phases, in order: fire the special EOD event literal (if any), check
 * active engines for accept-at-EOD, run the EOD sparse iterator, then run
 * the EOD-anchored literal matcher and report its consequences. Each phase
 * can end processing early if the user callback halts matching.
 */
static really_inline
void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset,
                   struct hs_scratch *scratch, const char is_streaming) {
    assert(t);
    // Exactly one of the scan buffer / history buffer must be present.
    assert(scratch->core_info.buf || scratch->core_info.hbuf);
    assert(!scratch->core_info.buf || !scratch->core_info.hbuf);
    assert(!can_stop_matching(scratch));

    // Fire the special EOD event literal.
    if (t->hasEodEventLiteral) {
        DEBUG_PRINTF("firing eod event id %u at offset %llu\n",
                     t->eodLiteralId, offset);
        const struct core_info *ci = &scratch->core_info;
        size_t len = ci->buf ? ci->len : ci->hlen;
        assert(len || !ci->buf); /* len may be 0 if no history is required
                                  * (bounds checks only can lead to this) */

        roseRunEvent(len, t->eodLiteralId, &scratch->tctxt);
        if (can_stop_matching(scratch)) {
            DEBUG_PRINTF("user told us to stop\n");
            return;
        }
    }

    roseCheckNfaEod(t, state, scratch, offset, is_streaming);

    if (!t->eodIterOffset && !t->ematcherOffset) {
        DEBUG_PRINTF("no eod accepts\n");
        return;
    }

    // Handle pending EOD reports.
    int itrv = roseEodRunIterator(t, state, offset, scratch);
    if (itrv == MO_HALT_MATCHING) {
        return;
    }

    // Run the EOD anchored matcher if there is one.
    if (t->ematcherOffset) {
        assert(t->ematcherRegionSize);
        // Unset the reports we just fired so we don't fire them again below.
        mmbit_clear(getRoleState(state), t->rolesWithStateCount);
        mmbit_clear(getActiveLeafArray(t, state), t->activeArrayCount);
        sidecar_enabled_populate(t, scratch, state);

        hwlmcb_rv_t rv = roseEodRunMatcher(t, offset, scratch, is_streaming);
        if (rv == HWLM_TERMINATE_MATCHING) {
            return;
        }

        cleanupAfterEodMatcher(t, state, offset, scratch);

        // Fire any new EOD reports.
        roseEodRunIterator(t, state, offset, scratch);

        roseCheckEodSuffixes(t, state, offset, scratch);
    }
}
|
||||
|
||||
/**
 * Streaming-mode EOD entry point: set up a fresh RoseContext and run the
 * shared EOD path.
 *
 * Bails out early (reporting nothing) when the stream offset is beyond the
 * engine's maximum bi-anchored width, since the stored state may be stale.
 */
void roseEodExec(const struct RoseEngine *t, u8 *state, u64a offset,
                 struct hs_scratch *scratch, RoseCallback callback,
                 RoseCallbackSom som_callback, void *context) {
    assert(state);
    assert(scratch);
    assert(callback);
    assert(context);
    assert(t->requiresEodCheck);
    DEBUG_PRINTF("ci buf %p/%zu his %p/%zu\n", scratch->core_info.buf,
                 scratch->core_info.len, scratch->core_info.hbuf,
                 scratch->core_info.hlen);

    if (t->maxBiAnchoredWidth != ROSE_BOUND_INF
        && offset > t->maxBiAnchoredWidth) {
        DEBUG_PRINTF("bailing, we are beyond max width\n");
        /* also some of the history/state may be stale */
        return;
    }

    initContext(t, state, offset, scratch, callback, som_callback, context);

    roseEodExec_i(t, state, offset, scratch, 1); /* 1 == streaming */
}
|
||||
|
||||
static rose_inline
|
||||
void prepForEod(const struct RoseEngine *t, u8 *state, size_t length,
|
||||
struct RoseContext *tctxt) {
|
||||
roseFlushLastByteHistory(t, state, length, tctxt);
|
||||
tctxt->lastEndOffset = length;
|
||||
if (t->requiresEodSideCatchup) {
|
||||
catchup_sidecar(tctxt, length);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Block-mode EOD entry point. Assumes the RoseContext was already set up by
 * the main block scan; only history/sidecar need refreshing before the
 * shared EOD path runs.
 */
void roseBlockEodExec(const struct RoseEngine *t, u64a offset,
                      struct hs_scratch *scratch) {
    assert(t->requiresEodCheck);
    // Caller is responsible for the max-width check in block mode.
    assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF
           || offset <= t->maxBiAnchoredWidth);

    assert(!can_stop_matching(scratch));

    u8 *state = (u8 *)scratch->core_info.state;

    // Ensure that history is correct before we look for EOD matches
    prepForEod(t, state, scratch->core_info.len, &scratch->tctxt);

    roseEodExec_i(t, state, offset, scratch, 0); /* 0 == block mode */
}
|
154
src/rose/infix.h
Normal file
154
src/rose/infix.h
Normal file
@@ -0,0 +1,154 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef INFIX_H
|
||||
#define INFIX_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "nfa/nfa_api.h"
|
||||
#include "nfa/nfa_api_queue.h"
|
||||
|
||||
static really_inline
|
||||
int infixTooOld(struct mq *q, s64a curr_loc) {
|
||||
u32 maxAge = q->nfa->maxWidth;
|
||||
|
||||
if (!maxAge) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return q_last_loc(q) + maxAge < curr_loc;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Removes tops which are known not to affect the final state from the queue.
|
||||
* May also reinitialise the engine state if it is unneeded.
|
||||
*
|
||||
* maxAge is the maximum width of the infix. Any tops/state before this can be
|
||||
* ignored. 0 is used to indicate that there is no upper bound on the width of
|
||||
* the pattern.
|
||||
*
|
||||
* maxTops is the maximum number of locations of tops that can affect the top.
|
||||
* It is only possible for the last maxTops tops to affect the final state -
|
||||
* earlier ones can be safely removed. Also, any state before the max tops may
|
||||
* be ignored.
|
||||
*
|
||||
* This code assumes/requires that there are not multiple tops at the same
|
||||
* location in the queue. This code also assumes that it is not a multitop
|
||||
* engine.
|
||||
*/
|
||||
static really_inline
void reduceQueue(struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) {
    assert(q->end > q->cur);
    assert(maxTops);
    u32 qlen = q->end - q->cur; /* includes MQE_START */
    DEBUG_PRINTF("q=%p, len=%u, maxTops=%u maxAge=%u\n", q, qlen, maxTops,
                 maxAge);

    // Determine whether anything can actually be removed: a stale START
    // event, more tops than can matter, or a START that can be advanced to
    // the first top's location.
    char any_work = 0;
    if (maxAge && q->items[q->cur].location + maxAge < curr_loc) {
        any_work = 1; // START is older than the width bound
    }

    if (qlen - 1 > maxTops) {
        any_work = 1; // surplus tops to drop
    }

    if (qlen - 1 == maxTops
        && q->items[q->cur].location != q->items[q->cur + 1].location) {
        /* we can advance start to the first top location */
        any_work = 1;
    }

    if (!any_work) {
        DEBUG_PRINTF("nothing to do\n");
        return;
    }

#ifdef DEBUG
    debugQueue(q);
#endif

    // If state before the surviving tops cannot matter, it will be
    // reinitialised below after the queue is compacted.
    char drop_state = qlen - 1 >= maxTops
        || (maxAge && q->items[q->cur].location + maxAge < curr_loc);

    LIMIT_TO_AT_MOST(&maxTops, qlen - 1);

    // We leave our START where it is, at the front of the queue.
    assert(q->items[q->cur].type == MQE_START);

    // We want to shuffle maxQueueLen items from the end of the queue to just
    // after the start, effectively dequeuing old items. We could use memmove
    // for this, but it's probably not a good idea to take the cost of the
    // function call.
    const struct mq_item *src = &q->items[q->cur + qlen - maxTops];

    q->items[0] = q->items[q->cur]; /* shift start event to 0 slot */
    q->cur = 0;
    q->end = 1;
    struct mq_item *dst = &q->items[1];
    u32 i = 0;
    if (maxAge) {
        /* any event which is older than maxAge can be dropped */
        for (; i < maxTops; i++, src++) {
            if (src->location >= curr_loc - maxAge) {
                break;
            }
        }
    }

    // Copy the surviving tops into place after the START event.
    for (; i < maxTops; i++) {
        *dst = *src;
        src++;
        dst++;
        q->end++;
    }

    if (drop_state) {
        /* clear state and shift start up to first top */
        s64a new_loc;
        if (q->end > 1) {
            new_loc = q->items[1].location;
        } else {
            DEBUG_PRINTF("no tops\n");
            new_loc = curr_loc;
        }

        DEBUG_PRINTF("advancing start from %lld to %lld\n",
                     q->items[0].location, new_loc);
        assert(new_loc > q->items[0].location);
        q->items[0].location = new_loc;
        nfaQueueInitState(q->nfa, q);
    }

    DEBUG_PRINTF("reduced queue to len=%u\n", q->end - q->cur);
#ifdef DEBUG
    debugQueue(q);
#endif
}
|
||||
|
||||
#endif
|
120
src/rose/init.c
Normal file
120
src/rose/init.c
Normal file
@@ -0,0 +1,120 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "init.h"
|
||||
#include "match.h"
|
||||
#include "runtime.h"
|
||||
#include "scratch.h"
|
||||
#include "rose.h"
|
||||
#include "rose_common.h"
|
||||
#include "rose_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/mcclellan.h"
|
||||
#include "nfa/nfa_api_util.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "sidecar/sidecar.h"
|
||||
#include "sidecar/sidecar_internal.h"
|
||||
#include "util/multibit.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/** Initialise the RoseRuntimeState header at the front of stream state. */
static really_inline
void init_rstate(const struct RoseEngine *t, u8 *state) {
    // Set runtime state: initial depth is 1 and we take our initial groups
    // from the RoseEngine.
    DEBUG_PRINTF("setting initial groups to 0x%016llx\n", t->initialGroups);
    struct RoseRuntimeState *rstate = getRuntimeState(state);
    rstate->stored_depth = 1;
    storeGroups(t, state, t->initialGroups);
    rstate->flags = 0;
    rstate->broken = NOT_BROKEN; // stream starts out healthy
}
|
||||
|
||||
/**
 * Initialise sidecar enabled-literal state by copying the compile-time
 * template stored in the bytecode into stream state.
 */
static really_inline
void init_sidecar(const struct RoseEngine *t, u8 *state) {
    assert(getSLiteralMatcher(t)); // only called when a sidecar exists

    struct sidecar_enabled *enabled_state
        = (struct sidecar_enabled *)(state + t->stateOffsets.sidecar);

    DEBUG_PRINTF("welcome to the sidecar\n");
    assert(t->initSideEnableOffset);
    // We have to enable some sidecar literals
    const char *template = (const char *)t + t->initSideEnableOffset;

    memcpy(enabled_state, template, t->stateOffsets.sidecar_size);
}
|
||||
|
||||
/**
 * Initialise compressed NFA state for every outfix engine, plus the MPV
 * engine if present (which is also flagged active in the leaf array).
 */
static really_inline
void init_outfixes(const struct RoseEngine *t, u8 *state) {
    /* The active leaf array has been init'ed by the scatter with outfix
     * bits set on */

    // Init the NFA state for each outfix.
    for (u32 qi = t->outfixBeginQueue; qi < t->outfixEndQueue; qi++) {
        const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
        const struct NFA *nfa = getNfaByInfo(t, info);
        nfaInitCompressedState(nfa, 0, state + info->stateOffset,
                               0 /* assume NUL at start */);
    }

    if (t->initMpvNfa != MO_INVALID_IDX) {
        const struct NfaInfo *info = getNfaInfoByQueue(t, t->initMpvNfa);
        const struct NFA *nfa = getNfaByInfo(t, info);
        nfaInitCompressedState(nfa, 0, state + info->stateOffset,
                               0 /* assume NUL at start */);
        // The MPV starts active, unlike ordinary outfixes.
        mmbit_set(getActiveLeafArray(t, state), t->activeArrayCount,
                  t->initMpvNfa);
    }
}
|
||||
|
||||
/**
 * Initialise all Rose stream state for a new stream: runtime header,
 * sidecar (if any), scattered multibit state, outfixes, and the floating
 * matcher's stream state.
 */
void roseInitState(const struct RoseEngine *t, u8 *state) {
    assert(t);
    assert(state);

    DEBUG_PRINTF("init for Rose %p with %u roles (%u with state indices)\n",
                 t, t->roleCount, t->rolesWithStateCount);

    // Rose is guaranteed 8-aligned state
    assert(ISALIGNED_N(state, 8));

    init_rstate(t, state);

    // Init the sidecar state
    if (t->smatcherOffset) {
        init_sidecar(t, state);
    }

    init_state(t, state);   // scatter-based bulk initialisation
    init_outfixes(t, state);

    // Clear the floating matcher state, if any.
    DEBUG_PRINTF("clearing %u bytes of floating matcher state\n",
                 t->floatingStreamState);
    memset(getFloatingMatcherState(t, state), 0, t->floatingStreamState);
}
|
46
src/rose/init.h
Normal file
46
src/rose/init.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_INIT_H
|
||||
#define ROSE_INIT_H
|
||||
|
||||
#include "rose_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/scatter_runtime.h"
|
||||
|
||||
/*
|
||||
* Initialisation code common to streaming mode Rose (init.c) and block mode
|
||||
* Rose (block.c) code.
|
||||
*/
|
||||
|
||||
/** Apply the engine's compile-time scatter plan to initialise stream state. */
static really_inline
void init_state(const struct RoseEngine *t, u8 *state) {
    scatter(state, t, &t->state_init);
}
|
||||
|
||||
#endif // ROSE_INIT_H
|
2127
src/rose/match.c
Normal file
2127
src/rose/match.c
Normal file
File diff suppressed because it is too large
Load Diff
326
src/rose/match.h
Normal file
326
src/rose/match.h
Normal file
@@ -0,0 +1,326 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_MATCH_H
|
||||
#define ROSE_MATCH_H
|
||||
|
||||
#include "hwlm/hwlm.h"
|
||||
#include "runtime.h"
|
||||
#include "scratch.h"
|
||||
#include "rose_common.h"
|
||||
#include "rose_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/nfa_api.h"
|
||||
#include "nfa/nfa_api_queue.h"
|
||||
#include "nfa/nfa_api_util.h"
|
||||
#include "som/som_runtime.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/internal_report.h"
|
||||
#include "util/multibit.h"
|
||||
|
||||
/* Callbacks, defined in catchup.c */
|
||||
|
||||
int roseNfaAdaptor(u64a offset, ReportID id, void *context);
|
||||
int roseNfaAdaptorNoInternal(u64a offset, ReportID id, void *context);
|
||||
int roseNfaSomAdaptor(u64a from_offset, u64a offset, ReportID id, void *context);
|
||||
|
||||
/* Callbacks, defined in match.c */
|
||||
|
||||
hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctx);
|
||||
hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id,
|
||||
void *ctx);
|
||||
int roseAnchoredCallback(u64a end, u32 id, void *ctx);
|
||||
void roseRunEvent(size_t end, u32 id, struct RoseContext *tctxt);
|
||||
|
||||
/* Common code, used all over Rose runtime */
|
||||
|
||||
/**
 * Reset the anchored-log cursor in the RoseContext to the first logged
 * location (and the first set row offset within it), or MMB_INVALID if the
 * log is empty.
 */
static rose_inline
void resetAnchoredLog(const struct RoseEngine *t, struct hs_scratch *scratch) {
    u8 **anchoredRows = getAnchoredLog(scratch);
    u32 region_width = t->anchoredMatches;
    struct RoseContext *tctxt = &scratch->tctxt;

    // am_log_sum is a bitmap of log rows that contain entries.
    tctxt->curr_anchored_loc = bf64_iterate(scratch->am_log_sum, MMB_INVALID);
    if (tctxt->curr_anchored_loc != MMB_INVALID) {
        assert(tctxt->curr_anchored_loc < scratch->anchored_region_len);
        u8 *curr_row = anchoredRows[tctxt->curr_anchored_loc];
        tctxt->curr_row_offset = mmbit_iterate(curr_row, region_width,
                                               MMB_INVALID);
        assert(tctxt->curr_row_offset != MMB_INVALID);
    }
    DEBUG_PRINTF("AL reset --> %u, %u\n", tctxt->curr_anchored_loc,
                 tctxt->curr_row_offset);
}
|
||||
|
||||
hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r,
|
||||
u64a end, struct RoseContext *tctxt,
|
||||
char in_anchored, char in_catchup);
|
||||
|
||||
/**
 * Fully initialise an NFA queue for queue index \p qi: engine pointer,
 * state pointers, scan-buffer view, and match callbacks.
 */
static really_inline
void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t,
               struct RoseContext *tctxt) {
    struct hs_scratch *scratch = tctxtToScratch(tctxt);
    const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
    assert(scratch->fullState);
    q->nfa = getNfaByInfo(t, info);
    q->end = 0;
    q->cur = 0;
    q->state = scratch->fullState + info->fullStateOffset;
    q->streamState = (char *)tctxt->state + info->stateOffset;
    q->offset = scratch->core_info.buf_offset;
    q->buffer = scratch->core_info.buf;
    q->length = scratch->core_info.len;
    q->history = scratch->core_info.hbuf;
    q->hlength = scratch->core_info.hlen;
    // Engines with only external reports can use the cheaper adaptor.
    if (info->only_external) {
        q->cb = roseNfaAdaptorNoInternal;
    } else {
        q->cb = roseNfaAdaptor;
    }
    q->som_cb = roseNfaSomAdaptor;
    q->context = tctxt;
    q->report_current = 0;

    DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
                 "state=%u\n", qi, q->offset, info->fullStateOffset,
                 info->stateOffset, *(u32 *)q->state);
}
|
||||
|
||||
/**
 * Initialise the scratch queue for a leftfix (rose prefix/infix) engine.
 * Unlike initQueue(), no callbacks are installed, and transient leftfixes
 * point their stream state at scratch tstate rather than real stream state.
 */
static really_inline
void initRoseQueue(const struct RoseEngine *t, u32 qi,
                   const struct LeftNfaInfo *left,
                   struct RoseContext *tctxt) {
    struct hs_scratch *scratch = tctxtToScratch(tctxt);
    struct mq *q = scratch->queues + qi;
    const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
    q->nfa = getNfaByInfo(t, info);
    q->end = 0;
    q->cur = 0;
    q->state = scratch->fullState + info->fullStateOffset;

    // Transient roses don't have stream state, we use tstate in scratch
    // instead. The only reason we need this at ALL is for LimEx extended
    // regions, which assume that they have access to q->streamState +
    // compressedStateSize.
    if (left->transient) {
        q->streamState = (char *)scratch->tstate + info->stateOffset;
    } else {
        q->streamState = (char *)tctxt->state + info->stateOffset;
    }

    q->offset = scratch->core_info.buf_offset;
    q->buffer = scratch->core_info.buf;
    q->length = scratch->core_info.len;
    q->history = scratch->core_info.hbuf;
    q->hlength = scratch->core_info.hlen;
    q->cb = NULL;       // leftfix matches are consumed directly, not reported
    q->context = NULL;
    q->report_current = 0;

    DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
                 "state=%u\n", qi, q->offset, info->fullStateOffset,
                 info->stateOffset, *(u32 *)q->state);
}
|
||||
|
||||
/** returns 0 if space for two items (top and end) on the queue */
|
||||
static really_inline
|
||||
char isQueueFull(const struct mq *q) {
|
||||
return q->end + 2 > MAX_MQE_LEN;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void loadStreamState(const struct NFA *nfa, struct mq *q, s64a loc) {
|
||||
DEBUG_PRINTF("offset=%llu, length=%zu, hlength=%zu, loc=%lld\n",
|
||||
q->offset, q->length, q->hlength, loc);
|
||||
nfaExpandState(nfa, q->state, q->streamState, q->offset + loc,
|
||||
queue_prev_byte(q, loc));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void storeRoseDelay(const struct RoseEngine *t, u8 *state,
|
||||
const struct LeftNfaInfo *left, u32 loc) {
|
||||
u32 di = left->lagIndex;
|
||||
if (di == ROSE_OFFSET_INVALID) {
|
||||
return;
|
||||
}
|
||||
|
||||
assert(loc < 256); // ONE WHOLE BYTE!
|
||||
DEBUG_PRINTF("storing rose delay %u in slot %u\n", loc, di);
|
||||
u8 *leftfixDelay = getLeftfixLagTable(t, state);
|
||||
assert(loc <= MAX_STORED_LEFTFIX_LAG);
|
||||
leftfixDelay[di] = loc;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void setAsZombie(const struct RoseEngine *t, u8 *state,
|
||||
const struct LeftNfaInfo *left) {
|
||||
u32 di = left->lagIndex;
|
||||
assert(di != ROSE_OFFSET_INVALID);
|
||||
if (di == ROSE_OFFSET_INVALID) {
|
||||
return;
|
||||
}
|
||||
|
||||
u8 *leftfixDelay = getLeftfixLagTable(t, state);
|
||||
leftfixDelay[di] = OWB_ZOMBIE_ALWAYS_YES;
|
||||
}
|
||||
|
||||
/* loadRoseDelay MUST NOT be called on the first stream write as it is only
|
||||
* initialized for running nfas on stream boundaries */
|
||||
static really_inline
|
||||
u32 loadRoseDelay(const struct RoseEngine *t, const u8 *state,
|
||||
const struct LeftNfaInfo *left) {
|
||||
u32 di = left->lagIndex;
|
||||
if (di == ROSE_OFFSET_INVALID) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const u8 *leftfixDelay = getLeftfixLagTableConst(t, state);
|
||||
u32 loc = leftfixDelay[di];
|
||||
DEBUG_PRINTF("read rose delay %u from slot %u\n", loc, di);
|
||||
return loc;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char isZombie(const struct RoseEngine *t, const u8 *state,
|
||||
const struct LeftNfaInfo *left) {
|
||||
u32 di = left->lagIndex;
|
||||
assert(di != ROSE_OFFSET_INVALID);
|
||||
if (di == ROSE_OFFSET_INVALID) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const u8 *leftfixDelay = getLeftfixLagTableConst(t, state);
|
||||
DEBUG_PRINTF("read owb %hhu from slot %u\n", leftfixDelay[di], di);
|
||||
return leftfixDelay[di] == OWB_ZOMBIE_ALWAYS_YES;
|
||||
}
|
||||
|
||||
hwlmcb_rv_t flushQueuedLiterals_i(struct RoseContext *tctxt, u64a end);
|
||||
|
||||
static really_inline
|
||||
hwlmcb_rv_t flushQueuedLiterals(struct RoseContext *tctxt, u64a end) {
|
||||
if (tctxt->delayLastEndOffset == end) {
|
||||
DEBUG_PRINTF("no progress, no flush\n");
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
if (!tctxt->filledDelayedSlots && !tctxtToScratch(tctxt)->al_log_sum) {
|
||||
tctxt->delayLastEndOffset = end;
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
return flushQueuedLiterals_i(tctxt, end);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlmcb_rv_t cleanUpDelayed(size_t length, u64a offset, struct RoseContext *tctxt,
|
||||
u8 *status) {
|
||||
if (can_stop_matching(tctxtToScratch(tctxt))) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
|
||||
if (flushQueuedLiterals(tctxt, length + offset)
|
||||
== HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
|
||||
if (tctxt->filledDelayedSlots) {
|
||||
DEBUG_PRINTF("dirty\n");
|
||||
*status |= DELAY_FLOAT_DIRTY;
|
||||
} else {
|
||||
*status &= ~DELAY_FLOAT_DIRTY;
|
||||
}
|
||||
|
||||
tctxt->filledDelayedSlots = 0;
|
||||
tctxt->delayLastEndOffset = offset;
|
||||
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void update_depth(struct RoseContext *tctxt, const struct RoseRole *tr) {
|
||||
u8 d = MAX(tctxt->depth, tr->depth + 1);
|
||||
assert(d >= tctxt->depth);
|
||||
DEBUG_PRINTF("depth now %hhu was %hhu\n", d, tctxt->depth);
|
||||
tctxt->depth = d;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
int roseCheckHistoryAnch(const struct RosePred *tp, u64a end) {
|
||||
DEBUG_PRINTF("end %llu min %u max %u\n", end, tp->minBound, tp->maxBound);
|
||||
if (tp->maxBound == ROSE_BOUND_INF) {
|
||||
return end >= tp->minBound;
|
||||
} else {
|
||||
return end >= tp->minBound && end <= tp->maxBound;
|
||||
}
|
||||
}
|
||||
|
||||
// Check that a predecessor's history requirements are satisfied.
|
||||
static really_inline
|
||||
int roseCheckPredHistory(const struct RosePred *tp, u64a end) {
|
||||
DEBUG_PRINTF("pred type %u\n", tp->historyCheck);
|
||||
|
||||
if (tp->historyCheck == ROSE_ROLE_HISTORY_ANCH) {
|
||||
return roseCheckHistoryAnch(tp, end);
|
||||
}
|
||||
|
||||
assert(tp->historyCheck == ROSE_ROLE_HISTORY_NONE ||
|
||||
tp->historyCheck == ROSE_ROLE_HISTORY_LAST_BYTE);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Note: uses the stashed sparse iter state; cannot be called from
 * anybody else who is using it */
/* Clears the last-byte-history role bits in the multibit role state once the
 * scan has actually reached the end of the current write. No-op if the
 * engine has no last-byte history iterator, if already flushed, or if
 * currEnd has not yet reached the end of the buffer. */
static rose_inline
void roseFlushLastByteHistory(const struct RoseEngine *t, u8 *state,
                              u64a currEnd, struct RoseContext *tctxt) {
    if (!t->lastByteHistoryIterOffset) {
        return; /* engine has no last-byte history roles */
    }

    struct hs_scratch *scratch = tctxtToScratch(tctxt);
    struct core_info *ci = &scratch->core_info;

    /* currEnd is last byte of string + 1 */
    if (tctxt->lastEndOffset == ci->buf_offset + ci->len
        || currEnd != ci->buf_offset + ci->len) {
        /* already flushed or it is not yet time to flush */
        return;
    }

    DEBUG_PRINTF("flushing\n");

    /* Sparse iterator is stored in the bytecode, relative to the start of
     * the RoseEngine. */
    const struct mmbit_sparse_iter *it
        = (const void *)((const char *)t + t->lastByteHistoryIterOffset);
    const u32 numStates = t->rolesWithStateCount;
    void *role_state = getRoleState(state);

    /* Unset all iterated role bits, using the stashed scratch sparse-iter
     * state (see note above). */
    mmbit_sparse_iter_unset(role_state, numStates, it,
                            scratch->sparse_iter_state);
}
|
||||
|
||||
#endif
|
138
src/rose/miracle.h
Normal file
138
src/rose/miracle.h
Normal file
@@ -0,0 +1,138 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_MIRACLE_H
|
||||
#define ROSE_MIRACLE_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "runtime.h"
|
||||
#include "rose_internal.h"
|
||||
|
||||
/** \brief Maximum number of bytes to scan when looking for a "miracle" stop
|
||||
* character. */
|
||||
#define MIRACLE_LEN_MAX 32
|
||||
|
||||
static really_inline
|
||||
u64a roseMiracleScan(const u8 *stop, const u8 *d, const u8 *d_start) {
|
||||
assert(d >= d_start);
|
||||
|
||||
// Note: unrolling this loop manually does appear to reduce its
|
||||
// performance. I'm sick of tilting at this particular windmill.
|
||||
|
||||
u32 mshift = 0;
|
||||
do {
|
||||
u64a s = (u64a)stop[*d];
|
||||
if (s) {
|
||||
s <<= mshift;
|
||||
return s;
|
||||
}
|
||||
mshift++;
|
||||
} while (--d >= d_start);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * \brief "Miracle" scan: uses stop table to check if we can skip forward to a
 * location where we know that the given rose engine will be in a known state.
 *
 * Scans the buffer/history between relative locations \a begin_loc and \a
 * end_loc, and returns a miracle location (if any) that appears in the stream
 * after \a begin_loc.
 *
 * Returns 1 if some bytes can be skipped and sets \a miracle_loc
 * appropriately, 0 otherwise.
 */
static rose_inline
char roseMiracleOccurs(const struct RoseEngine *t,
                       const struct LeftNfaInfo *left,
                       const struct core_info *ci, const s64a begin_loc,
                       const s64a end_loc, s64a *miracle_loc) {
    assert(!left->transient);
    assert(left->stopTable);

    DEBUG_PRINTF("looking for miracle over [%lld,%lld], maxLag=%u\n",
                 begin_loc, end_loc, left->maxLag);
    DEBUG_PRINTF("ci->len=%zu, ci->hlen=%zu\n", ci->len, ci->hlen);

    /* Locations are relative to the buffer start: negative values index into
     * history. */
    assert(begin_loc <= end_loc);
    assert(begin_loc >= -(s64a)ci->hlen);
    assert(end_loc <= (s64a)ci->len);

    const u8 *stop = getByOffset(t, left->stopTable);

    /* Only locations at least maxLag bytes before end_loc are useful. */
    const s64a scan_end_loc = end_loc - left->maxLag;
    if (scan_end_loc <= begin_loc) {
        DEBUG_PRINTF("nothing to scan\n");
        return 0;
    }

    /* Bound the backwards scan to MIRACLE_LEN_MAX bytes. */
    const s64a start = MAX(begin_loc, scan_end_loc - MIRACLE_LEN_MAX);
    DEBUG_PRINTF("scan [%lld..%lld]\n", start, scan_end_loc);

    u64a s = 0; // state, on bits are miracle locations

    // Scan buffer.
    const s64a buf_scan_start = MAX(0, start);
    if (scan_end_loc > buf_scan_start) {
        const u8 *buf = ci->buf;
        const u8 *d = buf + scan_end_loc - 1;
        const u8 *d_start = buf + buf_scan_start;
        s = roseMiracleScan(stop, d, d_start);
        if (s) {
            goto miracle_found;
        }
    }

    // Scan history.
    if (start < 0) {
        const u8 *hbuf_end = ci->hbuf + ci->hlen;
        const u8 *d = hbuf_end + MIN(0, scan_end_loc) - 1;
        const u8 *d_start = hbuf_end + start;
        s = roseMiracleScan(stop, d, d_start);
        if (scan_end_loc > 0) {
            // Shift s over to account for the buffer scan above.
            s <<= scan_end_loc;
        }
    }

    if (s) {
    miracle_found:
        /* The lowest set bit of s is the rightmost (latest) miracle. */
        DEBUG_PRINTF("s=0x%llx, ctz=%u\n", s, ctz64(s));
        s64a loc = end_loc - left->maxLag - ctz64(s) - 1;
        if (loc > begin_loc) {
            DEBUG_PRINTF("miracle at %lld\n", loc);
            *miracle_loc = loc;
            return 1;
        }
    }

    DEBUG_PRINTF("no viable miraculous stop characters found\n");
    return 0;
}
|
||||
|
||||
#endif // ROSE_MIRACLE_H
|
110
src/rose/rose.h
Normal file
110
src/rose/rose.h
Normal file
@@ -0,0 +1,110 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_H
|
||||
#define ROSE_H
|
||||
|
||||
#include "rose_types.h"
|
||||
#include "rose_internal.h"
|
||||
#include "runtime.h"
|
||||
#include "scratch.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/multibit.h"
|
||||
|
||||
// Initialise state space for engine use.
|
||||
void roseInitState(const struct RoseEngine *t, u8 *state);
|
||||
|
||||
void roseBlockEodExec(const struct RoseEngine *t, u64a offset,
|
||||
struct hs_scratch *scratch);
|
||||
void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch,
|
||||
RoseCallback callback, RoseCallbackSom som_callback,
|
||||
void *context);
|
||||
|
||||
/* assumes core_info in scratch has been init to point to data */
/* Runs the block-mode scan (roseBlockExec_i), then performs the EOD pass
 * (roseBlockEodExec) unless the engine does not require an EOD check,
 * matching has already halted, or the EOD pass provably has no work to do. */
static really_inline
void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch,
                   RoseCallback callback, RoseCallbackSom som_callback,
                   void *context) {
    assert(t);
    assert(scratch);
    assert(scratch->core_info.buf);

    // If this block is shorter than our minimum width, then no pattern in this
    // RoseEngine could match.
    /* minWidth checks should have already been performed by the caller */
    const size_t length = scratch->core_info.len;
    assert(length >= t->minWidth);

    // Similarly, we may have a maximum width (for engines constructed entirely
    // of bi-anchored patterns).
    /* This check is now handled by the interpreter */
    assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF
           || length <= t->maxBiAnchoredWidth);

    roseBlockExec_i(t, scratch, callback, som_callback, context);

    if (!t->requiresEodCheck) {
        return;
    }

    if (can_stop_matching(scratch)) {
        DEBUG_PRINTF("bailing, already halted\n");
        return;
    }

    struct mmbit_sparse_state *s = scratch->sparse_iter_state;
    const u32 numStates = t->rolesWithStateCount;
    u8 *state = (u8 *)scratch->core_info.state;
    void *role_state = getRoleState(state);
    u32 idx = 0;
    /* EOD sparse iterator is stored in the bytecode, relative to the
     * RoseEngine start. */
    const struct mmbit_sparse_iter *it
        = (const void *)((const u8 *)t + t->eodIterOffset);

    /* Skip the EOD pass when there is no EOD matcher, no EOD event literal,
     * no active leaf engines, and the EOD iterator (if present) finds no set
     * role bits. Note: mmbit_sparse_iter_begin is only evaluated when
     * eodIterOffset is non-zero, via the short-circuit. */
    if (!t->ematcherOffset && !t->hasEodEventLiteral
        && !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)
        && (!t->eodIterOffset
            || mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s)
               == MMB_INVALID)) {
        return;
    }

    roseBlockEodExec(t, length, scratch);
}
|
||||
|
||||
/* assumes core_info in scratch has been init to point to data */
|
||||
void roseStreamExec(const struct RoseEngine *t, u8 *state,
|
||||
struct hs_scratch *scratch, RoseCallback callback,
|
||||
RoseCallbackSom som_callback, void *context);
|
||||
|
||||
void roseEodExec(const struct RoseEngine *t, u8 *state, u64a offset,
|
||||
struct hs_scratch *scratch, RoseCallback callback,
|
||||
RoseCallbackSom som_callback, void *context);
|
||||
|
||||
#define ROSE_CONTINUE_MATCHING_NO_EXHAUST 2
|
||||
|
||||
#endif // ROSE_H
|
151
src/rose/rose_build.h
Normal file
151
src/rose/rose_build.h
Normal file
@@ -0,0 +1,151 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Rose Build interface.
|
||||
*
|
||||
* Rose Build interface. Everything you ever needed to feed literals in and
|
||||
* get a RoseEngine out. This header should be everything needed by the rest
|
||||
* of UE2.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_BUILD_H
|
||||
#define ROSE_BUILD_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "rose_common.h"
|
||||
#include "rose_in_graph.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/ue2string.h"
|
||||
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
struct NFA;
|
||||
struct SmallWriteEngine;
|
||||
struct RoseEngine;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct BoundaryReports;
|
||||
struct CompileContext;
|
||||
struct raw_puff;
|
||||
struct raw_som_dfa;
|
||||
class CharReach;
|
||||
class NGHolder;
|
||||
class ReportManager;
|
||||
class SomSlotManager;
|
||||
|
||||
/** \brief Deduplication analysis helper: answers whether a report set may
 * generate duplicate callbacks at a single offset (implementation supplied
 * by RoseBuild::generateDedupeAux()). */
class RoseDedupeAux {
public:
    virtual ~RoseDedupeAux();

    /** \brief True if we can not establish that at most a single callback will
     * be generated at a given offset from this set of reports. */
    virtual bool requiresDedupeSupport(const std::set<ReportID> &reports) const
        = 0;
};
|
||||
|
||||
/** \brief Abstract interface intended for callers from elsewhere in the tree,
 * real underlying implementation is RoseBuildImpl in rose_build_impl.h. */
class RoseBuild : boost::noncopyable {
public:
    virtual ~RoseBuild();

    /** \brief Adds a single literal. */
    virtual void add(bool anchored, bool eod, const ue2_literal &lit,
                     const ue2::flat_set<ReportID> &ids) = 0;

    /** \brief Adds a Rose literal path graph. NOTE(review): \a finalChance
     * presumably relaxes admission criteria — confirm in RoseBuildImpl. */
    virtual bool addRose(const RoseInGraph &ig, bool prefilter,
                         bool finalChance = false) = 0;
    /** \brief SOM ("sombe") variant of addRose(). */
    virtual bool addSombeRose(const RoseInGraph &ig) = 0;

    /** \brief Adds an outfix built from the given graph. */
    virtual bool addOutfix(const NGHolder &h) = 0;
    /** \brief Adds an outfix from a graph plus its SOM ("haig") DFA. */
    virtual bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) = 0;
    /** \brief Adds an outfix from a puff (repeat) description. */
    virtual bool addOutfix(const raw_puff &rp) = 0;

    /** \brief Adds a chained tail; queue and event ids are returned via the
     * out parameters. */
    virtual bool addChainTail(const raw_puff &rp, u32 *queue_out,
                              u32 *event_out) = 0;

    /** \brief Returns true if we were able to add it as a mask. */
    virtual bool add(bool anchored, const std::vector<CharReach> &mask,
                     const ue2::flat_set<ReportID> &reports) = 0;

    /** \brief Attempts to add the graph to the anchored acyclic table. Returns
     * true on success. */
    virtual bool addAnchoredAcyclic(const NGHolder &graph) = 0;

    /** \brief Checks whether the given mask can be added (see addMask()). */
    virtual bool validateMask(const std::vector<CharReach> &mask,
                              const ue2::flat_set<ReportID> &reports,
                              bool anchored, bool eod) const = 0;
    /** \brief Adds a mask; should only be called if validateMask() accepted
     * the same arguments. */
    virtual void addMask(const std::vector<CharReach> &mask,
                         const ue2::flat_set<ReportID> &reports, bool anchored,
                         bool eod) = 0;

    /** \brief Construct a runtime implementation. */
    virtual ue2::aligned_unique_ptr<RoseEngine> buildRose(u32 minWidth) = 0;

    /** \brief Builds the deduplication analysis helper for this pattern
     * set. */
    virtual std::unique_ptr<RoseDedupeAux> generateDedupeAux() const = 0;

    /** Get a unique report identifier for a prefix|infix engine */
    virtual ReportID getNewNfaReport() = 0;

    /** Note that we have seen a SOM pattern. */
    virtual void setSom() = 0;
};
|
||||
|
||||
// Construct a usable Rose builder.
|
||||
std::unique_ptr<RoseBuild> makeRoseBuilder(ReportManager &rm,
|
||||
SomSlotManager &ssm,
|
||||
const CompileContext &cc,
|
||||
const BoundaryReports &boundary);
|
||||
|
||||
bool roseCheckRose(const RoseInGraph &ig, bool prefilter,
|
||||
const ReportManager &rm, const CompileContext &cc);
|
||||
|
||||
size_t roseSize(const RoseEngine *t);
|
||||
|
||||
/* used by heuristics to determine the small write engine. High numbers are
|
||||
* intended to indicate a lightweight rose. */
|
||||
u32 roseQuality(const RoseEngine *t);
|
||||
|
||||
ue2::aligned_unique_ptr<RoseEngine>
|
||||
roseAddSmallWrite(const RoseEngine *t, const SmallWriteEngine *smwr);
|
||||
|
||||
bool roseIsPureLiteral(const RoseEngine *t);
|
||||
|
||||
size_t maxOverlap(const ue2_literal &a, const ue2_literal &b, u32 b_delay);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // ROSE_BUILD_H
|
1986
src/rose/rose_build_add.cpp
Normal file
1986
src/rose/rose_build_add.cpp
Normal file
File diff suppressed because it is too large
Load Diff
48
src/rose/rose_build_add_internal.h
Normal file
48
src/rose/rose_build_add_internal.h
Normal file
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_BUILD_ADD_INTERNAL_H
|
||||
#define ROSE_BUILD_ADD_INTERNAL_H
|
||||
|
||||
#include "rose_graph.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <set>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class RoseBuildImpl;
|
||||
|
||||
RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent,
|
||||
u32 minBound, u32 maxBound, u32 literalId,
|
||||
size_t literalLength,
|
||||
const ue2::flat_set<ReportID> &reports);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
779
src/rose/rose_build_add_mask.cpp
Normal file
779
src/rose/rose_build_add_mask.cpp
Normal file
@@ -0,0 +1,779 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "rose_build_impl.h"
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "grey.h"
|
||||
#include "rose_build_add_internal.h"
|
||||
#include "rose_build_anchored.h"
|
||||
#include "rose_in_util.h"
|
||||
#include "hwlm/hwlm_literal.h"
|
||||
#include "nfagraph/ng_depth.h"
|
||||
#include "nfagraph/ng_dump.h"
|
||||
#include "nfagraph/ng_holder.h"
|
||||
#include "nfagraph/ng_limex.h"
|
||||
#include "nfagraph/ng_reports.h"
|
||||
#include "nfagraph/ng_util.h"
|
||||
#include "nfagraph/ng_width.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/charreach_util.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/container.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/ue2string.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
#define MIN_MASK_LIT_LEN 2
|
||||
#define MAX_MASK_SIZE 255
|
||||
#define MAX_MASK_LITS 30
|
||||
|
||||
/*
 * Find the best run of literal positions in \a mask — each position must be
 * a single character or a caseless pair — and append that run's characters
 * to \a lit, writing the run's start index to \a offset.
 *
 * A run is broken by a non-literal position, by exceeding the available
 * history in streaming mode, or by a case-sensitivity change among
 * alphabetic characters (a run must stay uniformly nocase or case-sensitive
 * once an alpha character fixes the case mode). Longer runs win; the final
 * run wins ties when it ends within MAX_DELAY of the end of the mask.
 */
static
void findMaskLiteral(const vector<CharReach> &mask, bool streaming,
                     ue2_literal *lit, u32 *offset, const Grey &grey) {
    bool case_fixed = false; /* an alpha char has fixed the case mode */
    bool nocase = false;     /* current run's case mode, if fixed */

    /* Best run seen so far: [best_begin, best_end). */
    size_t best_begin = 0;
    size_t best_end = 0;
    size_t best_len = 0;

    /* Current run: [begin, end). */
    size_t begin = 0;
    size_t end = 0;

    for (size_t i = 0; i < mask.size(); i++) {
        bool fail = false;
        if (mask[i].count() != 1 && !mask[i].isCaselessChar()) {
            DEBUG_PRINTF("hit non-literal char, resetting at %zu\n", i);
            fail = true;
        }

        if (!fail && streaming && (end >= grey.maxHistoryAvailable + 1)) {
            DEBUG_PRINTF("hit literal limit, resetting at %zu\n", i);
            fail = true;
        }

        /* Case mode is fixed: an alpha char of the other sensitivity breaks
         * the run. */
        if (!fail && case_fixed && mask[i].isAlpha()) {
            if (nocase && mask[i].count() != 2) {
                fail = true;
            }

            if (!nocase && mask[i].count() != 1) {
                fail = true;
            }
        }

        if (fail) {
            /* Close the current run, keep it if it is the longest, and start
             * a fresh run after this position. */
            case_fixed = false;
            nocase = false;
            size_t len = end - begin;
            bool better = len > best_len;
            if (better) {
                best_begin = begin;
                best_end = end;
                best_len = len;
            }
            begin = i + 1;
            end = i + 1;
        } else {
            assert(end == i);
            end = i + 1;

            /* First alpha char in the run fixes its case mode. */
            if (mask[i].isAlpha()) {
                case_fixed = true;
                nocase = mask[i].count() == 2;
            }
        }
    }

    size_t len = end - begin;
    /* Everybody would rather be trigger towards the end */
    bool better = len >= best_len && mask.size() - end <= MAX_DELAY;

    if (better) {
        best_begin = begin;
        best_end = end;
        best_len = len;
    }

    /* Emit the winning run as a literal; count()==2 means caseless pair. */
    for (size_t i = best_begin; i < best_end; i++) {
        assert(mask[i].count() == 1 || mask[i].count() == 2);
        lit->push_back(mask[i].find_first(), mask[i].count() > 1);
    }

    *offset = verify_u32(best_begin);
}
|
||||
|
||||
static
|
||||
bool initFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
|
||||
for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
|
||||
char c = (char)i;
|
||||
bool nocase = myisupper(c) && cr.test(mytolower(c));
|
||||
if (myislower(c) && cr.test(mytoupper(c))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cand->size() >= MAX_MASK_LITS) {
|
||||
DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
|
||||
return false;
|
||||
}
|
||||
|
||||
cand->emplace_back(c, nocase);
|
||||
}
|
||||
|
||||
assert(cand->size() <= MAX_MASK_LITS);
|
||||
return !cand->empty();
|
||||
}
|
||||
|
||||
static
|
||||
bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
|
||||
DEBUG_PRINTF("expanding string with cr of %zu\n", cr.count());
|
||||
DEBUG_PRINTF(" current cand list size %zu\n", cand->size());
|
||||
|
||||
vector<ue2_literal> curr;
|
||||
|
||||
for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
|
||||
char c = (char)i;
|
||||
bool nocase = myisupper(c) && cr.test(mytolower(c));
|
||||
if (myislower(c) && cr.test(mytoupper(c))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const auto &lit : *cand) {
|
||||
if (curr.size() >= MAX_MASK_LITS) {
|
||||
DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
|
||||
return false;
|
||||
}
|
||||
|
||||
curr.emplace_back(c, nocase);
|
||||
curr.back() += lit;
|
||||
}
|
||||
}
|
||||
|
||||
if (curr.back().length() > MAX_MASK2_WIDTH &&
|
||||
any_of(begin(curr), end(curr), mixed_sensitivity)) {
|
||||
DEBUG_PRINTF("mixed-sensitivity lit is too long, stopping\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(curr.size() <= MAX_MASK_LITS);
|
||||
cand->swap(curr);
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
u32 scoreFmlCandidates(const vector<ue2_literal> &cand) {
|
||||
if (cand.empty()) {
|
||||
DEBUG_PRINTF("no candidates\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
const u32 len = cand.back().length();
|
||||
|
||||
DEBUG_PRINTF("length = %u count %zu\n", len, cand.size());
|
||||
u32 min_period = len;
|
||||
|
||||
for (const auto &lit : cand) {
|
||||
u32 period = lit.length() - maxStringSelfOverlap(lit);
|
||||
min_period = min(min_period, period);
|
||||
}
|
||||
DEBUG_PRINTF("min_period %u\n", min_period);
|
||||
u32 length_score =
|
||||
(5 * min_period + len) * (cand.back().any_nocase() ? 90 : 100);
|
||||
u32 count_penalty;
|
||||
if (len > 4) {
|
||||
count_penalty = 9 * len * cand.size();
|
||||
} else {
|
||||
count_penalty = 5 * cand.size();
|
||||
}
|
||||
if (length_score <= count_penalty) {
|
||||
return 1;
|
||||
}
|
||||
return length_score - count_penalty;
|
||||
}
|
||||
|
||||
/* favours later literals */
/**
 * Find the best set of equal-length literals contained in \p mask.
 *
 * For each position, candidates are seeded from that position's class and
 * greedily expanded leftwards. The best-scoring set (ties favour later
 * positions, via >=) is appended to *lit; *minBound is its start offset in
 * the mask and *length the common literal length. Returns false if no
 * scoring set was found.
 */
static
bool findMaskLiterals(const vector<CharReach> &mask, vector<ue2_literal> *lit,
                      u32 *minBound, u32 *length) {
    *minBound = 0;
    *length = 0;

    vector<ue2_literal> candidates, best_candidates;
    u32 best_score = 0;
    u32 best_minOffset = 0;
    vector<CharReach>::const_iterator it, itb, ite;
    for (it = itb = mask.begin(), ite = mask.end(); it != ite; ++it) {
        candidates.clear();
        if (!initFmlCandidates(*it, &candidates)) {
            DEBUG_PRINTF("failed to init\n");
            continue;
        }
        DEBUG_PRINTF("++\n");
        // Expand leftwards from the seed position as far as possible.
        vector<CharReach>::const_iterator jt = it;
        while (jt != itb) {
            --jt;
            DEBUG_PRINTF("--\n");
            if (!expandFmlCandidates(*jt, &candidates)) {
                DEBUG_PRINTF("expansion stopped\n");
                break;
            }
        }
        u32 score = scoreFmlCandidates(candidates);
        DEBUG_PRINTF("scored %u for literal set of size %zu\n", score,
                     candidates.size());
        // ">=" deliberately prefers later (equal-scoring) literal sets.
        if (!candidates.empty() && score >= best_score) {
            best_minOffset = it - itb - candidates.back().length() + 1;
            best_candidates.swap(candidates);
            best_score = score;
        }
    }

    if (!best_score) {
        DEBUG_PRINTF("no lits\n");
        return false;
    }

    *minBound = best_minOffset;
    *length = best_candidates.back().length();

    DEBUG_PRINTF("best minbound %u length %u\n", *minBound, *length);
    for (const auto &cand : best_candidates) {
        assert(cand.length() == *length);
        lit->push_back(cand);
    }

    return true;
}
|
||||
|
||||
/**
 * Build a prefix NFA graph that matches the first \p prefix_len positions of
 * \p mask as a simple chain. Anchored prefixes hang off start; floating ones
 * off startDs. The final vertex reports with report id 0.
 */
static
unique_ptr<NGHolder> buildMaskLhs(bool anchored, u32 prefix_len,
                                  const vector<CharReach> &mask) {
    DEBUG_PRINTF("build %slhs len %u/%zu\n", anchored ? "anc " : "", prefix_len,
                 mask.size());

    unique_ptr<NGHolder> lhs = ue2::make_unique<NGHolder>(NFA_PREFIX);

    assert(prefix_len);
    assert(mask.size() >= prefix_len);
    NFAVertex pred = anchored ? lhs->start : lhs->startDs;

    // Chain one vertex per mask position.
    u32 m_idx = 0;
    while (prefix_len--) {
        NFAVertex v = add_vertex(*lhs);
        (*lhs)[v].char_reach = mask[m_idx++];
        add_edge(pred, v, *lhs);
        pred = v;
    }
    add_edge(pred, lhs->accept, *lhs);
    (*lhs)[pred].reports.insert(0);

    return lhs;
}
|
||||
|
||||
static
|
||||
void buildLiteralMask(const vector<CharReach> &mask, vector<u8> &msk,
|
||||
vector<u8> &cmp, u32 delay) {
|
||||
msk.clear();
|
||||
cmp.clear();
|
||||
if (mask.size() <= delay) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Construct an and/cmp mask from our mask ending at delay positions before
|
||||
// the end of the literal, with max length HWLM_MASKLEN.
|
||||
|
||||
auto ite = mask.end() - delay;
|
||||
auto it = ite - min(size_t{HWLM_MASKLEN}, mask.size() - delay);
|
||||
|
||||
for (; it != ite; ++it) {
|
||||
msk.push_back(0);
|
||||
cmp.push_back(0);
|
||||
make_and_cmp_mask(*it, &msk.back(), &cmp.back());
|
||||
}
|
||||
|
||||
assert(msk.size() == cmp.size());
|
||||
assert(msk.size() <= HWLM_MASKLEN);
|
||||
}
|
||||
|
||||
/**
 * Check whether \p mask can be implemented as a "transient" mask: one that
 * fits inside available history, has a usable trigger literal set with an
 * acceptable delay, and whose effective literal length meets the minimum.
 */
static
bool validateTransientMask(const vector<CharReach> &mask, bool eod, const Grey &grey) {
    assert(!mask.empty());

    // An EOD anchored mask requires that everything fit into history, while an
    // ordinary floating case can handle one byte more (i.e., max history size
    // and one byte in the buffer).
    const size_t max_width = grey.maxHistoryAvailable + (eod ? 0 : 1);
    if (mask.size() > max_width) {
        DEBUG_PRINTF("mask too long for max available history\n");
        return false;
    }

    vector<ue2_literal> lits;
    u32 lit_minBound; /* minBound of each literal in lit */
    u32 lit_length; /* length of each literal in lit */
    if (!findMaskLiterals(mask, &lits, &lit_minBound, &lit_length)) {
        DEBUG_PRINTF("failed to find any lits\n");
        return false;
    }

    if (lits.empty()) {
        return false;
    }

    // Distance from end of literal to end of mask must be deliverable as
    // literal delay.
    const u32 delay = mask.size() - lit_length - lit_minBound;
    if (delay > MAX_DELAY) {
        DEBUG_PRINTF("delay %u is too much\n", delay);
        return false;
    }

    // Many single-byte triggers would fire far too often.
    if (lit_length == 1 && lits.size() > 3) {
        DEBUG_PRINTF("no decent trigger\n");
        return false;
    }

    // Mixed-sensitivity literals require benefits masks to implement, and thus
    // have a maximum length. This has been taken into account in
    // findMaskLiterals.
    assert(lit_length <= MAX_MASK2_WIDTH ||
           none_of(begin(lits), end(lits), mixed_sensitivity));

    // Build the HWLM literal mask.
    vector<u8> msk, cmp;
    if (grey.roseHamsterMasks) {
        buildLiteralMask(mask, msk, cmp, delay);
    }

    // We consider the HWLM mask length to run from the first non-zero byte to
    // the end, and let max(mask length, literal length) be the effective
    // literal length.
    //
    // A one-byte literal with no mask is too short, but a one-byte literal
    // with a few bytes of mask information is OK.

    u32 msk_length = distance(find_if(begin(msk), end(msk),
                              [](u8 v) { return v != 0; }), end(msk));
    u32 eff_lit_length = max(lit_length, msk_length);
    DEBUG_PRINTF("msk_length=%u, eff_lit_length = %u\n", msk_length,
                 eff_lit_length);

    if (eff_lit_length < MIN_MASK_LIT_LEN) {
        DEBUG_PRINTF("literals too short\n");
        return false;
    }

    DEBUG_PRINTF("mask is ok\n");
    return true;
}
|
||||
|
||||
/**
 * Determine whether matching \p lit alone is sufficient, or whether the mask
 * graph \p g is still required as a leftfix. Walks the literal backwards from
 * accept through g; if the literal spans the entire graph (reaching start or
 * startDs), the mask adds nothing and is not needed.
 */
static
bool maskIsNeeded(const ue2_literal &lit, const NGHolder &g) {
    ue2::flat_set<NFAVertex> curr = {g.accept};
    ue2::flat_set<NFAVertex> next;

    // Step backwards through g, one literal char at a time, keeping only
    // predecessors whose reach covers the literal char.
    for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
        const CharReach &cr = *it;
        DEBUG_PRINTF("check %s\n", describeClass(*it).c_str());
        next.clear();
        for (auto v : curr) {
            for (auto u : inv_adjacent_vertices_range(v, g)) {
                if (isSubsetOf(cr, g[u].char_reach)) {
                    next.insert(u);
                }
            }
        }
        if (next.empty()) {
            DEBUG_PRINTF("no path to start\n");
            return true;
        }
        curr.swap(next);
    }

    // If any surviving vertex is a direct successor of start/startDs, the
    // literal covers the whole mask and the leftfix is redundant.
    for (auto v : curr) {
        for (auto u : inv_adjacent_vertices_range(v, g)) {
            if (u == g.start || u == g.startDs) {
                DEBUG_PRINTF("literal spans graph from start to accept\n");
                return false;
            }
        }
    }

    DEBUG_PRINTF("literal doesn't reach start\n");
    return true;
}
|
||||
|
||||
/**
 * Add \p mask to the Rose graph as a transient mask: trigger literals found
 * in the mask are added to a literal table, each guarded (where needed) by a
 * shared prefix graph over the whole mask. EOD masks route reports through a
 * dedicated eod_accept vertex and use the EOD-anchored literal table.
 */
static
void addTransientMask(RoseBuildImpl &build, const vector<CharReach> &mask,
                      const ue2::flat_set<ReportID> &reports, bool anchored,
                      bool eod) {
    vector<ue2_literal> lits;
    u32 lit_minBound; /* minBound of each literal in lit */
    u32 lit_length; /* length of each literal in lit */
    if (!findMaskLiterals(mask, &lits, &lit_minBound, &lit_length)) {
        DEBUG_PRINTF("failed to find any lits\n");
        assert(0); // caller should have validated the mask first
        return;
    }

    DEBUG_PRINTF("%zu literals, minBound=%u, length=%u\n", lits.size(),
                 lit_minBound, lit_length);

    if (lits.empty()) {
        assert(0);
        return;
    }

    // Distance from the end of the literal to the end of the mask.
    u32 delay = mask.size() - lit_length - lit_minBound;
    assert(delay <= MAX_DELAY);
    DEBUG_PRINTF("delay=%u\n", delay);

    // One prefix graph over the whole mask, shared by all trigger literals.
    shared_ptr<NGHolder> mask_graph = buildMaskLhs(anchored, mask.size(), mask);

    u32 mask_lag = 0; /* TODO */

    // Everyone gets the same report ID.
    ReportID mask_report = build.getNewNfaReport();
    setReportId(*mask_graph, mask_report);

    // Build the HWLM literal mask.
    vector<u8> msk, cmp;
    if (build.cc.grey.roseHamsterMasks) {
        buildLiteralMask(mask, msk, cmp, delay);
    }

    /* adjust bounds to be relative to trigger rather than mask */
    const u32 v_min_offset = add_rose_depth(0, mask.size());
    const u32 v_max_offset =
        add_rose_depth(anchored ? 0 : ROSE_BOUND_INF, mask.size());

    RoseGraph &g = build.g;

    // By default, masked literals go into the floating table (except for eod
    // cases).
    enum rose_literal_table table = ROSE_FLOATING;

    RoseVertex eod_v = RoseGraph::null_vertex();
    if (eod) {
        eod_v = add_vertex(g);
        g[eod_v].eod_accept = true;
        insert(&g[eod_v].reports, reports);
        g[eod_v].min_offset = v_min_offset;
        g[eod_v].max_offset = v_max_offset;

        // Note: because this is a transient mask, we know that we can match it
        // completely inside the history buffer. So, using the EOD literal
        // table is always safe.
        table = ROSE_EOD_ANCHORED;

        // Widen the EOD table window to cover the mask.
        ENSURE_AT_LEAST(&build.ematcher_region_size, mask.size());
    }

    const ue2::flat_set<ReportID> no_reports;

    for (const auto &lit : lits) {
        u32 lit_id = build.getLiteralId(lit, msk, cmp, delay, table);
        const RoseVertex parent = anchored ? build.anchored_root : build.root;
        // Skip the leftfix if the literal alone already pins down the mask.
        bool use_mask = delay || maskIsNeeded(lit, *mask_graph);

        auto v = createVertex(&build, parent, 0, ROSE_BOUND_INF, lit_id,
                              lit.length(), eod ? no_reports : reports);

        if (use_mask) {
            g[v].left.graph = mask_graph;
            g[v].left.lag = mask_lag;
            g[v].left.leftfix_report = mask_report;
        } else {
            // Make sure our edge bounds are correct.
            auto e = edge_by_target(parent, v, g).first;
            g[e].minBound = 0;
            g[e].maxBound = anchored ? 0 : ROSE_BOUND_INF;
            g[e].history = anchored ? ROSE_ROLE_HISTORY_ANCH
                                    : ROSE_ROLE_HISTORY_NONE;
        }

        // Set offsets correctly.
        g[v].min_offset = v_min_offset;
        g[v].max_offset = v_max_offset;

        if (eod) {
            // Tie each trigger vertex to the shared eod_accept vertex.
            auto e = add_edge(v, eod_v, g).first;
            g[e].minBound = 0;
            g[e].maxBound = 0;
            g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE;
        }
    }
}
|
||||
|
||||
/**
 * Build a suffix NFA that matches the last \p suffix_len positions of
 * \p mask as a simple chain; the vertex adjacent to accept carries
 * \p reports. Built back-to-front from accept.
 */
static
unique_ptr<NGHolder> buildMaskRhs(const ue2::flat_set<ReportID> &reports,
                                  const vector<CharReach> &mask,
                                  u32 suffix_len) {
    assert(suffix_len);
    assert(mask.size() > suffix_len);

    unique_ptr<NGHolder> rhs = ue2::make_unique<NGHolder>(NFA_SUFFIX);
    NGHolder &h = *rhs;

    // Walk backwards from accept, consuming mask positions from the end.
    NFAVertex succ = h.accept;
    u32 m_idx = mask.size() - 1;
    while (suffix_len--) {
        NFAVertex u = add_vertex(h);
        if (succ == h.accept) {
            // Only the final (accept-adjacent) vertex reports.
            h[u].reports.insert(reports.begin(), reports.end());
        }
        h[u].char_reach = mask[m_idx--];
        add_edge(u, succ, h);
        succ = u;
    }

    add_edge(h.start, succ, h);

    return rhs;
}
|
||||
|
||||
/**
 * Add a non-transient mask to the Rose graph via a RoseInGraph:
 * start -> [prefix graph] -> literal -> [suffix graph] -> accept.
 * When an anchored prefix would exceed the anchored-region limit, an extra
 * anchored literal is sought inside the prefix and a midfix graph is used to
 * connect it to the main literal.
 */
static
void doAddMask(RoseBuildImpl &tbi, bool anchored,
               const vector<CharReach> &mask, const ue2_literal &lit,
               u32 prefix_len, u32 suffix_len,
               const ue2::flat_set<ReportID> &reports) {
    /* Note: bounds are relative to literal start */
    RoseInGraph ig;
    RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(anchored), ig);
    RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);

    DEBUG_PRINTF("pref + lit = %u\n", prefix_len);
    assert(prefix_len >= lit.length());

    // prefix len is relative to end of literal.
    u32 minBound = prefix_len - lit.length();

    if (minBound) {
        if (anchored && prefix_len > tbi.cc.grey.maxAnchoredRegion) {
            DEBUG_PRINTF("too deep\n");
            /* see if there is an anchored literal we can also hang off */

            ue2_literal lit2;
            u32 lit2_offset;
            vector<CharReach> mask2 = mask;
            assert(mask2.size() > tbi.cc.grey.maxAnchoredRegion);
            // Only search the portion of the mask that can live in the
            // anchored region (and precedes the main literal).
            mask2.resize(MIN(tbi.cc.grey.maxAnchoredRegion, minBound));

            findMaskLiteral(mask2, tbi.cc.streaming, &lit2, &lit2_offset,
                            tbi.cc.grey);

            if (lit2.length() >= MIN_MASK_LIT_LEN) {
                u32 prefix2_len = lit2_offset + lit2.length();
                assert(prefix2_len < minBound);
                RoseInVertex u
                    = add_vertex(RoseInVertexProps::makeLiteral(lit2), ig);
                if (lit2_offset){
                    DEBUG_PRINTF("building lhs (off %u)\n", lit2_offset);
                    shared_ptr<NGHolder> lhs2
                        = buildMaskLhs(true, lit2_offset, mask);
                    add_edge(s, u, RoseInEdgeProps(lhs2, lit2.length()), ig);
                } else {
                    add_edge(s, u, RoseInEdgeProps(0, 0), ig);
                }

                /* midfix */
                DEBUG_PRINTF("building mhs\n");
                vector<CharReach> mask3(mask.begin() + prefix2_len, mask.end());
                u32 overlap = maxOverlap(lit2, lit, 0);
                u32 delay = lit.length() - overlap;
                shared_ptr<NGHolder> mhs
                    = buildMaskLhs(true, minBound - prefix2_len + overlap,
                                   mask3);
                mhs->kind = NFA_INFIX;
                add_edge(u, v, RoseInEdgeProps(mhs, delay), ig);

                DEBUG_PRINTF("add anch literal too!\n");
                goto do_rhs;
            }
        }

        // Plain case: a single prefix graph guards the main literal.
        shared_ptr<NGHolder> lhs = buildMaskLhs(anchored, minBound, mask);
        add_edge(s, v, RoseInEdgeProps(lhs, lit.length()), ig);
    } else {
        // Literal starts at the mask start: plain bounded edge, no graph.
        u32 maxBound = anchored ? minBound : ROSE_BOUND_INF;
        add_edge(s, v, RoseInEdgeProps(minBound, maxBound), ig);
    }

do_rhs:
    if (suffix_len) {
        // Suffix graph carries the reports.
        shared_ptr<NGHolder> rhs = buildMaskRhs(reports, mask, suffix_len);
        RoseInVertex a =
            add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
        add_edge(v, a, RoseInEdgeProps(rhs, 0), ig);
    } else {
        /* Note: masks have no eod connections */
        RoseInVertex a
            = add_vertex(RoseInVertexProps::makeAccept(reports), ig);
        add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
    }

    calcVertexOffsets(ig);

    bool rv = tbi.addRose(ig, false);

    assert(rv); /* checkAllowMask should have prevented this */
    if (!rv) {
        throw std::exception();
    }
}
|
||||
|
||||
/**
 * Check whether \p mask can be implemented via the prefix/literal/suffix
 * decomposition. On success, *lit holds the chosen literal, *prefix_len the
 * literal end offset within the mask, and *suffix_len the remainder.
 */
static
bool checkAllowMask(const vector<CharReach> &mask, ue2_literal *lit,
                    u32 *prefix_len, u32 *suffix_len,
                    const CompileContext &cc) {
    assert(!mask.empty());
    u32 lit_offset;
    findMaskLiteral(mask, cc.streaming, lit, &lit_offset, cc.grey);

    // A short literal is only acceptable if it covers the entire mask.
    if (lit->length() < MIN_MASK_LIT_LEN && lit->length() != mask.size()) {
        DEBUG_PRINTF("need more literal - bad mask\n");
        return false;
    }

    DEBUG_PRINTF("mask lit '%s', len=%zu at offset=%u\n",
                 dumpString(*lit).c_str(), lit->length(), lit_offset);

    assert(!cc.streaming || lit->length() <= cc.grey.maxHistoryAvailable + 1);

    /* literal is included in the prefix nfa so that matches from the prefix
     * can't occur in the history buffer - probably should tweak the NFA API
     * to allow such matches not to be suppressed */
    *prefix_len = lit_offset + lit->length();
    *suffix_len = mask.size() - *prefix_len;
    DEBUG_PRINTF("prefix_len=%u, suffix_len=%u\n", *prefix_len, *suffix_len);

    /* check if we can backtrack sufficiently */
    if (cc.streaming && *prefix_len > cc.grey.maxHistoryAvailable + 1) {
        DEBUG_PRINTF("too much lag\n");
        return false;
    }

    if (*suffix_len > MAX_MASK_SIZE || *prefix_len > MAX_MASK_SIZE) {
        DEBUG_PRINTF("too big\n");
        return false;
    }

    return true;
}
|
||||
|
||||
bool RoseBuildImpl::add(bool anchored, const vector<CharReach> &mask,
|
||||
const ue2::flat_set<ReportID> &reports) {
|
||||
if (validateTransientMask(mask, false, cc.grey)) {
|
||||
bool eod = false;
|
||||
addTransientMask(*this, mask, reports, anchored, eod);
|
||||
return true;
|
||||
}
|
||||
|
||||
ue2_literal lit;
|
||||
u32 prefix_len = 0;
|
||||
u32 suffix_len = 0;
|
||||
|
||||
if (!checkAllowMask(mask, &lit, &prefix_len, &suffix_len, cc)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* we know that the mask can be handled now, start playing with the rose
|
||||
* graph */
|
||||
doAddMask(*this, anchored, mask, lit, prefix_len, suffix_len, reports);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Check (without building anything) whether \p mask could be added as a
 * transient mask. reports/anchored are unused by the underlying check.
 */
bool RoseBuildImpl::validateMask(const vector<CharReach> &mask,
                                 UNUSED const ue2::flat_set<ReportID> &reports,
                                 UNUSED bool anchored, bool eod) const {
    return validateTransientMask(mask, eod, cc.grey);
}
|
||||
|
||||
static
|
||||
unique_ptr<NGHolder> makeAnchoredGraph(const vector<CharReach> &mask,
|
||||
const ue2::flat_set<ReportID> &reports,
|
||||
bool eod) {
|
||||
auto gp = ue2::make_unique<NGHolder>();
|
||||
NGHolder &g = *gp;
|
||||
|
||||
NFAVertex u = g.start;
|
||||
for (const auto &cr : mask) {
|
||||
NFAVertex v = add_vertex(g);
|
||||
g[v].char_reach = cr;
|
||||
add_edge(u, v, g);
|
||||
u = v;
|
||||
}
|
||||
|
||||
|
||||
g[u].reports = reports;
|
||||
add_edge(u, eod ? g.acceptEod : g.accept, g);
|
||||
|
||||
return gp;
|
||||
}
|
||||
|
||||
static
|
||||
bool addAnchoredMask(RoseBuildImpl &build, const vector<CharReach> &mask,
|
||||
const ue2::flat_set<ReportID> &reports, bool eod) {
|
||||
if (!build.cc.grey.allowAnchoredAcyclic) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto g = makeAnchoredGraph(mask, reports, eod);
|
||||
assert(g);
|
||||
|
||||
return build.addAnchoredAcyclic(*g);
|
||||
}
|
||||
|
||||
void RoseBuildImpl::addMask(const vector<CharReach> &mask,
|
||||
const ue2::flat_set<ReportID> &reports,
|
||||
bool anchored, bool eod) {
|
||||
if (anchored && addAnchoredMask(*this, mask, reports, eod)) {
|
||||
DEBUG_PRINTF("added mask as anchored acyclic graph\n");
|
||||
return;
|
||||
}
|
||||
|
||||
addTransientMask(*this, mask, reports, anchored, eod);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
882
src/rose/rose_build_anchored.cpp
Normal file
882
src/rose/rose_build_anchored.cpp
Normal file
@@ -0,0 +1,882 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "rose_build_anchored.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "rose_build_impl.h"
|
||||
#include "rose_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/dfa_min.h"
|
||||
#include "nfa/mcclellancompile.h"
|
||||
#include "nfa/mcclellancompile_util.h"
|
||||
#include "nfa/nfa_build_util.h"
|
||||
#include "nfa/rdfa_merge.h"
|
||||
#include "nfagraph/ng_holder.h"
|
||||
#include "nfagraph/ng_repeat.h"
|
||||
#include "nfagraph/ng_util.h"
|
||||
#include "nfagraph/ng_mcclellan_internal.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitfield.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/container.h"
|
||||
#include "util/determinise.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/order_check.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/ue2string.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <map>
|
||||
#include <queue>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
#define ANCHORED_NFA_STATE_LIMIT 512
|
||||
#define MAX_DFA_STATES 16000
|
||||
#define DFA_PAIR_MERGE_THRESHOLD 5000
|
||||
#define MAX_SMALL_START_REACH 4
|
||||
|
||||
#define INIT_STATE (DEAD_STATE + 1)
|
||||
|
||||
// Adds a vertex with the given reach.
|
||||
static
|
||||
NFAVertex add_vertex(NGHolder &h, const CharReach &cr) {
|
||||
NFAVertex v = add_vertex(h);
|
||||
h[v].char_reach = cr;
|
||||
return v;
|
||||
}
|
||||
|
||||
static
|
||||
void add_edges(const set<NFAVertex> &parents, NFAVertex v, NGHolder &h) {
|
||||
for (auto p : parents) {
|
||||
add_edge(p, v, h);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Append a bounded repeat {min,max} of class \p cr to the graph after
 * \p start, returning the set of vertices from which the repeat may exit.
 */
static
set<NFAVertex> addDotsToGraph(NGHolder &h, NFAVertex start, u32 min, u32 max,
                              const CharReach &cr) {
    DEBUG_PRINTF("adding [%u, %u] to graph\n", min, max);
    u32 i = 0;
    set<NFAVertex> curr;
    curr.insert(start);
    // Mandatory part: a simple chain of min vertices.
    for (; i < min; i++) {
        NFAVertex next = add_vertex(h, cr);
        add_edges(curr, next, h);
        curr.clear();
        curr.insert(next);
    }

    assert(max != ROSE_BOUND_INF);

    // Optional part: each extra vertex may be skipped, so the exits after
    // the mandatory chain stay live throughout.
    set<NFAVertex> orig = curr;
    for (; i < max; i++) {
        NFAVertex next = add_vertex(h, cr);
        add_edges(curr, next, h);
        curr.clear();
        curr.insert(next);
        curr.insert(orig.begin(), orig.end());
    }

    return curr;
}
|
||||
|
||||
static
|
||||
NFAVertex addToGraph(NGHolder &h, const set<NFAVertex> &curr,
|
||||
const ue2_literal &s) {
|
||||
DEBUG_PRINTF("adding %s to graph\n", dumpString(s).c_str());
|
||||
assert(!s.empty());
|
||||
|
||||
ue2_literal::const_iterator it = s.begin();
|
||||
NFAVertex u = add_vertex(h, *it);
|
||||
add_edges(curr, u, h);
|
||||
|
||||
for (++it; it != s.end(); ++it) {
|
||||
NFAVertex next = add_vertex(h, *it);
|
||||
add_edge(u, next, h);
|
||||
u = next;
|
||||
}
|
||||
|
||||
return u;
|
||||
}
|
||||
|
||||
/**
 * Merge the anchored DFAs in \p dfas in place: DFAs with small start reach
 * and large start reach are merged within their own group, then a final
 * pairwise merge is attempted if only two small-enough DFAs remain.
 */
static
void mergeAnchoredDfas(vector<unique_ptr<raw_dfa>> &dfas,
                       const RoseBuildImpl &build) {
    // First, group our DFAs into "small start" and "big start" sets.
    vector<unique_ptr<raw_dfa>> small_starts, big_starts;
    for (auto &rdfa : dfas) {
        u32 start_size = mcclellanStartReachSize(rdfa.get());
        if (start_size <= MAX_SMALL_START_REACH) {
            small_starts.push_back(move(rdfa));
        } else {
            big_starts.push_back(move(rdfa));
        }
    }
    dfas.clear();

    DEBUG_PRINTF("%zu dfas with small starts, %zu dfas with big starts\n",
                 small_starts.size(), big_starts.size());
    mergeDfas(small_starts, MAX_DFA_STATES, nullptr, build.cc.grey);
    mergeDfas(big_starts, MAX_DFA_STATES, nullptr, build.cc.grey);

    // Rehome our groups into one vector.
    for (auto &rdfa : small_starts) {
        dfas.push_back(move(rdfa));
    }
    for (auto &rdfa : big_starts) {
        dfas.push_back(move(rdfa));
    }

    // Final test: if we've built two DFAs here that are small enough, we can
    // try to merge them.
    if (dfas.size() == 2) {
        size_t total_states = dfas[0]->states.size() + dfas[1]->states.size();
        if (total_states < DFA_PAIR_MERGE_THRESHOLD) {
            DEBUG_PRINTF("doing small pair merge\n");
            mergeDfas(dfas, MAX_DFA_STATES, nullptr, build.cc.grey);
        }
    }
}
|
||||
|
||||
static
|
||||
void translateReportSet(flat_set<ReportID> *rset, const RoseBuildImpl &tbi) {
|
||||
flat_set<ReportID> old;
|
||||
old.swap(*rset);
|
||||
for (auto report_id : old) {
|
||||
DEBUG_PRINTF("updating %u -> %u\n", report_id,
|
||||
tbi.literal_info[report_id].final_id);
|
||||
rset->insert(tbi.literal_info[report_id].final_id);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void remapAnchoredReports(raw_dfa &dfa, const RoseBuildImpl &tbi) {
|
||||
for (dstate &ds : dfa.states) {
|
||||
translateReportSet(&ds.reports, tbi);
|
||||
translateReportSet(&ds.reports_eod, tbi);
|
||||
}
|
||||
}
|
||||
|
||||
/* Replaces the report ids currently in the dfas (rose graph literal ids) with
|
||||
* the final id used by the runtime. */
|
||||
static
|
||||
void remapAnchoredReports(RoseBuildImpl &tbi) {
|
||||
for (auto it = tbi.anchored_nfas.begin(); it != tbi.anchored_nfas.end();
|
||||
++it) {
|
||||
for (auto &rdfa : it->second) {
|
||||
assert(rdfa);
|
||||
remapAnchoredReports(*rdfa, tbi);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Build the graph for a simple anchored pattern ^.{min,max}literal into
 * *h_in, reporting the given exit ids on the literal's final vertex.
 */
static
void populate_holder(const simple_anchored_info &sai, const set<u32> &exit_ids,
                     NGHolder *h_in) {
    DEBUG_PRINTF("populating holder for ^.{%u,%u}%s\n", sai.min_bound,
                 sai.max_bound, dumpString(sai.literal).c_str());
    NGHolder &h = *h_in;
    // Bounded dot repeat, then the literal, then accept.
    set<NFAVertex> ends = addDotsToGraph(h, h.start, sai.min_bound,
                                         sai.max_bound, CharReach::dot());
    NFAVertex v = addToGraph(h, ends, sai.literal);
    add_edge(v, h.accept, h);
    h[v].reports.insert(exit_ids.begin(), exit_ids.end());
}
|
||||
|
||||
/**
 * Total stream state required by an anchored matcher table, or 0 if there
 * is no table.
 */
u32 anchoredStateSize(const void *atable) {
    if (!atable) {
        return 0;
    }

    const struct anchored_matcher_info *curr
        = (const anchored_matcher_info *)atable;

    // Walk the list until we find the last element; total state size will be
    // that engine's state offset plus its state requirement.
    while (curr->next_offset) {
        curr = (const anchored_matcher_info *)
            ((const char *)curr + curr->next_offset);
    }

    // The NFA engine is laid out immediately after its matcher info header.
    const NFA *nfa = (const NFA *)((const char *)curr + sizeof(*curr));
    return curr->state_offset + nfa->scratchStateSize;
}
|
||||
|
||||
bool anchoredIsMulti(const RoseEngine &engine) {
|
||||
const struct anchored_matcher_info *curr
|
||||
= (const anchored_matcher_info *)getALiteralMatcher(&engine);
|
||||
|
||||
return curr && curr->next_offset;
|
||||
}
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
typedef bitfield<ANCHORED_NFA_STATE_LIMIT> nfa_state_set;
|
||||
|
||||
// DFA state for determinising an anchored NGHolder: a set of NFA states
// plus a countdown ("wdelay") used by Automaton_Holder::transition to
// re-enable the start state after a delay.
struct Holder_StateSet {
    Holder_StateSet() : wdelay(0) {}

    nfa_state_set wrap_state; // bitset over the holder's vertex indices
    u32 wdelay;               // remaining delay before start is re-enabled

    bool operator==(const Holder_StateSet &b) const {
        // Compare the cheap integer first.
        return wdelay == b.wdelay && wrap_state == b.wrap_state;
    }
};
|
||||
|
||||
// Boost-style hash for Holder_StateSet, combining both members (must be
// consistent with operator== above).
size_t hash_value(const Holder_StateSet &s) {
    size_t val = 0;
    boost::hash_combine(val, s.wrap_state);
    boost::hash_combine(val, s.wdelay);
    return val;
}
|
||||
|
||||
class Automaton_Holder {
|
||||
public:
|
||||
typedef Holder_StateSet StateSet;
|
||||
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
|
||||
|
||||
explicit Automaton_Holder(const NGHolder &g_in) : g(g_in), bad(false) {
|
||||
for (auto v : vertices_range(g)) {
|
||||
vertexToIndex[v] = indexToVertex.size();
|
||||
indexToVertex.push_back(v);
|
||||
}
|
||||
|
||||
if (indexToVertex.size() > ANCHORED_NFA_STATE_LIMIT) {
|
||||
bad = true;
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("%zu states\n", indexToVertex.size());
|
||||
init.wdelay = 0;
|
||||
init.wrap_state.set(vertexToIndex[g.start]);
|
||||
|
||||
DEBUG_PRINTF("init wdelay %u\n", init.wdelay);
|
||||
|
||||
calculateAlphabet();
|
||||
cr_by_index = populateCR(g, indexToVertex, alpha);
|
||||
}
|
||||
|
||||
private:
|
||||
void calculateAlphabet() {
|
||||
vector<CharReach> esets(1, CharReach::dot());
|
||||
|
||||
for (auto v : indexToVertex) {
|
||||
const CharReach &cr = g[v].char_reach;
|
||||
|
||||
for (size_t i = 0; i < esets.size(); i++) {
|
||||
if (esets[i].count() == 1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
CharReach t = cr & esets[i];
|
||||
|
||||
if (t.any() && t != esets[i]) {
|
||||
esets[i] &= ~t;
|
||||
esets.push_back(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
|
||||
}
|
||||
|
||||
public:
|
||||
void transition(const StateSet &in, StateSet *next) {
|
||||
/* track the dfa state, reset nfa states */
|
||||
u32 wdelay = in.wdelay ? in.wdelay - 1 : 0;
|
||||
|
||||
for (symbol_t s = 0; s < alphasize; s++) {
|
||||
next[s].wrap_state.reset();
|
||||
next[s].wdelay = wdelay;
|
||||
}
|
||||
|
||||
nfa_state_set succ;
|
||||
|
||||
if (wdelay != in.wdelay) {
|
||||
DEBUG_PRINTF("enabling start\n");
|
||||
succ.set(vertexToIndex[g.startDs]);
|
||||
}
|
||||
|
||||
for (size_t i = in.wrap_state.find_first(); i != nfa_state_set::npos;
|
||||
i = in.wrap_state.find_next(i)) {
|
||||
NFAVertex v = indexToVertex[i];
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (!contains(vertexToIndex, w)
|
||||
|| w == g.accept || w == g.acceptEod) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (w == g.startDs) {
|
||||
continue;
|
||||
}
|
||||
|
||||
succ.set(vertexToIndex[w]);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t j = succ.find_first(); j != nfa_state_set::npos;
|
||||
j = succ.find_next(j)) {
|
||||
const CharReach &cr = cr_by_index[j];
|
||||
for (size_t s = cr.find_first(); s != CharReach::npos;
|
||||
s = cr.find_next(s)) {
|
||||
next[s].wrap_state.set(j); /* pre alpha'ed */
|
||||
}
|
||||
}
|
||||
|
||||
next[alpha[TOP]] = in;
|
||||
}
|
||||
|
||||
/* Determinisation starts from the single initial state. */
const vector<StateSet> initial() {
    return {init};
}
|
||||
|
||||
void reports(const StateSet &in, flat_set<ReportID> &rv) {
|
||||
rv.clear();
|
||||
for (size_t i = in.wrap_state.find_first(); i != nfa_state_set::npos;
|
||||
i = in.wrap_state.find_next(i)) {
|
||||
NFAVertex v = indexToVertex[i];
|
||||
if (edge(v, g.accept, g).second) {
|
||||
assert(!g[v].reports.empty());
|
||||
insert(&rv, g[v].reports);
|
||||
} else {
|
||||
assert(g[v].reports.empty());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* This automaton never fires EOD reports. */
void reportsEod(const StateSet &, flat_set<ReportID> &r) {
    r.clear();
}
|
||||
|
||||
/* used by ng_ to prune states after highlander accepts; this automaton
 * never allows pruning. */
static bool canPrune(const flat_set<ReportID> &) {
    return false;
}
|
||||
|
||||
private:
|
||||
const NGHolder &g;
|
||||
ue2::unordered_map<NFAVertex, u32> vertexToIndex;
|
||||
vector<NFAVertex> indexToVertex;
|
||||
vector<CharReach> cr_by_index;
|
||||
StateSet init;
|
||||
public:
|
||||
StateSet dead;
|
||||
array<u16, ALPHABET_SIZE> alpha;
|
||||
array<u16, ALPHABET_SIZE> unalpha;
|
||||
u16 alphasize;
|
||||
bool bad;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/* Look for a structurally identical DFA among 'existing'. Both DFAs must
 * carry exactly one distinct ReportID each; on a match, *remap is set to the
 * existing DFA's report and true is returned so the caller can reuse it. */
static
bool check_dupe(const raw_dfa &rdfa,
                const vector<unique_ptr<raw_dfa>> &existing, ReportID *remap) {
    if (!remap) {
        DEBUG_PRINTF("no remap\n");
        return false;
    }

    /* The candidate must have a single report over all its states. */
    set<ReportID> rdfa_reports;
    for (const auto &ds : rdfa.states) {
        rdfa_reports.insert(ds.reports.begin(), ds.reports.end());
    }
    if (rdfa_reports.size() != 1) {
        return false; /* too complicated for now would need mapping TODO */
    }

    for (const auto &e_rdfa : existing) {
        assert(e_rdfa);
        const raw_dfa &b = *e_rdfa;

        /* Cheap structural checks first. */
        if (rdfa.start_anchored != b.start_anchored ||
            rdfa.alpha_size != b.alpha_size ||
            rdfa.states.size() != b.states.size() ||
            rdfa.alpha_remap != b.alpha_remap) {
            continue;
        }

        set<ReportID> b_reports;

        /* States must match pairwise: same report count (report values may
         * differ — that is the point of the remap) and same transitions. */
        for (u32 i = 0; i < b.states.size(); i++) {
            assert(b.states[i].reports_eod.empty());
            assert(rdfa.states[i].reports_eod.empty());
            if (rdfa.states[i].reports.size() != b.states[i].reports.size()) {
                goto next_dfa;
            }
            b_reports.insert(b.states[i].reports.begin(),
                             b.states[i].reports.end());

            assert(rdfa.states[i].next.size() == b.states[i].next.size());
            if (!equal(rdfa.states[i].next.begin(), rdfa.states[i].next.end(),
                       b.states[i].next.begin())) {
                goto next_dfa;
            }
        }

        /* The existing DFA must also be single-report to be remappable. */
        if (b_reports.size() != 1) {
            continue;
        }

        *remap = *b_reports.begin();
        DEBUG_PRINTF("dupe found remapping to %u\n", *remap);
        return true;
    next_dfa:;
    }

    return false;
}
|
||||
|
||||
static
|
||||
bool check_dupe_simple(const RoseBuildImpl &tbi, u32 min_bound, u32 max_bound,
|
||||
const ue2_literal &lit, ReportID *remap) {
|
||||
if (!remap) {
|
||||
DEBUG_PRINTF("no remap\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
simple_anchored_info sai(min_bound, max_bound, lit);
|
||||
if (contains(tbi.anchored_simple, sai)) {
|
||||
*remap = *tbi.anchored_simple.at(sai).begin();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Walk backwards from accept collecting the longest chain of literal-like
 * vertices (single char, or a caseless pair). Writes the literal (in forward
 * order) into *lit and returns the head vertex of the chain, or
 * null_vertex() if no usable literal exists or the chain mixes case
 * sensitivity inconsistently across alpha characters. */
static
NFAVertex extractLiteral(const NGHolder &h, ue2_literal *lit) {
    vector<NFAVertex> lit_verts;
    NFAVertex v = h.accept;
    /* Follow the unique-predecessor chain while it stays literal-like. */
    while ((v = getSoleSourceVertex(h, v))) {
        const CharReach &cr = h[v].char_reach;
        if (cr.count() > 1 && !cr.isCaselessChar()) {
            break; /* reach too wide to be a literal character */
        }
        lit_verts.push_back(v);
    }

    if (lit_verts.empty()) {
        return NFAGraph::null_vertex();
    }

    /* The whole literal must be uniformly nocase or uniformly case-sensitive
     * over its alpha characters. */
    bool nocase = false;
    bool case_set = false;

    /* lit_verts was collected back-to-front; iterate in reverse to emit the
     * literal in forward order. */
    for (auto it = lit_verts.rbegin(), ite = lit_verts.rend(); it != ite;
         ++it) {
        const CharReach &cr = h[*it].char_reach;
        if (cr.isAlpha()) {
            bool cr_nocase = cr.count() != 1;
            if (case_set && cr_nocase != nocase) {
                return NFAGraph::null_vertex(); /* mixed case-ness: reject */
            }

            case_set = true;
            nocase = cr_nocase;
            lit->push_back(cr.find_first(), nocase);
        } else {
            /* non-alpha: case is irrelevant */
            lit->push_back(cr.find_first(), false);
        }
    }

    /* back() is the head (earliest) vertex of the literal chain. */
    return lit_verts.back();
}
|
||||
|
||||
/* Detect whether the anchored graph h is of the "simple" form
 * ^.{min,max}literal. On success fills in *min_bound, *max_bound, *lit and
 * the single *report, and returns true. Such graphs are stored compactly in
 * tbi.anchored_simple instead of as full DFAs. */
static
bool isSimple(const NGHolder &h, u32 *min_bound, u32 *max_bound,
              ue2_literal *lit, u32 *report) {
    assert(!proper_out_degree(h.startDs, h));
    assert(in_degree(h.acceptEod, h) == 1);

    DEBUG_PRINTF("looking for simple case\n");
    NFAVertex lit_head = extractLiteral(h, lit);

    if (lit_head == NFAGraph::null_vertex()) {
        DEBUG_PRINTF("no literal found\n");
        return false;
    }

    /* Must have exactly one report on the vertex feeding accept. */
    const auto &reps = h[*inv_adjacent_vertices(h.accept, h).first].reports;

    if (reps.size() != 1) {
        return false;
    }
    *report = *reps.begin();

    assert(!lit->empty());

    set<NFAVertex> rep_exits;

    /* lit should only be connected to dot vertices */
    for (auto u : inv_adjacent_vertices_range(lit_head, h)) {
        DEBUG_PRINTF("checking %u\n", h[u].index);
        if (!h[u].char_reach.all()) {
            return false;
        }

        if (u != h.start) {
            rep_exits.insert(u);
        }
    }

    /* No dot predecessors at all: literal hangs directly off start. */
    if (rep_exits.empty()) {
        DEBUG_PRINTF("direct anchored\n");
        assert(edge(h.start, lit_head, h).second);
        *min_bound = 0;
        *max_bound = 0;
        return true;
    }

    NFAVertex key = *rep_exits.begin();

    // Special-case the check for '^.foo' or '^.?foo'.
    if (rep_exits.size() == 1 && edge(h.start, key, h).second &&
        out_degree(key, h) == 1) {
        DEBUG_PRINTF("one exit\n");
        assert(edge(h.start, h.startDs, h).second);
        /* out-degree of start counts startDs, the dot vertex, and (for the
         * optional case) a jump edge straight to the literal head. */
        size_t num_enters = out_degree(h.start, h);
        if (num_enters == 2) {
            DEBUG_PRINTF("^.{1,1} prefix\n");
            *min_bound = 1;
            *max_bound = 1;
            return true;
        }
        if (num_enters == 3 && edge(h.start, lit_head, h).second) {
            DEBUG_PRINTF("^.{0,1} prefix\n");
            *min_bound = 0;
            *max_bound = 1;
            return true;
        }
    }

    /* General case: the dot vertices should form a bounded repeat. */
    vector<GraphRepeatInfo> repeats;
    findRepeats(h, 2, &repeats);

    /* Find the repeat containing our representative exit vertex. */
    vector<GraphRepeatInfo>::const_iterator it;
    for (it = repeats.begin(); it != repeats.end(); ++it) {
        DEBUG_PRINTF("checking.. %zu verts\n", it->vertices.size());
        if (find(it->vertices.begin(), it->vertices.end(), key)
            != it->vertices.end()) {
            break;
        }
    }
    if (it == repeats.end()) {
        DEBUG_PRINTF("no repeat found\n");
        return false;
    }

    /* Every exit into the literal must come from inside the repeat. */
    set<NFAVertex> rep_verts;
    insert(&rep_verts, it->vertices);
    if (!is_subset_of(rep_exits, rep_verts)) {
        DEBUG_PRINTF("bad exit check\n");
        return false;
    }

    /* Every entry from start (other than startDs and a jump edge to the
     * literal head) must go into the repeat. */
    set<NFAVertex> rep_enters;
    insert(&rep_enters, adjacent_vertices(h.start, h));
    rep_enters.erase(lit_head);
    rep_enters.erase(h.startDs);

    if (!is_subset_of(rep_enters, rep_verts)) {
        DEBUG_PRINTF("bad entry check\n");
        return false;
    }

    u32 min_b = it->repeatMin;
    if (edge(h.start, lit_head, h).second) { /* jump edge */
        /* the jump edge only expresses {0,max} on top of a {1,max} repeat */
        if (min_b != 1) {
            DEBUG_PRINTF("jump edge around repeat with min bound\n");
            return false;
        }

        min_b = 0;
    }
    *min_bound = min_b;
    *max_bound = it->repeatMax;

    DEBUG_PRINTF("repeat %u %u before %s\n", *min_bound, *max_bound,
                 dumpString(*lit).c_str());
    return true;
}
|
||||
|
||||
/* Register the determinised automaton with the Rose build. Prefers the
 * compact "simple anchored literal" representation when isSimple() accepts
 * the graph; otherwise stores the raw_dfa, bucketed by a reports-agnostic
 * hash so duplicates can be coalesced. Returns ANCHORED_SUCCESS or
 * ANCHORED_REMAP (duplicate found; *remap set to the surviving report). */
static
int finalise_out(RoseBuildImpl &tbi, const NGHolder &h,
                 const Automaton_Holder &autom, unique_ptr<raw_dfa> out_dfa,
                 ReportID *remap) {
    u32 min_bound = ~0U;
    u32 max_bound = ~0U;
    ue2_literal lit;
    u32 simple_report = MO_INVALID_IDX;
    if (isSimple(h, &min_bound, &max_bound, &lit, &simple_report)) {
        assert(simple_report != MO_INVALID_IDX);
        if (check_dupe_simple(tbi, min_bound, max_bound, lit, remap)) {
            DEBUG_PRINTF("found duplicate remapping to %u\n", *remap);
            return ANCHORED_REMAP;
        }
        DEBUG_PRINTF("add with report %u\n", simple_report);
        tbi.anchored_simple[simple_anchored_info(min_bound, max_bound, lit)]
            .insert(simple_report);
        return ANCHORED_SUCCESS;
    }

    /* Full DFA path: finish populating the raw_dfa from the automaton. */
    out_dfa->start_anchored = INIT_STATE;
    out_dfa->start_floating = DEAD_STATE;
    out_dfa->alpha_size = autom.alphasize;
    out_dfa->alpha_remap = autom.alpha;
    auto hash = hash_dfa_no_reports(*out_dfa);
    if (check_dupe(*out_dfa, tbi.anchored_nfas[hash], remap)) {
        return ANCHORED_REMAP;
    }
    tbi.anchored_nfas[hash].push_back(move(out_dfa));
    return ANCHORED_SUCCESS;
}
|
||||
|
||||
static
|
||||
int addAutomaton(RoseBuildImpl &tbi, const NGHolder &h, ReportID *remap) {
|
||||
Automaton_Holder autom(h);
|
||||
|
||||
if (autom.bad) {
|
||||
DEBUG_PRINTF("autom bad!\n");
|
||||
return ANCHORED_FAIL;
|
||||
}
|
||||
|
||||
unique_ptr<raw_dfa> out_dfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX);
|
||||
if (!determinise(autom, out_dfa->states, MAX_DFA_STATES)) {
|
||||
return finalise_out(tbi, h, autom, move(out_dfa), remap);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("determinise failed\n");
|
||||
return ANCHORED_FAIL;
|
||||
}
|
||||
|
||||
static
|
||||
void setReports(NGHolder &h, const map<NFAVertex, set<u32>> &reportMap,
|
||||
const ue2::unordered_map<NFAVertex, NFAVertex> &orig_to_copy) {
|
||||
for (const auto &m : reportMap) {
|
||||
NFAVertex t = orig_to_copy.at(m.first);
|
||||
assert(!m.second.empty());
|
||||
add_edge(t, h.accept, h);
|
||||
insert(&h[t].reports, m.second);
|
||||
}
|
||||
}
|
||||
|
||||
/* Clone the wrapper graph, strip its existing accepts, rewire accepts per
 * reportMap and add the result to the anchored matcher (no remap). */
int addAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &wrapper,
                   const map<NFAVertex, set<u32>> &reportMap) {
    NGHolder h;
    ue2::unordered_map<NFAVertex, NFAVertex> vmap;
    cloneHolder(h, wrapper, &vmap);

    /* Start from a clean accept structure. */
    clear_in_edges(h.accept, h);
    clear_in_edges(h.acceptEod, h);
    add_edge(h.accept, h.acceptEod, h);
    clearReports(h);

    setReports(h, reportMap, vmap);

    return addAutomaton(tbi, h, nullptr);
}
|
||||
|
||||
/* Clone the anchored graph and replace every accept predecessor's reports
 * with the single exit_id, then add it to the anchored matcher. */
int addToAnchoredMatcher(RoseBuildImpl &tbi, const NGHolder &anchored,
                         u32 exit_id, ReportID *remap) {
    NGHolder h;
    cloneHolder(h, anchored);
    clearReports(h);

    assert(in_degree(h.acceptEod, h) == 1);
    for (auto u : inv_adjacent_vertices_range(h.accept, h)) {
        h[u].reports.clear();
        h[u].reports.insert(exit_id);
    }

    return addAutomaton(tbi, h, remap);
}
|
||||
|
||||
static
|
||||
void buildSimpleDfas(const RoseBuildImpl &tbi,
|
||||
vector<unique_ptr<raw_dfa>> *anchored_dfas) {
|
||||
/* we should have determinised all of these before so there should be no
|
||||
* chance of failure. */
|
||||
for (const auto &simple : tbi.anchored_simple) {
|
||||
set<u32> exit_ids;
|
||||
for (auto lit_id : simple.second) {
|
||||
exit_ids.insert(tbi.literal_info[lit_id].final_id);
|
||||
}
|
||||
NGHolder h;
|
||||
populate_holder(simple.first, exit_ids, &h);
|
||||
Automaton_Holder autom(h);
|
||||
assert(!autom.bad);
|
||||
unique_ptr<raw_dfa> rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX);
|
||||
UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES);
|
||||
assert(!rv);
|
||||
rdfa->start_anchored = INIT_STATE;
|
||||
rdfa->start_floating = DEAD_STATE;
|
||||
rdfa->alpha_size = autom.alphasize;
|
||||
rdfa->alpha_remap = autom.alpha;
|
||||
anchored_dfas->push_back(move(rdfa));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fill the given vector with all of the raw_dfas we need to compile into the
|
||||
* anchored matcher. Takes ownership of the input structures, clearing them
|
||||
* from RoseBuildImpl.
|
||||
*/
|
||||
static
|
||||
void getAnchoredDfas(RoseBuildImpl &tbi,
|
||||
vector<unique_ptr<raw_dfa>> *anchored_dfas) {
|
||||
// DFAs that already exist as raw_dfas.
|
||||
for (auto &anch_dfas : tbi.anchored_nfas) {
|
||||
for (auto &rdfa : anch_dfas.second) {
|
||||
anchored_dfas->push_back(move(rdfa));
|
||||
}
|
||||
}
|
||||
tbi.anchored_nfas.clear();
|
||||
|
||||
// DFAs we currently have as simple literals.
|
||||
if (!tbi.anchored_simple.empty()) {
|
||||
buildSimpleDfas(tbi, anchored_dfas);
|
||||
tbi.anchored_simple.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Builds our anchored DFAs into runtime NFAs.
 *
 * Constructs a vector of NFA structures and a vector of their start offsets
 * (number of dots removed from the prefix) from the raw_dfa structures given.
 *
 * Note: frees the raw_dfa structures on completion.
 *
 * \return Total bytes required for the complete anchored matcher.
 */
static
size_t buildNfas(vector<unique_ptr<raw_dfa>> &anchored_dfas,
                 vector<aligned_unique_ptr<NFA>> *nfas, vector<u32> *start_offset,
                 const CompileContext &cc) {
    const size_t num_dfas = anchored_dfas.size();

    nfas->reserve(num_dfas);
    start_offset->reserve(num_dfas);

    size_t total_size = 0;

    for (auto &rdfa : anchored_dfas) {
        /* Leading dots become a start offset rather than DFA states. */
        u32 removed_dots = remove_leading_dots(*rdfa);
        start_offset->push_back(removed_dots);

        minimize_hopcroft(*rdfa, cc.grey);

        aligned_unique_ptr<NFA> nfa = mcclellanCompile(*rdfa, cc);
        if (!nfa) {
            /* compile failure here is unexpected: treat as OOM */
            assert(0);
            throw std::bad_alloc();
        }

        assert(nfa->length);
        /* each engine is stored cache-line aligned after its header */
        total_size += ROUNDUP_CL(sizeof(anchored_matcher_info) + nfa->length);
        nfas->push_back(move(nfa));
    }

    // We no longer need to keep the raw_dfa structures around.
    anchored_dfas.clear();

    return total_size;
}
|
||||
|
||||
/* Build the complete anchored matcher table: gather and merge all anchored
 * DFAs, compile them to NFA engines and serialise them back-to-back (each
 * preceded by an anchored_matcher_info header) into one aligned allocation.
 * *asize receives the table size; returns nullptr if there is nothing to
 * build. Throws ResourceLimitError if the table exceeds the grey limit. */
aligned_unique_ptr<void> buildAnchoredAutomataMatcher(RoseBuildImpl &tbi,
                                                      size_t *asize) {
    const CompileContext &cc = tbi.cc;
    remapAnchoredReports(tbi);

    if (tbi.anchored_nfas.empty() && tbi.anchored_simple.empty()) {
        DEBUG_PRINTF("empty\n");
        *asize = 0;
        return nullptr;
    }

    vector<unique_ptr<raw_dfa>> anchored_dfas;
    getAnchoredDfas(tbi, &anchored_dfas);

    mergeAnchoredDfas(anchored_dfas, tbi);

    vector<aligned_unique_ptr<NFA>> nfas;
    vector<u32> start_offset; // start offset for each dfa (dots removed)
    size_t total_size = buildNfas(anchored_dfas, &nfas, &start_offset, cc);

    if (total_size > cc.grey.limitRoseAnchoredSize) {
        throw ResourceLimitError();
    }

    *asize = total_size;
    aligned_unique_ptr<void> atable = aligned_zmalloc_unique<void>(total_size);
    char *curr = (char *)atable.get();

    u32 state_offset = 0;
    for (size_t i = 0; i < nfas.size(); i++) {
        const NFA *nfa = nfas[i].get();
        anchored_matcher_info *ami = (anchored_matcher_info *)curr;
        char *prev_curr = curr;

        curr += sizeof(anchored_matcher_info);

        /* copy the engine in directly after its header, then pad to a
         * cache line for the next entry */
        memcpy(curr, nfa, nfa->length);
        curr += nfa->length;
        curr = ROUNDUP_PTR(curr, 64);

        if (i + 1 == nfas.size()) {
            ami->next_offset = 0U; /* last entry in the chain */
        } else {
            ami->next_offset = verify_u32(curr - prev_curr);
        }

        // State must be aligned.
        u32 align_req = state_alignment(*nfa);
        assert(align_req <= 2); // only DFAs.
        while (state_offset % align_req) {
            state_offset++;
        }

        ami->state_offset = state_offset;
        state_offset += nfa->scratchStateSize;
        ami->anchoredMinDistance = start_offset[i];
    }

    DEBUG_PRINTF("success %zu\n", *asize);
    return atable;
}
|
||||
|
||||
} // namespace ue2
|
66
src/rose/rose_build_anchored.h
Normal file
66
src/rose/rose_build_anchored.h
Normal file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_BUILD_ANCHORED
|
||||
#define ROSE_BUILD_ANCHORED
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "rose_build.h"
|
||||
#include "nfagraph/ng_holder.h"
|
||||
#include "util/alloc.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
|
||||
struct RoseEngine;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
class RoseBuildImpl;
|
||||
struct Grey;
|
||||
|
||||
aligned_unique_ptr<void> buildAnchoredAutomataMatcher(RoseBuildImpl &tbi,
|
||||
size_t *asize);
|
||||
u32 anchoredStateSize(const void *atable);
|
||||
bool anchoredIsMulti(const RoseEngine &engine);
|
||||
|
||||
#define ANCHORED_FAIL 0
|
||||
#define ANCHORED_SUCCESS 1
|
||||
#define ANCHORED_REMAP 2
|
||||
|
||||
int addAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &wrapper,
|
||||
const std::map<NFAVertex, std::set<u32>> &reportMap);
|
||||
|
||||
int addToAnchoredMatcher(RoseBuildImpl &tbi, const NGHolder &anchored,
|
||||
u32 exit_id, ReportID *remap);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
4349
src/rose/rose_build_bytecode.cpp
Normal file
4349
src/rose/rose_build_bytecode.cpp
Normal file
File diff suppressed because it is too large
Load Diff
2735
src/rose/rose_build_compile.cpp
Normal file
2735
src/rose/rose_build_compile.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1142
src/rose/rose_build_convert.cpp
Normal file
1142
src/rose/rose_build_convert.cpp
Normal file
File diff suppressed because it is too large
Load Diff
43
src/rose/rose_build_convert.h
Normal file
43
src/rose/rose_build_convert.h
Normal file
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_BUILD_CONVERT_H
|
||||
#define ROSE_BUILD_CONVERT_H
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class RoseBuildImpl;
|
||||
|
||||
void convertFloodProneSuffixes(RoseBuildImpl &tbi);
|
||||
void convertBadLeaves(RoseBuildImpl &tbi);
|
||||
void convertPrefixToBounds(RoseBuildImpl &tbi);
|
||||
void convertAnchPrefixToBounds(RoseBuildImpl &tbi);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
633
src/rose/rose_build_dump.cpp
Normal file
633
src/rose/rose_build_dump.cpp
Normal file
@@ -0,0 +1,633 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "rose_build_dump.h"
|
||||
|
||||
#include "hwlm/hwlm_build.h"
|
||||
#include "rose_build_impl.h"
|
||||
#include "rose/rose_dump.h"
|
||||
#include "rose_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "nfagraph/ng_dump.h"
|
||||
#include "som/slot_manager_dump.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/container.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/ue2string.h"
|
||||
|
||||
#include <iomanip>
|
||||
#include <ostream>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error No dump support!
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
string to_string(nfa_kind k) {
|
||||
switch (k) {
|
||||
case NFA_PREFIX:
|
||||
return "p";
|
||||
case NFA_INFIX:
|
||||
return "i";
|
||||
case NFA_SUFFIX:
|
||||
return "s";
|
||||
case NFA_OUTFIX:
|
||||
return "o";
|
||||
case NFA_REV_PREFIX:
|
||||
return "r";
|
||||
}
|
||||
assert(0);
|
||||
return "?";
|
||||
}
|
||||
|
||||
// Get the RoseRole associated with a given vertex in the build graph from the
|
||||
// RoseEngine.
|
||||
static
|
||||
const RoseRole *getRoseRole(const RoseBuildImpl &build,
|
||||
const RoseEngine *engine, RoseVertex v) {
|
||||
if (!engine) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
u32 role_idx = build.g[v].role;
|
||||
if (role_idx == MO_INVALID_IDX) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const RoseRole *roles = getRoleTable(engine);
|
||||
return &roles[role_idx];
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
/* Graphviz property writer for the Rose build graph: renders vertex and
 * edge labels (and colours) for dumpRoseGraph. If a RoseEngine is supplied,
 * engine-side details (queue indices) are included as well. */
class RoseGraphWriter {
public:
    RoseGraphWriter(const RoseBuildImpl &b_in, const RoseEngine *t_in) :
        build(b_in), t(t_in) {
        /* cache the set of ghost vertices for quick lookup when labelling */
        for (const auto &m : build.ghost) {
            ghost.insert(m.second);
        }
    }

    /* Vertex label renderer. */
    void operator() (ostream &os, const RoseVertex &v) const {
        const RoseGraph &g = build.g;

        if (v == build.root) {
            os << "[label=\"<root>\"]";
            return;
        }

        if (v == build.anchored_root) {
            os << "[label=\"<^>\"]";
            return;
        }

        os << "[label=\"";
        os << "role=" << g[v].role << "[i" << g[v].idx <<"]\\n";

        for (u32 lit_id : g[v].literals) {
            writeLiteral(os, lit_id);
            os << "\\n";
        }

        os << "min_offset=" << g[v].min_offset;
        if (g[v].max_offset >= ROSE_BOUND_INF) {
            os << ", max_offset=inf";
        } else {
            os << ", max_offset=" << g[v].max_offset;
        }
        os << "\\n";

        if (!g[v].reports.empty()) {
            if (g[v].eod_accept) {
                os << "\\nACCEPT_EOD";
            } else {
                os << "\\nACCEPT";
            }
            os << " (rep=" << as_string_list(g[v].reports) << ")";
        }

        const RoseRole *r = getRoseRole(v);

        if (g[v].suffix) {
            os << "\\nSUFFIX (TOP " << g[v].suffix.top;
            if (r) {
                assert(t);
                const NFA *n = (const NFA *)((const char *)t + r->suffixOffset);
                os << ", Q" << n->queueIndex;
            } else {
                // Can't dump the queue number, but we can identify the suffix.
                if (g[v].suffix.graph) {
                    os << ", graph=" << g[v].suffix.graph.get()
                       << " " << to_string(g[v].suffix.graph->kind);
                }
                if (g[v].suffix.castle) {
                    os << ", castle=" << g[v].suffix.castle.get();
                }
                if (g[v].suffix.rdfa) {
                    os << ", dfa=" << g[v].suffix.rdfa.get();
                }
                if (g[v].suffix.haig) {
                    os << ", haig=" << g[v].suffix.haig.get();
                }

            }
            os << ")";
        }

        /* direct-report literals are flagged via the final_id DR bit */
        if (!g[v].literals.empty()) {
            u32 id = *g[v].literals.begin();
            if (id < build.literal_info.size()
                && build.literal_info[id].final_id != MO_INVALID_IDX
                && (build.literal_info[id].final_id & LITERAL_DR_FLAG)) {
                os << "\\nDIRECT REPORT";
            }
        }

        if (g[v].escapes.any()) {
            os << "\\nescapes=";
            describeClass(os, g[v].escapes, 5, CC_OUT_DOT);
        }
        if (ghost.find(v) != ghost.end()) {
            os << "\\nGHOST";
        }

        if (g[v].left) {
            const char *roseKind =
                build.isRootSuccessor(v) ? "PREFIX" : "INFIX";
            os << "\\nROSE " << roseKind;
            os << " (";
            if (r) {
                os << "Q" << r->leftfixQueue << ", ";
            }

            os << "report " << g[v].left.leftfix_report << ")";

            if (g[v].left.graph) {
                os << " " << to_string(g[v].left.graph->kind);
            }
        }

        os << "\"";

        // Roles with a rose prefix get a colour.
        if (g[v].left) {
            os << " color=violetred ";
        }

        // Our accepts get different colours.
        if (!g[v].reports.empty()) {
            os << " color=blue ";
        }
        if (g[v].suffix) {
            os << " color=forestgreen ";
        }

        os << "]";
    }

    /* Edge label renderer: bounds, infix top and history requirements. */
    void operator() (ostream &os, const RoseEdge &e) const {
        const RoseGraph &g = build.g;

        // Render the bounds on this edge.
        u32 minBound = g[e].minBound;
        u32 maxBound = g[e].maxBound;

        os << "[label=\"";
        if (minBound == 0 && maxBound == ROSE_BOUND_INF) {
            os << ".*";
        } else if (minBound == 1 && maxBound == ROSE_BOUND_INF) {
            os << ".+";
        } else {
            os << ".{" << minBound << ",";
            if (maxBound != ROSE_BOUND_INF) {
                os << maxBound;
            }
            os << "}";
        }

        // If we lead to an infix, display which top we're using.
        RoseVertex v = target(e, g);
        if (g[v].left) {
            os << "\\nROSE TOP " << g[e].rose_top;
        }

        switch (g[e].history) {
        case ROSE_ROLE_HISTORY_NONE:
            break;
        case ROSE_ROLE_HISTORY_ANCH:
            os << "\\nANCH history";
            break;
        case ROSE_ROLE_HISTORY_LAST_BYTE:
            os << "\\nLAST_BYTE history";
            break;
        case ROSE_ROLE_HISTORY_INVALID:
            os << "\\nINVALID history";
            break;
        }

        os << "\"]";
    }

private:
    // Render the literal associated with a vertex.
    void writeLiteral(ostream &os, u32 id) const {
        os << "lit=" << id;
        if (id < build.literal_info.size()) {
            os << "/" << build.literal_info[id].final_id << " ";
        } else {
            os << "/nofinal ";
        }

        if (contains(build.literals.right, id)) {
            const auto &lit = build.literals.right.at(id);
            os << '\'' << dotEscapeString(lit.s.get_string()) << '\'';
            if (lit.s.any_nocase()) {
                os << " (nocase)";
            }
            if (lit.delay) {
                os << " +" << lit.delay;
            }
        } else {
            os << "<unknown>";
        }
    }

    const RoseRole *getRoseRole(RoseVertex v) const {
        return ue2::getRoseRole(build, t, v);
    }

    set<RoseVertex> ghost; /* targets of ghost mappings in the build */
    const RoseBuildImpl &build;
    const RoseEngine *t; /* may be nullptr: engine details then omitted */
};
|
||||
|
||||
} // namespace
|
||||
|
||||
void dumpRoseGraph(const RoseBuild &build_base, const RoseEngine *t,
|
||||
const char *filename) {
|
||||
const RoseBuildImpl &build = dynamic_cast<const RoseBuildImpl &>(build_base);
|
||||
|
||||
const Grey &grey = build.cc.grey;
|
||||
if (!grey.dumpFlags) {
|
||||
return;
|
||||
}
|
||||
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << filename;
|
||||
|
||||
|
||||
DEBUG_PRINTF("dumping graph to %s\n", ss.str().c_str());
|
||||
ofstream os(ss.str());
|
||||
|
||||
RoseGraphWriter writer(build, t);
|
||||
writeGraphviz(os, build.g, writer, get(&RoseVertexProps::idx, build.g));
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct CompareVertexRole {
|
||||
explicit CompareVertexRole(const RoseGraph &g_in) : g(g_in) {}
|
||||
inline bool operator()(const RoseVertex &a, const RoseVertex &b) const {
|
||||
return g[a].role < g[b].role;
|
||||
}
|
||||
private:
|
||||
const RoseGraph &g;
|
||||
};
|
||||
}
|
||||
|
||||
static
|
||||
void lit_graph_info(const RoseBuildImpl &build, const rose_literal_info &li,
|
||||
u32 *min_offset, bool *in_root_role) {
|
||||
*min_offset = ~0U;
|
||||
*in_root_role = false;
|
||||
for (auto v : li.vertices) {
|
||||
*in_root_role |= build.isRootSuccessor(v);
|
||||
|
||||
LIMIT_TO_AT_MOST(min_offset, build.g[v].min_offset);
|
||||
}
|
||||
}
|
||||
|
||||
// Dumps a human-readable description of every Rose literal to `filename`:
// one entry per literal (table, IDs, flags, groups, offsets) followed by a
// per-role breakdown including predecessor edge bounds.
static
void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) {
    const RoseGraph &g = build.g;

    DEBUG_PRINTF("dumping literals\n");
    ofstream os(filename);

    os << "ROSE LITERALS: a total of " << build.literals.right.size()
       << " literals and " << num_vertices(g) << " roles." << endl << endl;

    // Minimum hop-depth per role, used in the per-role output below.
    const auto depths = findDepths(build);

    for (const auto &e : build.literals.right) {
        u32 id = e.first;
        const ue2_literal &s = e.second.s;
        const rose_literal_info &lit_info = build.literal_info[id];

        // Which literal matcher table this literal lives in.
        switch (e.second.table) {
        case ROSE_ANCHORED:
            os << "ANCHORED";
            break;
        case ROSE_FLOATING:
            os << "FLOATING";
            break;
        case ROSE_EOD_ANCHORED:
            os << "EOD-ANCHORED";
            break;
        case ROSE_ANCHORED_SMALL_BLOCK:
            os << "SMALL-BLOCK";
            break;
        case ROSE_EVENT:
            os << "EVENT";
            break;
        }

        os << " ID " << id << "/" << lit_info.final_id << ": \""
           << escapeString(s.get_string()) << "\""
           << " (len " << s.length() << ",";
        if (s.any_nocase()) {
            os << " nocase,";
        }
        if (lit_info.requires_benefits) {
            os << " benefits,";
        }

        if (e.second.delay) {
            os << " delayed "<< e.second.delay << ",";
        }

        // Group mask printed as a fixed-width 64-bit hex value.
        os << " groups 0x" << hex << setw(16) << setfill('0')
           << lit_info.group_mask << dec << ",";

        if (lit_info.squash_group) {
            os << " squashes group,";
        }

        u32 min_offset;
        bool in_root_role;
        lit_graph_info(build, lit_info, &min_offset, &in_root_role);
        os << " min offset " << min_offset;
        if (in_root_role) {
            os << " root literal";
        }

        os << ") roles=" << lit_info.vertices.size() << endl;

        // Delayed variants derived from this literal, if any.
        if (!lit_info.delayed_ids.empty()) {
            os << " Children:";
            for (u32 d_id : lit_info.delayed_ids) {
                os << " " << d_id;
            }
            os << endl;
        }

        // Temporary vector, so that we can sort the output by role.
        vector<RoseVertex> verts(lit_info.vertices.begin(),
                                 lit_info.vertices.end());
        sort(verts.begin(), verts.end(), CompareVertexRole(g));

        for (RoseVertex v : verts) {
            // role info
            os << " Role " << g[v].role << ": depth=" << depths.at(v)
               << ", groups=0x" << hex << setw(16) << setfill('0')
               << g[v].groups << dec;

            if (g[v].reports.empty()) {
                os << ", report=NONE";
            } else {
                os << ", report={" << as_string_list(g[v].reports) << "}";
            }

            os << ", min_offset=" << g[v].min_offset;
            os << ", max_offset=" << g[v].max_offset << endl;
            // pred info
            for (const auto &ie : in_edges_range(v, g)) {
                os << " Predecessor role=";
                u32 predRole = g[source(ie, g)].role;
                // MO_INVALID_IDX marks the (unanchored) root; the anchored
                // root is identified by matching its role index.
                if (predRole == MO_INVALID_IDX) {
                    os << "ROOT";
                } else if (predRole == g[build.anchored_root].role) {
                    os << "ANCHORED_ROOT";
                } else {
                    os << predRole;
                }
                os << ": bounds [" << g[ie].minBound << ", ";
                if (g[ie].maxBound == ROSE_BOUND_INF) {
                    os << "inf";
                } else {
                    os << g[ie].maxBound;
                }
                os << "]" << endl;
            }
        }
    }

    os.close();
}
|
||||
|
||||
// Renders the byte range [i, end) as a lowercase hex string, two digits per
// byte, zero-padded.
template<class Iter>
static
std::string toHex(Iter i, const Iter &end) {
    std::ostringstream oss;
    oss << std::hex << std::setfill('0'); // sticky manipulators hoisted
    while (i != end) {
        const unsigned byte_val = static_cast<unsigned char>(*i);
        oss << std::setw(2) << (byte_val & 0xffU); // setw resets per insert
        ++i;
    }
    return oss.str();
}
|
||||
|
||||
static
|
||||
void dumpTestLiterals(const string &filename, const vector<hwlmLiteral> &lits) {
|
||||
ofstream of(filename.c_str());
|
||||
|
||||
for (const hwlmLiteral &lit : lits) {
|
||||
of << lit.id << "=";
|
||||
if (lit.nocase) {
|
||||
of << "!";
|
||||
}
|
||||
of << toHex(lit.s.begin(), lit.s.end());
|
||||
if (!lit.msk.empty()) {
|
||||
of << " " << toHex(lit.msk.begin(), lit.msk.end());
|
||||
of << " " << toHex(lit.cmp.begin(), lit.cmp.end());
|
||||
}
|
||||
|
||||
of << endl;
|
||||
}
|
||||
|
||||
of.close();
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct LongerThanLimit {
|
||||
explicit LongerThanLimit(size_t len) : max_len(len) {}
|
||||
bool operator()(const hwlmLiteral &lit) const {
|
||||
return lit.s.length() > max_len;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t max_len;
|
||||
};
|
||||
}
|
||||
|
||||
static
|
||||
void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) {
|
||||
|
||||
vector<hwlmLiteral> lits;
|
||||
|
||||
fillHamsterLiteralList(build, ROSE_ANCHORED, &lits);
|
||||
dumpTestLiterals(base + "rose_anchored_test_literals.txt", lits);
|
||||
|
||||
lits.clear();
|
||||
fillHamsterLiteralList(build, ROSE_FLOATING, &lits);
|
||||
dumpTestLiterals(base + "rose_float_test_literals.txt", lits);
|
||||
|
||||
lits.clear();
|
||||
fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, &lits);
|
||||
dumpTestLiterals(base + "rose_eod_test_literals.txt", lits);
|
||||
|
||||
lits.clear();
|
||||
fillHamsterLiteralList(build, ROSE_FLOATING, &lits);
|
||||
fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, &lits);
|
||||
lits.erase(remove_if(lits.begin(), lits.end(),
|
||||
LongerThanLimit(ROSE_SMALL_BLOCK_LEN)),
|
||||
lits.end());
|
||||
dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits);
|
||||
}
|
||||
|
||||
static
|
||||
CharReach bitvectorToReach(const u8 *reach) {
|
||||
CharReach cr;
|
||||
|
||||
for (size_t i = 0; i < 256; i++) {
|
||||
if (reach[i / 8] & (1U << (i % 8))) {
|
||||
cr.set(i);
|
||||
|
||||
}
|
||||
}
|
||||
return cr;
|
||||
}
|
||||
|
||||
// Dumps the lookaround tables from the built engine: for every role with a
// lookaround program, prints its (offset -> reachable character class)
// entries. Offsets into the engine blob come from the RoseEngine header.
static
void dumpRoseLookaround(const RoseBuildImpl &build, const RoseEngine *t,
                        const Grey &grey, const string &filename) {
    stringstream ss;
    ss << grey.dumpPath << filename;
    ofstream os(ss.str());

    const RoseGraph &g = build.g;

    // The lookaround offset table (s8 relative offsets) and the reach
    // bitvector table both live inside the serialized engine.
    const u8 *base = (const u8 *)t;
    const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
    const u8 *reach_base = base + t->lookaroundReachOffset;

    for (RoseVertex v : vertices_range(g)) {
        const RoseRole *role = getRoseRole(build, t, v);
        // Skip roles with no lookaround program.
        if (!role || role->lookaroundIndex == MO_INVALID_IDX) {
            continue;
        }

        os << "Role " << g[v].role << endl;
        os << " literals: " << as_string_list(g[v].literals) << endl;
        os << " lookaround: index=" << role->lookaroundIndex
           << ", count=" << role->lookaroundCount << endl;

        // Entries for this role: lookaroundCount consecutive offsets, each
        // paired with a REACH_BITVECTOR_LEN-byte reach bitvector.
        const s8 *look = look_base + role->lookaroundIndex;
        const s8 *look_end = look + role->lookaroundCount;
        const u8 *reach =
            reach_base + role->lookaroundIndex * REACH_BITVECTOR_LEN;

        for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) {
            os << " " << std::setw(4) << std::setfill(' ') << int{*look}
               << ": ";
            describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
            os << endl;
        }

        os << endl;
    }

    os.close();
}
|
||||
|
||||
void dumpRose(const RoseBuild &build_base, const RoseEngine *t,
|
||||
const Grey &grey) {
|
||||
if (!grey.dumpFlags) {
|
||||
return;
|
||||
}
|
||||
|
||||
const RoseBuildImpl &build = dynamic_cast<const RoseBuildImpl&>(build_base);
|
||||
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "rose.txt";
|
||||
|
||||
FILE *f = fopen(ss.str().c_str(), "w");
|
||||
|
||||
if (!t) {
|
||||
fprintf(f, "<< no rose >>\n");
|
||||
fclose(f);
|
||||
return;
|
||||
}
|
||||
|
||||
// Dump Rose table info
|
||||
roseDumpText(t, f);
|
||||
|
||||
fclose(f);
|
||||
|
||||
roseDumpComponents(t, false, grey.dumpPath);
|
||||
|
||||
// Graph.
|
||||
dumpRoseGraph(build, t, "rose.dot");
|
||||
|
||||
// Literals.
|
||||
ss.str("");
|
||||
ss.clear();
|
||||
ss << grey.dumpPath << "rose_literals.txt";
|
||||
dumpRoseLiterals(build, ss.str().c_str());
|
||||
dumpRoseTestLiterals(build, grey.dumpPath);
|
||||
|
||||
f = fopen((grey.dumpPath + "/rose_struct.txt").c_str(), "w");
|
||||
roseDumpStructRaw(t, f);
|
||||
fclose(f);
|
||||
|
||||
// Lookaround tables.
|
||||
dumpRoseLookaround(build, t, grey, "rose_lookaround.txt");
|
||||
}
|
||||
|
||||
} // namespace ue2
|
60
src/rose/rose_build_dump.h
Normal file
60
src/rose/rose_build_dump.h
Normal file
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_BUILD_DUMP_H
#define ROSE_BUILD_DUMP_H

struct RoseEngine;

namespace ue2 {

class RoseBuild;
struct Grey;

#ifdef DUMP_SUPPORT
// Dump the Rose graph in graphviz representation.
void dumpRoseGraph(const RoseBuild &build, const RoseEngine *t,
                   const char *filename);

// Dump all Rose build/engine artefacts (text, graph, literals, lookaround)
// under the path configured in grey.
void dumpRose(const RoseBuild &build_base, const RoseEngine *t,
              const Grey &grey);
#else

// Without DUMP_SUPPORT these are empty inline stubs so call sites need no
// conditional compilation.
static UNUSED
void dumpRoseGraph(const RoseBuild &, const RoseEngine *, const char *) {
}

static UNUSED
void dumpRose(const RoseBuild &, const RoseEngine *, const Grey &) {
}

#endif

} // namespace ue2

#endif
|
570
src/rose/rose_build_impl.h
Normal file
570
src/rose/rose_build_impl.h
Normal file
@@ -0,0 +1,570 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_BUILD_IMPL_H_17E20A3C6935D6
|
||||
#define ROSE_BUILD_IMPL_H_17E20A3C6935D6
|
||||
|
||||
#include "rose_build.h"
|
||||
#include "rose_build_util.h"
|
||||
#include "rose_graph.h"
|
||||
#include "nfa/mpvcompile.h"
|
||||
#include "nfa/goughcompile.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "nfagraph/ng_holder.h"
|
||||
#include "nfagraph/ng_revacc.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/order_check.h"
|
||||
#include "util/queue_index_factory.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <deque>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <boost/bimap.hpp>
|
||||
#include <boost/functional/hash/hash.hpp>
|
||||
|
||||
struct RoseEngine;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
#define ROSE_GROUPS_MAX 64
|
||||
|
||||
struct BoundaryReports;
|
||||
struct CastleProto;
|
||||
struct CompileContext;
|
||||
struct hwlmLiteral;
|
||||
class ReportManager;
|
||||
class SomSlotManager;
|
||||
|
||||
/** \brief Lightweight non-owning view of a suffix engine attached to a rose
 * role: exactly one of graph/castle/dfa/haig is expected to be populated
 * (NOTE(review): inferred from the accessors — confirm). Comparable and
 * hashable so suffixes can be deduplicated in containers. */
struct suffix_id {
    suffix_id(const RoseSuffixInfo &in)
        : g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()),
          h(in.haig.get()), dfa_min_width(in.dfa_min_width),
          dfa_max_width(in.dfa_max_width) {
        assert(!g || g->kind == NFA_SUFFIX);
    }
    bool operator==(const suffix_id &b) const {
        // Identity comparison on the underlying engine pointers; equal ids
        // must agree on cached widths.
        bool rv = g == b.g && c == b.c && h == b.h && d == b.d;
        assert(!rv || dfa_min_width == b.dfa_min_width);
        assert(!rv || dfa_max_width == b.dfa_max_width);
        return rv;
    }
    bool operator!=(const suffix_id &b) const { return !(*this == b); }
    bool operator<(const suffix_id &b) const {
        // Lexicographic order over the engine pointers (ORDER_CHECK returns
        // early on the first differing field).
        const suffix_id &a = *this;
        ORDER_CHECK(g);
        ORDER_CHECK(c);
        ORDER_CHECK(d);
        ORDER_CHECK(h);
        return false;
    }

    // Accessors assert the cached width invariants when neither dfa form is
    // present, then return the (possibly null) underlying engine.
    NGHolder *graph() {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }
        return g;
    }
    const NGHolder *graph() const {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }
        return g;
    }
    CastleProto *castle() {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }
        return c;
    }
    const CastleProto *castle() const {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }
        return c;
    }
    raw_som_dfa *haig() { return h; }
    const raw_som_dfa *haig() const { return h; }
    raw_dfa *dfa() { return d; }
    const raw_dfa *dfa() const { return d; }

    size_t hash() const;

private:
    // Raw pointers only — ownership stays with the originating
    // RoseSuffixInfo's shared/unique pointers.
    NGHolder *g;
    CastleProto *c;
    raw_dfa *d;
    raw_som_dfa *h;
    depth dfa_min_width;
    depth dfa_max_width;

    friend depth findMinWidth(const suffix_id &s);
    friend depth findMaxWidth(const suffix_id &s);
};
|
||||
|
||||
std::set<ReportID> all_reports(const suffix_id &s);
|
||||
std::set<u32> all_tops(const suffix_id &s);
|
||||
bool has_eod_accepts(const suffix_id &s);
|
||||
bool has_non_eod_accepts(const suffix_id &s);
|
||||
depth findMinWidth(const suffix_id &s);
|
||||
depth findMaxWidth(const suffix_id &s);
|
||||
size_t hash_value(const suffix_id &s);
|
||||
|
||||
/** \brief represents an engine to the left of a rose role */
|
||||
/** \brief represents an engine to the left of a rose role. Non-owning view
 * over LeftEngInfo's engine pointers; comparable and hashable for
 * deduplication. */
struct left_id {
    left_id(const LeftEngInfo &in)
        : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()),
          h(in.haig.get()), dfa_min_width(in.dfa_min_width),
          dfa_max_width(in.dfa_max_width) {
        assert(!g || !generates_callbacks(*g));
    }
    bool operator==(const left_id &b) const {
        // Identity comparison on the underlying engine pointers; equal ids
        // must agree on cached widths.
        bool rv = g == b.g && c == b.c && h == b.h && d == b.d;
        assert(!rv || dfa_min_width == b.dfa_min_width);
        assert(!rv || dfa_max_width == b.dfa_max_width);
        return rv;
    }
    bool operator!=(const left_id &b) const { return !(*this == b); }
    bool operator<(const left_id &b) const {
        // Lexicographic order over the engine pointers (ORDER_CHECK returns
        // early on the first differing field).
        const left_id &a = *this;
        ORDER_CHECK(g);
        ORDER_CHECK(c);
        ORDER_CHECK(d);
        ORDER_CHECK(h);
        return false;
    }

    // Accessors assert the cached width invariants when neither dfa form is
    // present, then return the (possibly null) underlying engine.
    NGHolder *graph() {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }
        return g;
    }
    const NGHolder *graph() const {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }
        return g;
    }
    CastleProto *castle() {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }

        return c;
    }
    const CastleProto *castle() const {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }

        return c;
    }
    raw_som_dfa *haig() { return h; }
    const raw_som_dfa *haig() const { return h; }
    raw_dfa *dfa() { return d; }
    const raw_dfa *dfa() const { return d; }

    size_t hash() const;

private:
    // Raw pointers only — ownership stays with the originating LeftEngInfo.
    NGHolder *g;
    CastleProto *c;
    raw_dfa *d;
    raw_som_dfa *h;
    depth dfa_min_width;
    depth dfa_max_width;

    friend bool isAnchored(const left_id &r);
    friend depth findMinWidth(const left_id &r);
    friend depth findMaxWidth(const left_id &r);
};
|
||||
|
||||
std::set<u32> all_tops(const left_id &r);
|
||||
bool isAnchored(const left_id &r);
|
||||
depth findMinWidth(const left_id &r);
|
||||
depth findMaxWidth(const left_id &r);
|
||||
u32 num_tops(const left_id &r);
|
||||
size_t hash_value(const left_id &r);
|
||||
|
||||
/** \brief Build-time bookkeeping for a single Rose literal: the graph
 * vertices it triggers, its group assignment and matcher-facing IDs. */
struct rose_literal_info {
    // IDs of delayed variants derived from this literal.
    ue2::flat_set<u32> delayed_ids;
    // Graph vertices (roles) this literal is attached to.
    ue2::flat_set<RoseVertex> vertices;
    // Bitmask of the literal groups this literal belongs to.
    rose_group group_mask = 0;
    // ID of the undelayed form of this literal (MO_INVALID_IDX if unset).
    u32 undelayed_id = MO_INVALID_IDX;
    u32 final_id = MO_INVALID_IDX; /* id reported by fdr */
    // True if a match of this literal squashes its group.
    bool squash_group = false;
    // NOTE(review): semantics of the two flags below are not visible here —
    // presumably "must be exploded into case variants" and "needs benefits
    // (msk/cmp) confirmation"; confirm against the literal-matcher build code.
    bool requires_explode = false;
    bool requires_benefits = false;
};
|
||||
|
||||
/**
|
||||
* \brief Main literal struct used at Rose build time. Numeric literal IDs
|
||||
* used at build time point at these (via the RoseBuildImpl::literals map).
|
||||
*/
|
||||
/**
 * \brief Main literal struct used at Rose build time. Numeric literal IDs
 * used at build time point at these (via the RoseBuildImpl::literals map).
 */
struct rose_literal_id {
    rose_literal_id(const ue2_literal &s_in, rose_literal_table table_in,
                    u32 delay_in)
        : s(s_in), table(table_in), delay(delay_in), distinctiveness(0) {}

    // Variant taking an msk/cmp pair (defined out of line).
    rose_literal_id(const ue2_literal &s_in, const std::vector<u8> &msk_in,
                    const std::vector<u8> &cmp_in, rose_literal_table table_in,
                    u32 delay_in);

    ue2_literal s;            // the literal string itself
    std::vector<u8> msk;      // per-byte mask applied at match time
    std::vector<u8> cmp;      // expected bytes under msk
    rose_literal_table table; // which matcher table the literal lives in
    u32 delay;                // match-report delay in bytes (0 = none)
    u32 distinctiveness;

    // Effective length: string length plus report delay.
    size_t elength(void) const { return s.length() + delay; }
};
|
||||
|
||||
// Lexicographic strict weak ordering over rose_literal_id fields;
// ORDER_CHECK returns early on the first differing field. Distinctiveness
// is compared first, so it dominates the ordering.
static inline
bool operator<(const rose_literal_id &a, const rose_literal_id &b) {
    ORDER_CHECK(distinctiveness);
    ORDER_CHECK(table);
    ORDER_CHECK(s);
    ORDER_CHECK(delay);
    ORDER_CHECK(msk);
    ORDER_CHECK(cmp);
    return 0;
}
|
||||
|
||||
// Literals are stored in a map from (string, nocase) -> ID
|
||||
typedef boost::bimap<rose_literal_id, u32> RoseLiteralMap;
|
||||
|
||||
/** \brief A "simple" anchored literal: a literal plus the window of start
 * positions at which it may begin matching. */
struct simple_anchored_info {
    simple_anchored_info(u32 min_b, u32 max_b, const ue2_literal &lit)
        : min_bound(min_b), max_bound(max_b), literal(lit) {}
    u32 min_bound; /**< min number of characters required before literal can
                    * start matching */
    u32 max_bound; /**< max number of characters allowed before literal can
                    * start matching */
    ue2_literal literal;
};
|
||||
|
||||
// Lexicographic ordering by (min_bound, max_bound, literal); ORDER_CHECK
// returns early on the first differing field.
static really_inline
bool operator<(const simple_anchored_info &a, const simple_anchored_info &b) {
    ORDER_CHECK(min_bound);
    ORDER_CHECK(max_bound);
    ORDER_CHECK(literal);
    return 0;
}
|
||||
|
||||
/** \brief Build-time representation of an outfix engine. Holds exactly one
 * of holder/rdfa/haig (or puffette lists for the MPV) plus metadata gathered
 * during construction. */
struct OutfixInfo { /* TODO: poly */
    OutfixInfo() {}
    explicit OutfixInfo(std::unique_ptr<raw_dfa> r) : rdfa(std::move(r)) {
        assert(rdfa);
    }
    explicit OutfixInfo(std::unique_ptr<NGHolder> h) : holder(std::move(h)) {
        assert(holder);
    }
    explicit OutfixInfo(std::unique_ptr<raw_som_dfa> r) : haig(std::move(r)) {
        assert(haig);
    }

    // Returns (allocating if needed) this outfix's queue index.
    u32 get_queue(QueueIndexFactory &qif);

    // True if this outfix carries any MPV puffettes.
    bool is_nonempty_mpv() const {
        return !puffettes.empty() || !triggered_puffettes.empty();
    }

    // True if the outfix holds no engine of any kind.
    bool is_dead() const {
        return !holder && !rdfa && !haig && puffettes.empty() &&
               triggered_puffettes.empty();
    }

    // Releases all engines, leaving the outfix dead.
    void clear() {
        holder.reset();
        rdfa.reset();
        haig.reset();
        puffettes.clear();
        triggered_puffettes.clear();
        assert(is_dead());
    }

    std::unique_ptr<NGHolder> holder;
    std::unique_ptr<raw_dfa> rdfa;
    std::unique_ptr<raw_som_dfa> haig;
    std::vector<raw_puff> puffettes;
    std::vector<raw_puff> triggered_puffettes;

    /** Once the outfix has been built into an engine, this will point to it. */
    NFA *nfa = nullptr;

    RevAccInfo rev_info;
    u32 maxBAWidth = 0; //!< max bi-anchored width
    depth minWidth = depth::infinity();
    depth maxWidth = 0;
    u64a maxOffset = 0;
    bool chained = false;
    bool in_sbmatcher = false; //!< handled by small-block matcher.

private:
    // Queue index; ~0U until assigned via get_queue().
    u32 queue = ~0U;
};
|
||||
|
||||
std::set<ReportID> all_reports(const OutfixInfo &outfix);
|
||||
|
||||
// Concrete impl class
|
||||
/** \brief Concrete implementation of the RoseBuild interface: accumulates
 * literals, graphs and outfixes, then compiles them into a RoseEngine. */
class RoseBuildImpl : public RoseBuild {
public:
    RoseBuildImpl(ReportManager &rm, SomSlotManager &ssm,
                  const CompileContext &cc, const BoundaryReports &boundary);

    ~RoseBuildImpl() override;

    // Adds a single literal.
    void add(bool anchored, bool eod, const ue2_literal &lit,
             const ue2::flat_set<ReportID> &ids) override;

    bool addRose(const RoseInGraph &ig, bool prefilter,
                 bool finalChance = false) override;
    bool addSombeRose(const RoseInGraph &ig) override;

    bool addOutfix(const NGHolder &h) override;
    bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) override;
    bool addOutfix(const raw_puff &rp) override;

    bool addChainTail(const raw_puff &rp, u32 *queue_out, u32 *event_out) override;

    // Returns true if we were able to add it as a mask
    bool add(bool anchored, const std::vector<CharReach> &mask,
             const ue2::flat_set<ReportID> &reports) override;

    bool addAnchoredAcyclic(const NGHolder &graph) override;

    bool validateMask(const std::vector<CharReach> &mask,
                      const ue2::flat_set<ReportID> &reports, bool anchored,
                      bool eod) const override;
    void addMask(const std::vector<CharReach> &mask,
                 const ue2::flat_set<ReportID> &reports, bool anchored,
                 bool eod) override;

    // Construct a runtime implementation.
    aligned_unique_ptr<RoseEngine> buildRose(u32 minWidth) override;
    aligned_unique_ptr<RoseEngine> buildFinalEngine(u32 minWidth);

    void setSom() override { hasSom = true; }

    std::unique_ptr<RoseDedupeAux> generateDedupeAux() const override;

    bool hasEodSideLink() const;

    // Find the maximum bound on the edges to this vertex's successors.
    u32 calcSuccMaxBound(RoseVertex u) const;

    // Assign roles to groups, writing the groups bitset into each role in the
    // graph.
    void assignGroupsToRoles();

    /* Returns the ID of the given literal in the literal map, adding it if
     * necessary. */
    u32 getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table);

    // Variant with msk/cmp.
    u32 getLiteralId(const ue2_literal &s, const std::vector<u8> &msk,
                     const std::vector<u8> &cmp, u32 delay,
                     rose_literal_table table);

    bool hasLiteral(const ue2_literal &s, rose_literal_table table) const;

    u32 getNewLiteralId(void);

    void removeVertices(const std::vector<RoseVertex> &dead);

    // Is the Rose anchored?
    bool hasNoFloatingRoots() const;
    bool hasDirectReports() const;

    RoseVertex cloneVertex(RoseVertex v);

    u32 calcHistoryRequired() const;

    rose_group getInitialGroups() const;
    rose_group getSuccGroups(RoseVertex start) const;
    rose_group getGroups(RoseVertex v) const;

    bool hasDelayedLiteral(RoseVertex v) const;
    bool hasDelayPred(RoseVertex v) const;
    bool hasLiteralInTable(RoseVertex v, enum rose_literal_table t) const;
    bool hasAnchoredTablePred(RoseVertex v) const;

    void assignGroupsToLiterals(void);

    // Is the given vertex a successor of either root or anchored_root?
    bool isRootSuccessor(const RoseVertex &v) const;
    /* Is the given vertex a successor of something other than root or
     * anchored_root? */
    bool isNonRootSuccessor(const RoseVertex &v) const;

    bool isDirectReport(u32 id) const;
    bool isDelayed(u32 id) const;
    bool hasDirectFinalId(u32 id) const;
    bool hasDirectFinalId(RoseVertex v) const;
    bool hasFinalId(u32 id) const;

    bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored
                                          * table */
    bool isFloating(RoseVertex v) const; /* true iff has literal in floating
                                          * table */
    bool isInETable(RoseVertex v) const; /* true iff has literal in eod
                                          * table */

    size_t maxLiteralLen(RoseVertex v) const;
    size_t minLiteralLen(RoseVertex v) const;

    // max overlap considered for every pair (ulit, vlit).
    size_t maxLiteralOverlap(RoseVertex u, RoseVertex v) const;

    void renumberVertices(void);

    bool isPseudoStar(const RoseEdge &e) const;
    bool isPseudoStarOrFirstOnly(const RoseEdge &e) const;
    bool hasOnlyPseudoStarInEdges(RoseVertex v) const;

    // True for the two synthetic start vertices.
    bool isAnyStart(const RoseVertex &v) const {
        return v == root || v == anchored_root;
    }

    // True for vertices with no literal of their own (starts, EOD accepts).
    bool isVirtualVertex(const RoseVertex &v) const {
        return g[v].eod_accept || isAnyStart(v);
    }

    void handleMixedSensitivity(void);

    void findTransientLeftfixes(void);

    const CompileContext &cc;
    RoseGraph g;
    const RoseVertex root;          // unanchored root vertex
    const RoseVertex anchored_root; // anchored root vertex
    RoseLiteralMap literals;        // (literal <-> numeric id) bimap
    std::map<RoseVertex, RoseVertex> ghost;
    size_t vertexIndex;
    ReportID getNewNfaReport() override {
        return next_nfa_report++;
    }
    std::deque<rose_literal_info> literal_info; // indexed by literal id
    u32 delay_base_id;
    bool hasSom; //!< at least one pattern requires SOM.
    std::map<size_t, std::vector<std::unique_ptr<raw_dfa>>> anchored_nfas;
    std::map<simple_anchored_info, std::set<u32>> anchored_simple;
    std::map<u32, std::set<u32> > group_to_literal;
    u32 group_weak_end;
    u32 group_end;

    std::map<CharReach, std::set<RoseVertex> > side_squash_roles;

    u32 anchored_base_id;

    u32 nonbenefits_base_id;
    u32 ematcher_region_size; /**< number of bytes the eod table runs over */

    /** \brief Mapping from anchored literal ID to the original literal suffix
     * present when the literal was added to the literal matcher. Used for
     * overlap calculation in history assignment. */
    std::map<u32, rose_literal_id> anchoredLitSuffix;

    std::map<u32, std::set<u32> > final_id_to_literal; /* final literal id to
                                                        * literal id */

    unordered_set<left_id> transient;
    unordered_map<left_id, rose_group> rose_squash_masks;

    std::vector<OutfixInfo> outfixes;

    /** \brief MPV outfix entry. Null if not used, and moved into the outfixes
     * list before we start building the bytecode (at which point it is set to
     * null again). */
    std::unique_ptr<OutfixInfo> mpv_outfix = nullptr;

    bool floating_direct_report;

    u32 eod_event_literal_id; // ID of EOD event literal, or MO_INVALID_IDX.

    u32 max_rose_anchored_floating_overlap;

    /** \brief Flattened list of report IDs for multi-direct reports, indexed
     * by MDR final_id. */
    std::vector<ReportID> mdr_reports;

    QueueIndexFactory qif;
    ReportManager &rm;
    SomSlotManager &ssm;
    const BoundaryReports &boundary;

private:
    ReportID next_nfa_report; // counter backing getNewNfaReport()
};
|
||||
|
||||
// Free functions, in rose_build_misc.cpp
|
||||
|
||||
bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v);
|
||||
bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v);
|
||||
|
||||
size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b);
|
||||
void setReportId(NGHolder &g, ReportID id);
|
||||
|
||||
#ifndef NDEBUG
|
||||
bool roseHasTops(const RoseGraph &g, RoseVertex v);
|
||||
#endif
|
||||
|
||||
u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm);
|
||||
|
||||
// Function that operates on a msk/cmp pair and a literal, as used in
|
||||
// hwlmLiteral, and zeroes msk elements that don't add any power to the
|
||||
// literal.
|
||||
void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk,
|
||||
std::vector<u8> &cmp);
|
||||
|
||||
void fillHamsterLiteralList(const RoseBuildImpl &tbi, rose_literal_table table,
|
||||
std::vector<hwlmLiteral> *hl);
|
||||
|
||||
// Find the minimum depth in hops of each role. Note that a role may be
|
||||
// accessible from both the root and the anchored root.
|
||||
std::map<RoseVertex, u32> findDepths(const RoseBuildImpl &build);
|
||||
|
||||
#ifndef NDEBUG
|
||||
bool canImplementGraphs(const RoseBuildImpl &tbi);
|
||||
#endif
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif /* ROSE_BUILD_IMPL_H_17E20A3C6935D6 */
|
325
src/rose/rose_build_infix.cpp
Normal file
325
src/rose/rose_build_infix.cpp
Normal file
@@ -0,0 +1,325 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "rose/rose_build_infix.h"
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "nfa/castlecompile.h"
|
||||
#include "nfagraph/ng_dump.h"
|
||||
#include "nfagraph/ng_width.h"
|
||||
#include "nfagraph/ng_util.h"
|
||||
#include "rose/rose_build_impl.h"
|
||||
#include "util/container.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/ue2string.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
bool couldEndLiteral(const ue2_literal &s, NFAVertex initial,
|
||||
const NGHolder &h) {
|
||||
ue2::flat_set<NFAVertex> curr, next;
|
||||
curr.insert(initial);
|
||||
|
||||
for (auto it = s.rbegin(), ite = s.rend(); it != ite; ++it) {
|
||||
const CharReach &cr_s = *it;
|
||||
bool matched = false;
|
||||
next.clear();
|
||||
|
||||
for (auto v : curr) {
|
||||
if (v == h.start) {
|
||||
// We can't see what we had before the start, so we must assume
|
||||
// the literal could overlap with it.
|
||||
return true;
|
||||
}
|
||||
const CharReach &cr_v = h[v].char_reach;
|
||||
if (overlaps(cr_v, cr_s)) {
|
||||
insert(&next, inv_adjacent_vertices(v, h));
|
||||
matched = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!matched) {
|
||||
return false;
|
||||
}
|
||||
|
||||
curr.swap(next);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
void contractVertex(NGHolder &g, NFAVertex v,
|
||||
ue2::unordered_set<pair<NFAVertex, NFAVertex>> &all_edges) {
|
||||
for (auto u : inv_adjacent_vertices_range(v, g)) {
|
||||
if (u == v) {
|
||||
continue; // self-edge
|
||||
}
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (w == v) {
|
||||
continue; // self-edge
|
||||
}
|
||||
|
||||
// Construct edge (u, v) only if it doesn't already exist. We use
|
||||
// the all_edges container here, as checking existence inside the
|
||||
// graph is expensive when u or v have large degree.
|
||||
if (all_edges.emplace(u, w).second) {
|
||||
add_edge(u, w, g);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Note that edges to/from v will remain in all_edges.
|
||||
clear_vertex(v, g);
|
||||
}
|
||||
|
||||
static
|
||||
u32 findMaxInfixMatches(const NGHolder &h, const set<ue2_literal> &lits) {
|
||||
DEBUG_PRINTF("h=%p, %zu literals\n", &h, lits.size());
|
||||
//dumpGraph("infix.dot", h.g);
|
||||
|
||||
if (!onlyOneTop(h)) {
|
||||
DEBUG_PRINTF("more than one top!n");
|
||||
return NO_MATCH_LIMIT;
|
||||
}
|
||||
|
||||
// Indices of vertices that could terminate any of the literals in 'lits'.
|
||||
set<u32> terms;
|
||||
|
||||
for (const auto &s : lits) {
|
||||
DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
|
||||
if (s.empty()) {
|
||||
// Likely an anchored case, be conservative here.
|
||||
return NO_MATCH_LIMIT;
|
||||
}
|
||||
|
||||
for (auto v : vertices_range(h)) {
|
||||
if (is_special(v, h)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (couldEndLiteral(s, v, h)) {
|
||||
u32 idx = h[v].index;
|
||||
DEBUG_PRINTF("vertex %u could terminate lit\n", idx);
|
||||
terms.insert(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (terms.empty()) {
|
||||
DEBUG_PRINTF("literals cannot match inside infix\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
NGHolder g;
|
||||
cloneHolder(g, h);
|
||||
vector<NFAVertex> dead;
|
||||
|
||||
// The set of all edges in the graph is used for existence checks in contractVertex.
|
||||
ue2::unordered_set<pair<NFAVertex, NFAVertex>> all_edges;
|
||||
for (const auto &e : edges_range(g)) {
|
||||
all_edges.emplace(source(e, g), target(e, g));
|
||||
}
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (is_special(v, g)) {
|
||||
continue;
|
||||
}
|
||||
if (contains(terms, g[v].index)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
contractVertex(g, v, all_edges);
|
||||
dead.push_back(v);
|
||||
}
|
||||
|
||||
remove_vertices(dead, g);
|
||||
//dumpGraph("relaxed.dot", g.g);
|
||||
|
||||
depth maxWidth = findMaxWidth(g);
|
||||
DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str());
|
||||
assert(maxWidth.is_reachable());
|
||||
|
||||
if (maxWidth.is_infinite()) {
|
||||
// Cycle detected, so we can likely squeeze an unlimited number of
|
||||
// matches into this graph.
|
||||
return NO_MATCH_LIMIT;
|
||||
}
|
||||
|
||||
assert(terms.size() >= maxWidth);
|
||||
return maxWidth;
|
||||
}
|
||||
|
||||
namespace {
/** \brief Predicate: true for a CharReach that shares no characters with
 * the reach supplied at construction. Used with find_if to locate the first
 * character of a literal that falls outside a repeat's reach. */
struct ReachMismatch {
    explicit ReachMismatch(const CharReach &cr_in) : cr(cr_in) {}
    bool operator()(const CharReach &a) const { return !overlaps(cr, a); }

private:
    CharReach cr;
};
}
|
||||
|
||||
/**
 * \brief Returns the maximum number of times any literal in \p lits could
 * match within the castle \p castle, or NO_MATCH_LIMIT if unbounded.
 *
 * The bound is derived from the longest suffix of each literal that lies
 * entirely within the repeat's reach, capped by the repeat's max bound.
 */
static
u32 findMaxInfixMatches(const CastleProto &castle,
                        const set<ue2_literal> &lits) {
    DEBUG_PRINTF("castle=%p, %zu literals\n", &castle, lits.size());

    if (castle.repeats.size() > 1) {
        DEBUG_PRINTF("more than one top!\n");
        return NO_MATCH_LIMIT;
    }

    assert(!castle.repeats.empty());
    const PureRepeat &pr = castle.repeats.begin()->second;
    DEBUG_PRINTF("repeat=%s reach=%s\n", pr.bounds.str().c_str(),
                 describeClass(pr.reach).c_str());

    size_t max_count = 0;

    for (const auto &s : lits) {
        DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
        if (s.empty()) {
            // Likely an anchored case, be conservative here.
            return NO_MATCH_LIMIT;
        }

        size_t count = 0;

        // Scan the literal from its end for the first char outside the
        // repeat's reach.
        auto f = find_if(s.rbegin(), s.rend(), ReachMismatch(pr.reach));

        if (f == s.rbegin()) {
            // Very last char mismatches: no suffix fits in the repeat.
            DEBUG_PRINTF("lit can't terminate inside infix\n");
            count = 0;
        } else if (f != s.rend()) {
            // A proper suffix of the literal fits inside the repeat.
            size_t suffix_len = distance(s.rbegin(), f);
            DEBUG_PRINTF("suffix of len %zu matches at start\n", suffix_len);
            if (pr.bounds.max.is_finite()) {
                count = min(suffix_len, (size_t)pr.bounds.max);
            } else {
                count = suffix_len;
            }
        } else {
            // Entire literal fits within the repeat's reach.
            DEBUG_PRINTF("whole lit can match inside infix (repeatedly)\n");
            if (pr.bounds.max.is_finite()) {
                count = pr.bounds.max;
            } else {
                DEBUG_PRINTF("inf bound\n");
                return NO_MATCH_LIMIT;
            }
        }

        DEBUG_PRINTF("count=%zu\n", count);
        max_count = max(max_count, count);
    }

    DEBUG_PRINTF("max_count %zu\n", max_count);

    if (max_count > NO_MATCH_LIMIT) {
        assert(0); // This would be a surprise.
        return NO_MATCH_LIMIT;
    }

    return (u32)max_count;
}
|
||||
|
||||
/**
 * \brief Dispatch to the castle or graph implementation; engine types with
 * no specific analysis get the conservative NO_MATCH_LIMIT answer.
 */
u32 findMaxInfixMatches(const left_id &left, const set<ue2_literal> &lits) {
    if (auto *c = left.castle()) {
        return findMaxInfixMatches(*c, lits);
    }
    if (auto *g = left.graph()) {
        return findMaxInfixMatches(*g, lits);
    }
    return NO_MATCH_LIMIT;
}
|
||||
|
||||
/**
 * \brief Derive counting-miracle data for a leftfix: \p cm_cr is set to a
 * character class outside the graph's cyclic reach (minus stop characters),
 * and \p cm_count to one more than the max number of infix matches those
 * characters can produce. Outputs are zeroed when no useful miracle exists.
 */
void findCountingMiracleInfo(const left_id &left, const vector<u8> &stopTable,
                             u8 *cm_count, CharReach *cm_cr) {
    DEBUG_PRINTF("hello\n");
    *cm_count = 0;
    cm_cr->clear();
    if (!left.graph()) {
        return; // only graph-based leftfixes are analysed here
    }

    const NGHolder &g = *left.graph();

    auto cyclics = findVerticesInCycles(g);

    // startDs without a proper out-edge is not a "real" cycle participant.
    if (!proper_out_degree(g.startDs, g)) {
        cyclics.erase(g.startDs);
    }

    // Union of reach over all cyclic vertices.
    CharReach cyclic_cr;
    for (NFAVertex v : cyclics) {
        DEBUG_PRINTF("considering %u ||=%zu\n", g[v].index,
                      g[v].char_reach.count());
        cyclic_cr |= g[v].char_reach;
    }

    if (cyclic_cr.none() || cyclic_cr.all()) {
        DEBUG_PRINTF("cyclic cr width %zu\n", cyclic_cr.count());
        return; /* useless */
    }

    *cm_cr = ~cyclic_cr;

    /* stop character will be part of normal miracles, no need to look for them
     * here too */
    assert(stopTable.size() == N_CHARS);
    for (u32 i = 0; i < N_CHARS; i++) {
        if (stopTable[i]) {
            cm_cr->clear(i);
        }
    }

    // Build single-char literals for each candidate stop character.
    set<ue2_literal> lits;
    for (size_t c = cm_cr->find_first(); c != CharReach::npos;
         c = cm_cr->find_next(c)) {
        DEBUG_PRINTF("considering %hhx as stop character\n", (u8)c);
        lits.insert(ue2_literal(c, false));
    }

    u32 count = findMaxInfixMatches(*left.graph(), lits);
    DEBUG_PRINTF("counting miracle %u\n", count + 1);
    // 50 looks like a heuristic cap on useful counts (cm_count is a u8) --
    // TODO confirm against the runtime's counting-miracle check.
    if (count && count < 50) {
        *cm_count = count + 1;
    }
}
|
||||
|
||||
} // namespace ue2
|
52
src/rose/rose_build_infix.h
Normal file
52
src/rose/rose_build_infix.h
Normal file
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_BUILD_INFIX_H
#define ROSE_BUILD_INFIX_H

#include "ue2common.h"

#include <set>
#include <vector>

namespace ue2 {

class CharReach;
struct left_id;
struct ue2_literal;

// Sentinel: no usable bound on the number of infix matches could be found.
static constexpr u32 NO_MATCH_LIMIT = 0xffffffff;

// Upper bound on how many times any literal in \a lits can match inside the
// infix engine \a left, or NO_MATCH_LIMIT if unbounded/unknown.
u32 findMaxInfixMatches(const left_id &left, const std::set<ue2_literal> &lits);

// Derive counting-miracle data (count and char class) for \a left; writes
// zero/empty outputs when no useful miracle exists.
void findCountingMiracleInfo(const left_id &left, const std::vector<u8> &stop,
                             u8 *cm_count, CharReach *cm_cr);

} // namespace ue2

#endif // ROSE_BUILD_INFIX_H
|
667
src/rose/rose_build_lookaround.cpp
Normal file
667
src/rose/rose_build_lookaround.cpp
Normal file
@@ -0,0 +1,667 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Rose compile-time analysis for lookaround masks.
|
||||
*/
|
||||
#include "rose_build_lookaround.h"
|
||||
|
||||
#include "rose_build_impl.h"
|
||||
#include "nfa/castlecompile.h"
|
||||
#include "nfa/goughcompile.h"
|
||||
#include "nfa/rdfa.h"
|
||||
#include "nfagraph/ng_repeat.h"
|
||||
#include "nfagraph/ng_util.h"
|
||||
#include "util/container.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <queue>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Max search distance for reachability in front of a role. */
|
||||
static const u32 MAX_FWD_LEN = 64;
|
||||
|
||||
/** \brief Max search distance for reachability behind a role. */
|
||||
static const u32 MAX_BACK_LEN = 64;
|
||||
|
||||
/** \brief Max lookaround entries for a role. */
|
||||
static const u32 MAX_LOOKAROUND_ENTRIES = 16;
|
||||
|
||||
/** \brief We would rather have lookarounds with smaller reach than this. */
|
||||
static const u32 LOOKAROUND_WIDE_REACH = 200;
|
||||
|
||||
static
|
||||
void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) {
|
||||
ue2::flat_set<NFAVertex> curr, next;
|
||||
|
||||
// Consider only successors of start with the required top.
|
||||
for (const auto &e : out_edges_range(g.start, g)) {
|
||||
NFAVertex v = target(e, g);
|
||||
if (v == g.startDs) {
|
||||
continue;
|
||||
}
|
||||
if (g[e].top == top) {
|
||||
curr.insert(v);
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < MAX_FWD_LEN; i++) {
|
||||
if (curr.empty() || contains(curr, g.accept) ||
|
||||
contains(curr, g.acceptEod)) {
|
||||
break;
|
||||
}
|
||||
|
||||
next.clear();
|
||||
CharReach cr;
|
||||
|
||||
for (auto v : curr) {
|
||||
assert(!is_special(v, g));
|
||||
cr |= g[v].char_reach;
|
||||
insert(&next, adjacent_vertices(v, g));
|
||||
}
|
||||
|
||||
assert(cr.any());
|
||||
look[i] |= cr;
|
||||
curr.swap(next);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void getBackwardReach(const NGHolder &g, ReportID report, u32 lag,
|
||||
map<s32, CharReach> &look) {
|
||||
ue2::flat_set<NFAVertex> curr, next;
|
||||
|
||||
for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
|
||||
if (contains(g[v].reports, report)) {
|
||||
curr.insert(v);
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 i = lag + 1; i <= MAX_BACK_LEN; i++) {
|
||||
if (curr.empty() || contains(curr, g.start) ||
|
||||
contains(curr, g.startDs)) {
|
||||
break;
|
||||
}
|
||||
|
||||
next.clear();
|
||||
CharReach cr;
|
||||
|
||||
for (auto v : curr) {
|
||||
assert(!is_special(v, g));
|
||||
cr |= g[v].char_reach;
|
||||
insert(&next, inv_adjacent_vertices(v, g));
|
||||
}
|
||||
|
||||
assert(cr.any());
|
||||
look[0 - i] |= cr;
|
||||
curr.swap(next);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void getForwardReach(const CastleProto &castle, u32 top,
|
||||
map<s32, CharReach> &look) {
|
||||
depth len = castle.repeats.at(top).bounds.min;
|
||||
len = min(len, depth(MAX_FWD_LEN));
|
||||
assert(len.is_finite());
|
||||
|
||||
const CharReach &cr = castle.reach();
|
||||
for (u32 i = 0; i < len; i++) {
|
||||
look[i] |= cr;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void getBackwardReach(const CastleProto &castle, ReportID report, u32 lag,
|
||||
map<s32, CharReach> &look) {
|
||||
depth min_depth = depth::infinity();
|
||||
for (const auto &m : castle.repeats) {
|
||||
const PureRepeat &pr = m.second;
|
||||
if (contains(pr.reports, report)) {
|
||||
min_depth = min(min_depth, pr.bounds.min);
|
||||
}
|
||||
}
|
||||
|
||||
if (!min_depth.is_finite()) {
|
||||
assert(0);
|
||||
return;
|
||||
}
|
||||
|
||||
const CharReach &cr = castle.reach();
|
||||
for (u32 i = lag + 1; i <= min(lag + (u32)min_depth, MAX_BACK_LEN);
|
||||
i++) {
|
||||
look[0 - i] |= cr;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Forward reach for a DFA: breadth-first over states from the anchored
 * start, recording at each offset the union of characters with a live
 * (non-DEAD) transition. Bails out entirely as soon as any reachable state
 * reports, since reach beyond a possible match is unconstrained.
 */
static
void getForwardReach(const raw_dfa &rdfa, map<s32, CharReach> &look) {
    if (rdfa.states.size() < 2) {
        return;
    }

    ue2::flat_set<dstate_id_t> curr, next;
    curr.insert(rdfa.start_anchored);

    for (u32 i = 0; i < MAX_FWD_LEN && !curr.empty(); i++) {
        next.clear();
        CharReach cr;

        for (const auto state_id : curr) {
            const dstate &ds = rdfa.states[state_id];

            // A reporting state means offsets past here are unconstrained.
            if (!ds.reports.empty() || !ds.reports_eod.empty()) {
                return;
            }

            for (unsigned c = 0; c < N_CHARS; c++) {
                dstate_id_t succ = ds.next[rdfa.alpha_remap[c]];
                if (succ != DEAD_STATE) {
                    cr.set(c);
                    next.insert(succ);
                }
            }
        }

        assert(cr.any());
        look[i] |= cr;
        curr.swap(next);
    }
}
|
||||
|
||||
static
|
||||
void getSuffixForwardReach(const suffix_id &suff, u32 top,
|
||||
map<s32, CharReach> &look) {
|
||||
if (suff.graph()) {
|
||||
getForwardReach(*suff.graph(), top, look);
|
||||
} else if (suff.castle()) {
|
||||
getForwardReach(*suff.castle(), top, look);
|
||||
} else if (suff.dfa()) {
|
||||
assert(top == 0); // DFA isn't multi-top capable.
|
||||
getForwardReach(*suff.dfa(), look);
|
||||
} else if (suff.haig()) {
|
||||
assert(top == 0); // DFA isn't multi-top capable.
|
||||
getForwardReach(*suff.haig(), look);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void getRoseForwardReach(const left_id &left, u32 top,
|
||||
map<s32, CharReach> &look) {
|
||||
if (left.graph()) {
|
||||
getForwardReach(*left.graph(), top, look);
|
||||
} else if (left.castle()) {
|
||||
getForwardReach(*left.castle(), top, look);
|
||||
} else if (left.dfa()) {
|
||||
assert(top == 0); // DFA isn't multi-top capable.
|
||||
getForwardReach(*left.dfa(), look);
|
||||
} else if (left.haig()) {
|
||||
assert(top == 0); // DFA isn't multi-top capable.
|
||||
getForwardReach(*left.haig(), look);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void combineForwardMasks(const vector<map<s32, CharReach> > &rose_look,
|
||||
map<s32, CharReach> &look) {
|
||||
for (u32 i = 0; i < MAX_FWD_LEN; i++) {
|
||||
for (const auto &rlook : rose_look) {
|
||||
if (contains(rlook, i)) {
|
||||
look[i] |= rlook.at(i);
|
||||
} else {
|
||||
look[i].setall();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Compute forward lookaround reach for role \p v by combining the
 * forward reach of every successor's prefix engine (and the suffix, if any).
 * Adds nothing for accepting roles or when any successor lacks a leftfix,
 * since the reach would then be unconstrained.
 */
static
void findForwardReach(const RoseGraph &g, const RoseVertex v,
                      map<s32, CharReach> &look) {
    if (!g[v].reports.empty()) {
        DEBUG_PRINTF("acceptor\n");
        return;
    }

    // Non-leaf vertices can pick up a mask per successor prefix rose
    // engine.
    vector<map<s32, CharReach>> rose_look;
    for (const auto &e : out_edges_range(v, g)) {
        RoseVertex t = target(e, g);
        if (!g[t].left) {
            // Unconstrained successor: any mask we built would be unsound.
            DEBUG_PRINTF("successor %zu has no leftfix\n", g[t].idx);
            return;
        }
        rose_look.push_back(map<s32, CharReach>());
        getRoseForwardReach(g[t].left, g[e].rose_top, rose_look.back());
    }

    if (g[v].suffix) {
        DEBUG_PRINTF("suffix engine\n");
        rose_look.push_back(map<s32, CharReach>());
        getSuffixForwardReach(g[v].suffix, g[v].suffix.top, rose_look.back());
    }

    combineForwardMasks(rose_look, look);
}
|
||||
|
||||
static
|
||||
void findBackwardReach(const RoseGraph &g, const RoseVertex v,
|
||||
map<s32, CharReach> &look) {
|
||||
if (!g[v].left) {
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("leftfix, report=%u, lag=%u\n", g[v].left.leftfix_report,
|
||||
g[v].left.lag);
|
||||
|
||||
if (g[v].left.graph) {
|
||||
getBackwardReach(*g[v].left.graph, g[v].left.leftfix_report,
|
||||
g[v].left.lag, look);
|
||||
} else if (g[v].left.castle) {
|
||||
getBackwardReach(*g[v].left.castle, g[v].left.leftfix_report,
|
||||
g[v].left.lag, look);
|
||||
}
|
||||
|
||||
// TODO: implement DFA variants if necessary.
|
||||
}
|
||||
|
||||
#if defined(DEBUG) || defined(DUMP_SUPPORT)
|
||||
#include <sstream>
|
||||
static UNUSED
|
||||
string dump(const map<s32, CharReach> &look) {
|
||||
ostringstream oss;
|
||||
for (auto it = look.begin(), ite = look.end(); it != ite; ++it) {
|
||||
if (it != look.begin()) {
|
||||
oss << ", ";
|
||||
}
|
||||
oss << "{" << it->first << ": " << describeClass(it->second) << "}";
|
||||
}
|
||||
return oss.str();
|
||||
}
|
||||
#endif
|
||||
|
||||
static
|
||||
void normalise(map<s32, CharReach> &look) {
|
||||
// We can erase entries where the reach is "all characters".
|
||||
vector<s32> dead;
|
||||
for (const auto &m : look) {
|
||||
if (m.second.all()) {
|
||||
dead.push_back(m.first);
|
||||
}
|
||||
}
|
||||
erase_all(&look, dead);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
struct LookPriority {
|
||||
explicit LookPriority(const map<s32, CharReach> &look_in) : look(look_in) {}
|
||||
|
||||
bool operator()(s32 a, s32 b) const {
|
||||
const CharReach &a_reach = look.at(a);
|
||||
const CharReach &b_reach = look.at(b);
|
||||
if (a_reach.count() != b_reach.count()) {
|
||||
return a_reach.count() < b_reach.count();
|
||||
}
|
||||
return abs(a) < abs(b);
|
||||
}
|
||||
|
||||
private:
|
||||
const map<s32, CharReach> &look;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
static
|
||||
bool isFloodProne(const map<s32, CharReach> &look, const CharReach &flood_cr) {
|
||||
for (const auto &m : look) {
|
||||
const CharReach &look_cr = m.second;
|
||||
if (!overlaps(look_cr, flood_cr)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
DEBUG_PRINTF("look can't escape flood on %s\n",
|
||||
describeClass(flood_cr).c_str());
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool isFloodProne(const map<s32, CharReach> &look,
|
||||
const set<CharReach> &flood_reach) {
|
||||
if (flood_reach.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const CharReach &flood_cr : flood_reach) {
|
||||
if (isFloodProne(look, flood_cr)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * \brief Cull the lookaround mask towards MAX_LOOKAROUND_ENTRIES entries,
 * discarding the least selective (widest reach) entries first, while never
 * making the mask flood-prone against any reach in \p flood_reach.
 */
static
void reduce(map<s32, CharReach> &look, set<CharReach> &flood_reach) {
    if (look.size() <= MAX_LOOKAROUND_ENTRIES) {
        return; // already small enough
    }

    DEBUG_PRINTF("before reduce: %s\n", dump(look).c_str());

    // First, remove floods that we already can't escape; they shouldn't affect
    // the analysis below.
    for (auto it = flood_reach.begin(); it != flood_reach.end();) {
        if (isFloodProne(look, *it)) {
            DEBUG_PRINTF("removing inescapable flood on %s from analysis\n",
                         describeClass(*it).c_str());
            flood_reach.erase(it++);
        } else {
            ++it;
        }
    }

    // Widest-reach entries surface first from the queue (see LookPriority).
    LookPriority cmp(look);
    priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
    for (const auto &m : look) {
        pq.push(m.first);
    }

    // Phase 1: evict entries until we fit under the limit.
    while (!pq.empty() && look.size() > MAX_LOOKAROUND_ENTRIES) {
        s32 d = pq.top();
        assert(contains(look, d));
        const CharReach cr(look[d]); // copy
        pq.pop();

        DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
        look.erase(d);

        // If removing this entry would result in us becoming flood_prone on a
        // particular flood_reach case, reinstate it and move on.
        if (isFloodProne(look, flood_reach)) {
            DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
                         describeClass(cr).c_str());
            look.insert(make_pair(d, cr));
        }
    }

    // Phase 2: even under the limit, drop remaining very wide entries
    // (reach >= LOOKAROUND_WIDE_REACH), still subject to the flood check.
    while (!pq.empty()) {
        s32 d = pq.top();
        assert(contains(look, d));
        const CharReach cr(look[d]); // copy
        pq.pop();

        if (cr.count() < LOOKAROUND_WIDE_REACH) {
            continue;
        }

        DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
        look.erase(d);

        // If removing this entry would result in us becoming flood_prone on a
        // particular flood_reach case, reinstate it and move on.
        if (isFloodProne(look, flood_reach)) {
            DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
                         describeClass(cr).c_str());
            look.insert(make_pair(d, cr));
        }
    }

    DEBUG_PRINTF("after reduce: %s\n", dump(look).c_str());
}
|
||||
|
||||
static
|
||||
void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v,
|
||||
set<CharReach> &flood_reach) {
|
||||
for (u32 lit_id : tbi.g[v].literals) {
|
||||
const ue2_literal &s = tbi.literals.right.at(lit_id).s;
|
||||
if (s.empty()) {
|
||||
continue;
|
||||
}
|
||||
if (is_flood(s)) {
|
||||
CharReach cr(*s.begin());
|
||||
DEBUG_PRINTF("flood-prone with reach: %s\n",
|
||||
describeClass(cr).c_str());
|
||||
flood_reach.insert(cr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
map<s32, CharReach> findLiteralReach(const RoseBuildImpl &build,
|
||||
const RoseVertex v) {
|
||||
map<s32, CharReach> look;
|
||||
for (u32 lit_id : build.g[v].literals) {
|
||||
const rose_literal_id &lit = build.literals.right.at(lit_id);
|
||||
|
||||
u32 i = lit.delay + 1;
|
||||
for (auto it = lit.s.rbegin(), ite = lit.s.rend(); it != ite; ++it) {
|
||||
look[0 - i] |= *it;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("lit lookaround: %s\n", dump(look).c_str());
|
||||
return look;
|
||||
}
|
||||
|
||||
/**
|
||||
* Trim lookaround checks from the prefix that overlap with the literals
|
||||
* themselves.
|
||||
*/
|
||||
static
|
||||
void trimLiterals(const RoseBuildImpl &build, const RoseVertex v,
|
||||
map<s32, CharReach> &look) {
|
||||
DEBUG_PRINTF("pre-trim lookaround: %s\n", dump(look).c_str());
|
||||
|
||||
for (const auto &m : findLiteralReach(build, v)) {
|
||||
auto it = look.find(m.first);
|
||||
if (it == end(look)) {
|
||||
continue;
|
||||
}
|
||||
if (m.second.isSubsetOf(it->second)) {
|
||||
DEBUG_PRINTF("can trim entry at %d\n", it->first);
|
||||
look.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("post-trim lookaround: %s\n", dump(look).c_str());
|
||||
}
|
||||
|
||||
void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
|
||||
vector<LookEntry> &lookaround) {
|
||||
lookaround.clear();
|
||||
|
||||
const RoseGraph &g = tbi.g;
|
||||
|
||||
map<s32, CharReach> look;
|
||||
findBackwardReach(g, v, look);
|
||||
findForwardReach(g, v, look);
|
||||
trimLiterals(tbi, v, look);
|
||||
|
||||
if (look.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
normalise(look);
|
||||
|
||||
if (look.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
set<CharReach> flood_reach;
|
||||
findFloodReach(tbi, v, flood_reach);
|
||||
reduce(look, flood_reach);
|
||||
|
||||
if (look.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str());
|
||||
lookaround.reserve(look.size());
|
||||
for (const auto &m : look) {
|
||||
s8 offset = verify_s8(m.first);
|
||||
lookaround.emplace_back(offset, m.second);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief If the prefix graph is a single unbranched chain from startDs to
 * accept, record each chain vertex's reach at its (negative) offset in
 * \p look and return true; otherwise return false.
 */
static
bool getTransientPrefixReach(const NGHolder &g, u32 lag,
                             map<s32, CharReach> &look) {
    if (in_degree(g.accept, g) != 1) {
        DEBUG_PRINTF("more than one accept\n");
        return false;
    }

    // Currently we don't handle anchored prefixes, as we would need to be able
    // to represent the bounds from the anchor as well.
    if (out_degree(g.start, g) != 1) {
        DEBUG_PRINTF("anchored\n");
        return false;
    }

    if (out_degree(g.startDs, g) != 2) {
        DEBUG_PRINTF("more than one start\n");
        return false;
    }

    // Walk backwards from accept's sole predecessor; the chain must be
    // strictly linear (in-degree 1 at every step) all the way to startDs.
    NFAVertex v = *(inv_adjacent_vertices(g.accept, g).first);
    u32 i = lag + 1;
    while (v != g.startDs) {
        DEBUG_PRINTF("i=%u, v=%u\n", i, g[v].index);
        if (is_special(v, g)) {
            DEBUG_PRINTF("special\n");
            return false;
        }

        look[0 - i] = g[v].char_reach;

        if (in_degree(v, g) != 1) {
            DEBUG_PRINTF("branch\n");
            return false;
        }

        v = *(inv_adjacent_vertices(v, g).first);
        i++;
    }

    DEBUG_PRINTF("done\n");
    return true;
}
|
||||
|
||||
bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
|
||||
vector<LookEntry> &lookaround) {
|
||||
lookaround.clear();
|
||||
|
||||
const RoseGraph &g = build.g;
|
||||
const left_id leftfix(g[v].left);
|
||||
|
||||
if (!contains(build.transient, leftfix)) {
|
||||
DEBUG_PRINTF("not transient\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!leftfix.graph()) {
|
||||
DEBUG_PRINTF("only supported for graphs so far\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
map<s32, CharReach> look;
|
||||
if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.lag, look)) {
|
||||
DEBUG_PRINTF("not a chain\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
trimLiterals(build, v, look);
|
||||
|
||||
if (look.size() > MAX_LOOKAROUND_ENTRIES) {
|
||||
DEBUG_PRINTF("lookaround too big (%zu entries)\n", look.size());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (look.empty()) {
|
||||
DEBUG_PRINTF("lookaround empty; this is weird\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
lookaround.reserve(look.size());
|
||||
for (const auto &m : look) {
|
||||
s8 offset = verify_s8(m.first);
|
||||
lookaround.emplace_back(offset, m.second);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * \brief Merge entries from \p more_lookaround into \p lookaround.
 *
 * Only offsets not already present are considered; candidates are added in
 * LookPriority order until MAX_LOOKAROUND_ENTRIES is reached. The result is
 * re-sorted by offset.
 */
void mergeLookaround(vector<LookEntry> &lookaround,
                     const vector<LookEntry> &more_lookaround) {
    if (lookaround.size() >= MAX_LOOKAROUND_ENTRIES) {
        DEBUG_PRINTF("big enough!\n");
        return;
    }

    // Don't merge lookarounds at offsets we already have entries for.
    ue2::flat_set<s8> seen;
    for (const auto &le : lookaround) {
        seen.insert(le.offset);
    }

    // Queue the new candidates, best first, as ranked by LookPriority (which
    // reads reach info out of the 'candidates' map).
    map<s32, CharReach> candidates;
    LookPriority cmp(candidates);
    priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
    for (const auto &le : more_lookaround) {
        if (contains(seen, le.offset)) {
            continue;
        }
        candidates.emplace(le.offset, le.reach);
        pq.push(le.offset);
    }

    // Pull candidates in priority order until we hit the size limit.
    while (!pq.empty() && lookaround.size() < MAX_LOOKAROUND_ENTRIES) {
        const s32 offset = pq.top();
        pq.pop();
        const auto &cr = candidates.at(offset);
        DEBUG_PRINTF("added {%d,%s}\n", offset, describeClass(cr).c_str());
        lookaround.emplace_back(verify_s8(offset), cr);
    }

    // Order by offset.
    sort(begin(lookaround), end(lookaround),
         [](const LookEntry &a, const LookEntry &b) {
             return a.offset < b.offset;
         });
}
|
||||
|
||||
} // namespace ue2
|
82
src/rose/rose_build_lookaround.h
Normal file
82
src/rose/rose_build_lookaround.h
Normal file
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Rose compile-time analysis for lookaround masks.
|
||||
*/
|
||||
#ifndef ROSE_ROSE_BUILD_LOOKAROUND_H
|
||||
#define ROSE_ROSE_BUILD_LOOKAROUND_H
|
||||
|
||||
#include "rose_graph.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class CharReach;
|
||||
class RoseBuildImpl;
|
||||
|
||||
/** \brief Lookaround entry prototype, describing the reachability at a given
|
||||
* distance from the end of a role match. */
|
||||
struct LookEntry {
|
||||
LookEntry(s8 offset_in, const CharReach &reach_in)
|
||||
: offset(offset_in), reach(reach_in) {}
|
||||
s8 offset; //!< offset from role match location.
|
||||
CharReach reach; //!< reachability at given offset.
|
||||
|
||||
bool operator==(const LookEntry &other) const {
|
||||
return offset == other.offset && reach == other.reach;
|
||||
}
|
||||
};
|
||||
|
||||
static inline
|
||||
size_t hash_value(const LookEntry &l) {
|
||||
size_t val = 0;
|
||||
boost::hash_combine(val, l.offset);
|
||||
boost::hash_combine(val, l.reach);
|
||||
return val;
|
||||
}
|
||||
|
||||
void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
|
||||
std::vector<LookEntry> &lookaround);
|
||||
|
||||
/**
|
||||
* \brief If possible, render the prefix of the given vertex as a lookaround.
|
||||
*
|
||||
* Given a prefix, returns true (and fills the lookaround vector) if
|
||||
* it can be satisfied with a lookaround alone.
|
||||
*/
|
||||
bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
|
||||
std::vector<LookEntry> &lookaround);
|
||||
|
||||
void mergeLookaround(std::vector<LookEntry> &lookaround,
|
||||
const std::vector<LookEntry> &more_lookaround);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // ROSE_ROSE_BUILD_LOOKAROUND_H
|
2726
src/rose/rose_build_merge.cpp
Normal file
2726
src/rose/rose_build_merge.cpp
Normal file
File diff suppressed because it is too large
Load Diff
73
src/rose/rose_build_merge.h
Normal file
73
src/rose/rose_build_merge.h
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Rose Build: functions for reducing the size of the Rose graph
|
||||
* through merging.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_BUILD_MERGE_H
|
||||
#define ROSE_BUILD_MERGE_H
|
||||
|
||||
#include "rose_graph.h"
|
||||
|
||||
#include <deque>
|
||||
#include <set>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
class RoseBuildImpl;
|
||||
|
||||
void mergeDupeLeaves(RoseBuildImpl &tbi);
|
||||
void uncalcLeaves(RoseBuildImpl &tbi);
|
||||
|
||||
bool dedupeLeftfixes(RoseBuildImpl &tbi);
|
||||
void mergeLeftfixesVariableLag(RoseBuildImpl &tbi);
|
||||
void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi);
|
||||
void dedupeSuffixes(RoseBuildImpl &tbi);
|
||||
|
||||
void mergeAcyclicSuffixes(RoseBuildImpl &tbi);
|
||||
void mergeSmallSuffixes(RoseBuildImpl &tbi);
|
||||
void mergeSmallLeftfixes(RoseBuildImpl &tbi);
|
||||
void mergeCastleLeftfixes(RoseBuildImpl &tbi);
|
||||
void mergeOutfixes(RoseBuildImpl &tbi);
|
||||
void mergePuffixes(RoseBuildImpl &tbi);
|
||||
void mergeCastleSuffixes(RoseBuildImpl &tbi);
|
||||
|
||||
bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
|
||||
RoseVertex v);
|
||||
bool mergeableRoseVertices(const RoseBuildImpl &tbi,
|
||||
const std::set<RoseVertex> &v1,
|
||||
const std::set<RoseVertex> &v2);
|
||||
bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
|
||||
const std::deque<RoseVertex> &verts1);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // ROSE_BUILD_MERGE_H
|
1269
src/rose/rose_build_misc.cpp
Normal file
1269
src/rose/rose_build_misc.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1989
src/rose/rose_build_role_aliasing.cpp
Normal file
1989
src/rose/rose_build_role_aliasing.cpp
Normal file
File diff suppressed because it is too large
Load Diff
40
src/rose/rose_build_role_aliasing.h
Normal file
40
src/rose/rose_build_role_aliasing.h
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_BUILD_ROLE_ALIASING
|
||||
#define ROSE_BUILD_ROLE_ALIASING
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class RoseBuildImpl;
|
||||
|
||||
void aliasRoles(RoseBuildImpl &build, bool mergeRoses);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
129
src/rose/rose_build_scatter.cpp
Normal file
129
src/rose/rose_build_scatter.cpp
Normal file
@@ -0,0 +1,129 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "rose_build_scatter.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/container.h"
|
||||
#include "util/multibit_build.h"
|
||||
|
||||
#include <cstring> // memset
|
||||
#include <set>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
template<typename T>
|
||||
static
|
||||
void rebase(vector<T> *p, u32 adj) {
|
||||
for (typename vector<T>::iterator it = p->begin(); it != p->end(); ++it) {
|
||||
DEBUG_PRINTF("=%u+%u\n", it->offset, adj);
|
||||
it->offset += adj;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void rebase(scatter_plan_raw *raw, u32 adj) {
|
||||
rebase(&raw->p_u64a, adj);
|
||||
rebase(&raw->p_u32, adj);
|
||||
rebase(&raw->p_u16, adj);
|
||||
rebase(&raw->p_u8, adj);
|
||||
}
|
||||
|
||||
static
|
||||
void merge_in(scatter_plan_raw *out, const scatter_plan_raw &in) {
|
||||
insert(&out->p_u64a, out->p_u64a.end(), in.p_u64a);
|
||||
insert(&out->p_u32, out->p_u32.end(), in.p_u32);
|
||||
insert(&out->p_u16, out->p_u16.end(), in.p_u16);
|
||||
insert(&out->p_u8, out->p_u8.end(), in.p_u8);
|
||||
}
|
||||
|
||||
/**
 * \brief Build the scatter plan used to initialise Rose stream state: the
 * role multibit, the active-leftfix array and the active-leaf array.
 *
 * Each sub-plan is built at offset zero, rebased to its real location in the
 * state, then appended to \p out.
 */
void buildStateScatterPlan(u32 role_state_offset, u32 role_state_count,
                           u32 left_array_count, u32 left_prefix_count,
                           const RoseStateOffsets &stateOffsets,
                           bool streaming, u32 leaf_array_count,
                           u32 outfix_begin, u32 outfix_end,
                           scatter_plan_raw *out) {
    /* init role array */
    scatter_plan_raw role_plan;
    mmbBuildClearPlan(role_state_count, &role_plan);
    rebase(&role_plan, role_state_offset);
    merge_in(out, role_plan);

    /* init rose array: turn on prefixes */
    scatter_plan_raw rose_plan;
    mmbBuildInitRangePlan(left_array_count, 0, left_prefix_count, &rose_plan);
    rebase(&rose_plan, stateOffsets.activeLeftArray);
    merge_in(out, rose_plan);

    /* suffix/outfix array */
    scatter_plan_raw leaf_plan;
    if (streaming) {
        // Streaming mode: outfixes in [outfix_begin, outfix_end) start
        // switched on.
        mmbBuildInitRangePlan(leaf_array_count, outfix_begin, outfix_end,
                              &leaf_plan);
    } else {
        mmbBuildClearPlan(leaf_array_count, &leaf_plan);
    }
    rebase(&leaf_plan, stateOffsets.activeLeafArray);
    merge_in(out, leaf_plan);
}
|
||||
|
||||
// Total number of bytes of aux storage needed to serialise all the unit
// lists of a raw scatter plan.
u32 aux_size(const scatter_plan_raw &raw) {
    return byte_length(raw.p_u64a) + byte_length(raw.p_u32) +
           byte_length(raw.p_u16) + byte_length(raw.p_u8);
}
|
||||
|
||||
/**
 * \brief Serialise a raw scatter plan: fill in the fixed-size plan structure
 * and copy each width-specific unit list into the aux region, starting at
 * \p aux_base_offset.
 */
void write_out(scatter_full_plan *plan_out, void *aux_out,
               const scatter_plan_raw &raw, u32 aux_base_offset) {
    memset(plan_out, 0, sizeof(*plan_out));

    // For each unit width: record offset/count in the plan, append the raw
    // units to the aux region, and advance the write offset.
#define DO_CASE(t)                                                          \
    if (!raw.p_##t.empty()) {                                               \
        plan_out->s_##t##_offset = aux_base_offset;                         \
        plan_out->s_##t##_count = raw.p_##t.size();                         \
        assert(ISALIGNED_N((char *)aux_out + aux_base_offset,               \
                           alignof(scatter_unit_##t)));                     \
        memcpy((char *)aux_out + aux_base_offset, raw.p_##t.data(),         \
               byte_length(raw.p_##t));                                     \
        aux_base_offset += byte_length(raw.p_##t);                          \
    }

    DO_CASE(u64a);
    DO_CASE(u32);
    DO_CASE(u16);
    DO_CASE(u8);

#undef DO_CASE
}
|
||||
|
||||
} // namespace ue2
|
62
src/rose/rose_build_scatter.h
Normal file
62
src/rose/rose_build_scatter.h
Normal file
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_BUILD_SCATTER_H
|
||||
#define ROSE_BUILD_SCATTER_H
|
||||
|
||||
#include "rose_internal.h"
|
||||
#include "util/scatter.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class RoseBuildImpl;
|
||||
|
||||
struct scatter_plan_raw {
|
||||
std::vector<scatter_unit_u64a> p_u64a;
|
||||
std::vector<scatter_unit_u32> p_u32;
|
||||
std::vector<scatter_unit_u16> p_u16;
|
||||
std::vector<scatter_unit_u8> p_u8;
|
||||
};
|
||||
|
||||
void buildStateScatterPlan(u32 role_state_offset, u32 role_state_count,
|
||||
u32 left_array_count, u32 left_prefix_count,
|
||||
const RoseStateOffsets &stateOffsets,
|
||||
bool streaming, u32 leaf_array_count,
|
||||
u32 outfix_begin, u32 outfix_end,
|
||||
scatter_plan_raw *out);
|
||||
|
||||
u32 aux_size(const scatter_plan_raw &raw);
|
||||
|
||||
void write_out(scatter_full_plan *plan_out, void *aux_out,
|
||||
const scatter_plan_raw &raw, u32 aux_base_offset);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
99
src/rose/rose_build_util.h
Normal file
99
src/rose/rose_build_util.h
Normal file
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_BUILD_UTIL_H
|
||||
#define ROSE_BUILD_UTIL_H
|
||||
|
||||
#include "rose_graph.h"
|
||||
#include "util/graph.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
// Calculate the minimum depth for the given set of vertices, ignoring those
|
||||
// with depth 1.
|
||||
template<class Cont>
|
||||
static
|
||||
u8 calcMinDepth(const std::map<RoseVertex, u32> &depths, const Cont &verts) {
|
||||
u8 d = 255;
|
||||
for (RoseVertex v : verts) {
|
||||
u8 vdepth = (u8)std::min((u32)255, depths.at(v));
|
||||
if (vdepth > 1) {
|
||||
d = std::min(d, vdepth);
|
||||
}
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
// Comparator for vertices using their index property.
|
||||
struct VertexIndexComp {
|
||||
VertexIndexComp(const RoseGraph &gg) : g(gg) {}
|
||||
|
||||
bool operator()(const RoseVertex &a, const RoseVertex &b) const {
|
||||
const RoseVertexProps &pa = g[a];
|
||||
const RoseVertexProps &pb = g[b];
|
||||
|
||||
if (pa.idx < pb.idx) {
|
||||
return true;
|
||||
}
|
||||
if (pa.idx > pb.idx) {
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(a == b); // All vertex indices should be distinct.
|
||||
return a < b;
|
||||
}
|
||||
|
||||
const RoseGraph &g;
|
||||
};
|
||||
|
||||
// Vertex set type, ordered by index. Construct with a graph reference.
|
||||
typedef std::set<RoseVertex, VertexIndexComp> RoseVertexSet;
|
||||
|
||||
/**
|
||||
* \brief Add two Rose depths together, coping correctly with infinity at
|
||||
* ROSE_BOUND_INF.
|
||||
*/
|
||||
static inline
|
||||
u32 add_rose_depth(u32 a, u32 b) {
|
||||
assert(a <= ROSE_BOUND_INF);
|
||||
assert(b <= ROSE_BOUND_INF);
|
||||
|
||||
if (a == ROSE_BOUND_INF || b == ROSE_BOUND_INF) {
|
||||
return ROSE_BOUND_INF;
|
||||
}
|
||||
|
||||
u32 rv = a + b;
|
||||
assert(rv >= a && rv >= b);
|
||||
return rv;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // ROSE_BUILD_UTIL_H
|
252
src/rose/rose_build_width.cpp
Normal file
252
src/rose/rose_build_width.cpp
Normal file
@@ -0,0 +1,252 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "rose_build_width.h"
|
||||
|
||||
#include "nfagraph/ng_holder.h"
|
||||
#include "nfagraph/ng_dump.h"
|
||||
#include "nfagraph/ng_width.h"
|
||||
#include "rose_build_impl.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
bool is_end_anchored(const RoseGraph &g, RoseVertex v) {
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (g[w].eod_accept) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * \brief Lower bound on the number of bytes needed before a match using the
 * given literal table can be raised to the user.
 *
 * Returns ROSE_BOUND_INF if nothing reachable from that table can report.
 */
u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) {
    if (table != ROSE_FLOATING && table != ROSE_ANCHORED &&
        table != ROSE_EOD_ANCHORED) {
        /* handle other tables if ever required */
        assert(0);
        return 0;
    }

    const RoseGraph &g = tbi.g;

    // Seed with every vertex carrying a literal in the requested table...
    vector<RoseVertex> table_verts;
    for (auto v : vertices_range(g)) {
        if (tbi.hasLiteralInTable(v, table)) {
            table_verts.push_back(v);
        }
    }

    // ...then consider everything reachable from those vertices.
    set<RoseVertex> reachable;
    find_reachable(g, table_verts, &reachable);

    u32 minWidth = ROSE_BOUND_INF;
    for (auto v : reachable) {
        if (g[v].eod_accept) {
            DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].idx);
            continue;
        }

        const u32 w = g[v].min_offset;

        // Direct reports fire at the vertex's own min offset.
        if (!g[v].reports.empty()) {
            DEBUG_PRINTF("%zu can fire report at offset %u\n", g[v].idx, w);
            minWidth = min(minWidth, w);
        }

        // EOD-anchored reports also fire at the vertex's min offset.
        if (is_end_anchored(g, v)) {
            DEBUG_PRINTF("%zu can fire eod report at offset %u\n", g[v].idx, w);
            minWidth = min(minWidth, w);
        }

        // A suffix engine adds its own minimum width on top.
        if (g[v].suffix) {
            depth suffix_width = findMinWidth(g[v].suffix);
            assert(suffix_width.is_reachable());
            DEBUG_PRINTF("%zu has suffix (width %s), can fire report at %u\n",
                         g[v].idx, suffix_width.str().c_str(),
                         w + suffix_width);
            minWidth = min(minWidth, w + suffix_width);
        }
    }

    /* TODO: take into account the chain relationship between the mpv and other
     * engines */
    DEBUG_PRINTF("min width %u\n", minWidth);
    return minWidth;
}
|
||||
|
||||
/**
 * \brief Upper bound on the buffer length that can produce a match, or
 * ROSE_BOUND_INF if any pattern is not both start- and end-anchored.
 */
u32 findMaxBAWidth(const RoseBuildImpl &tbi) {
    const RoseGraph &g = tbi.g;

    // Any floating literal means matches can occur at unbounded offsets.
    if (!isLeafNode(tbi.root, g)) {
        DEBUG_PRINTF("floating literal -> no max width\n");
        return ROSE_BOUND_INF;
    }

    u64a maxWidth = 0;

    for (const auto &outfix : tbi.outfixes) {
        maxWidth = max(maxWidth, (u64a)outfix.maxBAWidth);
        if (maxWidth >= ROSE_BOUND_INF) {
            DEBUG_PRINTF("outfix with no max ba width\n");
            return ROSE_BOUND_INF;
        }
    }

    // Everyone's anchored, so the max width can be taken from the max
    // max_offset on our vertices (so long as all accepts are EOD).
    for (auto v : vertices_range(g)) {
        if (!g[v].reports.empty() && !g[v].eod_accept) {
            DEBUG_PRINTF("accept not at eod\n");
            return ROSE_BOUND_INF;
        }

        // Vertices with no reports and no suffix can't generate a match.
        if (g[v].reports.empty() && !g[v].suffix) {
            continue;
        }

        assert(g[v].eod_accept || g[v].suffix);

        u64a w = g[v].max_offset;

        if (g[v].suffix) {
            if (has_non_eod_accepts(g[v].suffix)) {
                return ROSE_BOUND_INF;
            }
            depth suffix_width = findMaxWidth(g[v].suffix);
            DEBUG_PRINTF("suffix max width %s\n", suffix_width.str().c_str());
            assert(suffix_width.is_reachable());
            if (!suffix_width.is_finite()) {
                DEBUG_PRINTF("suffix too wide\n");
                return ROSE_BOUND_INF;
            }

            w += suffix_width;
        }

        maxWidth = max(maxWidth, w);
        if (maxWidth >= ROSE_BOUND_INF) {
            DEBUG_PRINTF("too wide\n");
            return ROSE_BOUND_INF;
        }
    }

    DEBUG_PRINTF("max ba width %llu\n", maxWidth);
    assert(maxWidth < ROSE_BOUND_INF);
    return maxWidth;
}
|
||||
|
||||
/**
 * \brief Upper bound on the buffer length that can produce a match requiring
 * the given literal table, or ROSE_BOUND_INF if no such bound exists.
 */
u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) {
    const RoseGraph &g = tbi.g;

    // A floating table with floating literals present has no bound.
    if (!isLeafNode(tbi.root, g) && table == ROSE_FLOATING) {
        DEBUG_PRINTF("floating literal -> no max width\n");
        return ROSE_BOUND_INF;
    }

    if (table != ROSE_FLOATING && table != ROSE_ANCHORED) {
        /* handle other tables if ever required */
        assert(0);
        return ROSE_BOUND_INF;
    }

    DEBUG_PRINTF("looking for a max ba width for %s\n",
                 table == ROSE_FLOATING ? "floating" : "anchored");

    // Seed with every vertex belonging to the requested table...
    vector<RoseVertex> table_verts;
    for (auto v : vertices_range(g)) {
        if ((table == ROSE_FLOATING && tbi.isFloating(v))
            || (table == ROSE_ANCHORED && tbi.isAnchored(v))) {
            table_verts.push_back(v);
        }
    }

    // ...and everything reachable from those vertices.
    set<RoseVertex> reachable;
    find_reachable(g, table_verts, &reachable);

    u64a maxWidth = 0;
    // Everyone's anchored, so the max width can be taken from the max
    // max_offset on our vertices (so long as all accepts are ACCEPT_EOD).
    for (auto v : reachable) {
        DEBUG_PRINTF("inspecting vert %zu\n", g[v].idx);

        if (g[v].eod_accept) {
            DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].idx);
            continue;
        }

        if (!g[v].reports.empty()) {
            DEBUG_PRINTF("accept not at eod\n");
            return ROSE_BOUND_INF;
        }

        u64a w = g[v].max_offset;

        u64a follow_max = tbi.calcSuccMaxBound(v); /* may have a long bound to
                                                      accept_eod node */

        if (g[v].suffix) {
            if (has_non_eod_accepts(g[v].suffix)) {
                DEBUG_PRINTF("has accept\n");
                return ROSE_BOUND_INF;
            }
            depth suffix_width = findMaxWidth(g[v].suffix);
            DEBUG_PRINTF("suffix max width %s\n", suffix_width.str().c_str());
            assert(suffix_width.is_reachable());
            if (!suffix_width.is_finite()) {
                DEBUG_PRINTF("suffix too wide\n");
                return ROSE_BOUND_INF;
            }
            follow_max = max(follow_max, (u64a)suffix_width);
        }

        w += follow_max;

        DEBUG_PRINTF("w %llu\n", w);

        maxWidth = max(maxWidth, w);
        if (maxWidth >= ROSE_BOUND_INF) {
            DEBUG_PRINTF("too wide\n");
            return ROSE_BOUND_INF;
        }
    }

    DEBUG_PRINTF("max ba width %llu\n", maxWidth);
    assert(maxWidth < ROSE_BOUND_INF);
    return maxWidth;
}
|
||||
|
||||
} // namespace ue2
|
66
src/rose/rose_build_width.h
Normal file
66
src/rose/rose_build_width.h
Normal file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_BUILD_WIDTH_H
|
||||
#define ROSE_BUILD_WIDTH_H
|
||||
|
||||
#include "rose_build_impl.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class RoseBuildImpl;
|
||||
|
||||
/* returns a lower bound on the minimum number of bytes required for match to be
|
||||
* raised up to the user which requires the given literal table to be used
|
||||
*
|
||||
* returns ROSE_BOUND_INF if the table can never produce matches */
|
||||
u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table);
|
||||
|
||||
/* returns an upper bound on the maximum length of a buffer that can result in
|
||||
* matches. If there are any patterns which are not bianchored (start and end
|
||||
* anchored), then there is no such limit and ROSE_BOUND_INF is returned.
|
||||
*/
|
||||
u32 findMaxBAWidth(const RoseBuildImpl &tbi);
|
||||
|
||||
/* returns an upper bound on the maximum length of a buffer that can result in
|
||||
* matches and requires that the given table to be used. If there are any
|
||||
* patterns which are not bianchored (start and end anchored), then there is no
|
||||
* such limit and ROSE_BOUND_INF is returned.
|
||||
*/
|
||||
u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table);
|
||||
|
||||
/**
|
||||
* Note: there is no function for determining the min width of the whole rose
|
||||
* as this is more easily done by the NG layer which has access to the full
|
||||
* nfagraphs before they are chopped into little pieces.
|
||||
*/
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
44
src/rose/rose_common.h
Normal file
44
src/rose/rose_common.h
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_COMMON_H
|
||||
#define ROSE_COMMON_H
|
||||
|
||||
// Common defs available to build-time clients as well as runtime.
|
||||
|
||||
#define ROSE_BOUND_INF (~0U)
|
||||
#define MAX_MASK2_WIDTH 32
|
||||
|
||||
// Max block width to use the combined small-block matcher on, instead of
|
||||
// running the floating and anchored tables.
|
||||
#define ROSE_SMALL_BLOCK_LEN 32
|
||||
|
||||
/** \brief Length in bytes of a reach bitvector, used by the lookaround code. */
|
||||
#define REACH_BITVECTOR_LEN 32
|
||||
|
||||
#endif // ROSE_COMMON_H
|
1034
src/rose/rose_dump.cpp
Normal file
1034
src/rose/rose_dump.cpp
Normal file
File diff suppressed because it is too large
Load Diff
50
src/rose/rose_dump.h
Normal file
50
src/rose/rose_dump.h
Normal file
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_DUMP_H
|
||||
#define ROSE_DUMP_H
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
|
||||
struct RoseEngine;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void roseDumpText(const RoseEngine *t, FILE *f);
|
||||
void roseDumpInternals(const RoseEngine *t, const std::string &base);
|
||||
void roseDumpComponents(const RoseEngine *t, bool dump_raw,
|
||||
const std::string &base);
|
||||
void roseDumpStructRaw(const RoseEngine *t, FILE *f);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
#endif
|
228
src/rose/rose_graph.h
Normal file
228
src/rose/rose_graph.h
Normal file
@@ -0,0 +1,228 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief BGL graph structures used internally by the Rose build process.
|
||||
*
|
||||
* BGL graph structures used internally by the build-time portion of Rose. The
|
||||
* graph used for input is in rose_in_graph.h since it's part of the RoseBuild
|
||||
* external API.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_GRAPH_H
|
||||
#define ROSE_GRAPH_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "rose_build.h"
|
||||
#include "rose_internal.h" /* role history, etc */
|
||||
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
|
||||
#include "util/charreach.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <boost/graph/adjacency_list.hpp>
|
||||
#include <boost/graph/graph_traits.hpp>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct CastleProto;
|
||||
struct raw_dfa;
|
||||
struct raw_som_dfa;
|
||||
|
||||
/** \brief Table type for a literal. */
|
||||
enum rose_literal_table {
|
||||
ROSE_ANCHORED, //!< literals anchored to start
|
||||
ROSE_FLOATING, //!< general floating literals
|
||||
ROSE_EOD_ANCHORED, //!< literals that match near EOD
|
||||
ROSE_ANCHORED_SMALL_BLOCK, //!< anchored literals for small block table
|
||||
ROSE_EVENT //!< "literal-like" events, such as EOD
|
||||
};
|
||||
|
||||
#include "util/order_check.h"
|
||||
|
||||
/** \brief Provides information about the (pre|in)fix engine to the left of a
|
||||
* role. */
|
||||
struct LeftEngInfo {
|
||||
std::shared_ptr<NGHolder> graph;
|
||||
std::shared_ptr<CastleProto> castle;
|
||||
std::shared_ptr<raw_dfa> dfa;
|
||||
std::shared_ptr<raw_som_dfa> haig;
|
||||
u32 lag = 0U;
|
||||
ReportID leftfix_report = MO_INVALID_IDX;
|
||||
depth dfa_min_width = 0;
|
||||
depth dfa_max_width = depth::infinity();
|
||||
|
||||
bool operator==(const LeftEngInfo &other) const {
|
||||
return other.graph == graph
|
||||
&& other.castle == castle
|
||||
&& other.dfa == dfa
|
||||
&& other.haig == haig
|
||||
&& other.lag == lag
|
||||
&& other.leftfix_report == leftfix_report;
|
||||
}
|
||||
bool operator!=(const LeftEngInfo &other) const {
|
||||
return !(*this == other);
|
||||
}
|
||||
bool operator<(const LeftEngInfo &b) const {
|
||||
const LeftEngInfo &a = *this;
|
||||
ORDER_CHECK(graph);
|
||||
ORDER_CHECK(castle);
|
||||
ORDER_CHECK(dfa);
|
||||
ORDER_CHECK(haig);
|
||||
ORDER_CHECK(lag);
|
||||
ORDER_CHECK(leftfix_report);
|
||||
return false;
|
||||
}
|
||||
void reset(void);
|
||||
operator bool() const;
|
||||
bool tracksSom() const { return !!haig; }
|
||||
};
|
||||
|
||||
/** \brief Provides information about the suffix engine to the right of a
|
||||
* role. */
|
||||
struct RoseSuffixInfo {
|
||||
u32 top = 0;
|
||||
std::shared_ptr<NGHolder> graph; /* if triggers a trailing nfa */
|
||||
std::shared_ptr<CastleProto> castle;
|
||||
std::shared_ptr<raw_som_dfa> haig;
|
||||
std::shared_ptr<raw_dfa> rdfa;
|
||||
depth dfa_min_width = 0;
|
||||
depth dfa_max_width = depth::infinity();
|
||||
|
||||
bool operator==(const RoseSuffixInfo &b) const;
|
||||
bool operator!=(const RoseSuffixInfo &b) const { return !(*this == b); }
|
||||
bool operator<(const RoseSuffixInfo &b) const;
|
||||
void reset(void);
|
||||
operator bool() const { return graph || castle || haig || rdfa; }
|
||||
};
|
||||
|
||||
/** \brief Properties attached to each Rose graph vertex. */
|
||||
struct RoseVertexProps {
|
||||
/** \brief Unique dense vertex index. Used for BGL algorithms. */
|
||||
size_t idx = ~size_t{0};
|
||||
|
||||
/** \brief IDs of literals in the Rose literal map. */
|
||||
flat_set<u32> literals;
|
||||
|
||||
/**
|
||||
* \brief If true, this vertex is a virtual vertex for firing reports at
|
||||
* EOD. These vertices must have reports and have no associated literals.
|
||||
*/
|
||||
bool eod_accept = false;
|
||||
|
||||
/** \brief Report IDs to fire. */
|
||||
flat_set<ReportID> reports;
|
||||
|
||||
/** \brief Role ID for this vertex. These are what end up in the bytecode. */
|
||||
u32 role = ~u32{0};
|
||||
|
||||
/** \brief Bitmask of groups that this role sets. */
|
||||
rose_group groups = 0;
|
||||
|
||||
/** \brief Characters that escape and squash this role. */
|
||||
CharReach escapes;
|
||||
|
||||
/** \brief Minimum role (end of literal) offset depth in bytes. */
|
||||
u32 min_offset = ~u32{0};
|
||||
|
||||
/** \brief Maximum role (end of literal) offset depth in bytes */
|
||||
u32 max_offset = 0;
|
||||
|
||||
/** \brief SOM for the role is offset from end match offset */
|
||||
u32 som_adjust = 0;
|
||||
|
||||
/** \brief Prefix/infix engine to the left of this role. */
|
||||
LeftEngInfo left;
|
||||
|
||||
/**
|
||||
* \brief Suffix engine to the right of this role.
|
||||
*
|
||||
* Note: information about triggered infixes is associated with the left of
|
||||
* the destination role.
|
||||
*/
|
||||
RoseSuffixInfo suffix;
|
||||
|
||||
bool isBoring(void) const;
|
||||
bool fixedOffset(void) const;
|
||||
};
|
||||
|
||||
/** \brief Properties attached to each Rose graph edge. */
|
||||
/* bounds are distance from end of prev to start of the next */
|
||||
struct RoseEdgeProps {
|
||||
/**
|
||||
* \brief Minimum distance from the end of the source role's match to the
|
||||
* start of the target role's match.
|
||||
*
|
||||
* Not used when the target has a left engine (as the engine represents
|
||||
* bounds).
|
||||
*/
|
||||
u32 minBound = 0;
|
||||
|
||||
/**
|
||||
* \brief Maximum distance from the end of the source role's match to the
|
||||
* start of the target role's match.
|
||||
*
|
||||
* Not used when the target has a left engine (as the engine represents
|
||||
* bounds).
|
||||
*/
|
||||
u32 maxBound = 0;
|
||||
|
||||
/** \brief Which top to trigger on the target role's left engine. */
|
||||
u32 rose_top = 0;
|
||||
|
||||
/** \brief True if the rose_top can clear all other previous tops. */
|
||||
u8 rose_cancel_prev_top = false;
|
||||
|
||||
/** \brief History required by this edge. */
|
||||
RoseRoleHistory history = ROSE_ROLE_HISTORY_INVALID;
|
||||
};
|
||||
|
||||
bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b);
|
||||
|
||||
/**
|
||||
* \brief Core Rose graph structure.
|
||||
*
|
||||
* Note that we use the list selector for the edge and vertex lists: we depend
|
||||
* on insertion order for determinism, so we must use these containers.
|
||||
*/
|
||||
using RoseGraph = boost::adjacency_list<boost::listS, // out edge list per vertex
|
||||
boost::listS, // vertex list
|
||||
boost::bidirectionalS, // bidirectional
|
||||
RoseVertexProps, // bundled vertex properties
|
||||
RoseEdgeProps, // bundled edge properties
|
||||
boost::listS // graph edge list
|
||||
>;
|
||||
|
||||
using RoseVertex = RoseGraph::vertex_descriptor;
|
||||
using RoseEdge = RoseGraph::edge_descriptor;
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // ROSE_GRAPH_H
|
132
src/rose/rose_in_dump.cpp
Normal file
132
src/rose/rose_in_dump.cpp
Normal file
@@ -0,0 +1,132 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "rose_in_dump.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfagraph/ng_dump.h"
|
||||
#include "nfagraph/ng_util.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error No dump support!
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void dumpPreRoseGraph(const RoseInGraph &ig, const Grey &grey,
|
||||
const char *filename) {
|
||||
if (!grey.dumpFlags) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!filename) {
|
||||
filename = "pre_rose.dot";
|
||||
}
|
||||
DEBUG_PRINTF("dumping rose graphs\n");
|
||||
FILE *f = fopen((grey.dumpPath + filename).c_str(), "w");
|
||||
fprintf(f, "digraph NFA {\n");
|
||||
fprintf(f, "rankdir=LR;\n");
|
||||
fprintf(f, "size=\"11.5,8\"\n");
|
||||
fprintf(f, "node [ shape = circle ];\n");
|
||||
|
||||
u32 next_id = 0;
|
||||
map<RoseInVertex, u32> i_map;
|
||||
for (auto v : vertices_range(ig)) {
|
||||
u32 id = next_id++;
|
||||
i_map[v] = id;
|
||||
const RoseInVertexProps &vp = ig[v];
|
||||
fprintf(f, "%u [ width = 1, fontsize = 12, label = \"%u:", id, id);
|
||||
switch(vp.type) {
|
||||
case RIV_LITERAL:
|
||||
fprintf(f, "%s", dotEscapeString(dumpString(vp.s)).c_str());
|
||||
break;
|
||||
case RIV_START:
|
||||
fprintf(f, "[START]");
|
||||
break;
|
||||
case RIV_ANCHORED_START:
|
||||
fprintf(f, "[ANCHOR]");
|
||||
break;
|
||||
case RIV_ACCEPT:
|
||||
if (!vp.reports.empty()) {
|
||||
fprintf(f, "[ACCEPT %s]", as_string_list(vp.reports).c_str());
|
||||
} else {
|
||||
fprintf(f, "[ACCEPT]");
|
||||
}
|
||||
break;
|
||||
case RIV_ACCEPT_EOD:
|
||||
fprintf(f, "[EOD %s]", as_string_list(vp.reports).c_str());
|
||||
break;
|
||||
}
|
||||
fprintf(f, "\" ]; \n");
|
||||
}
|
||||
|
||||
map<NGHolder *, size_t> graph_ids;
|
||||
|
||||
for (const auto &e : edges_range(ig)) {
|
||||
u32 u = i_map[source(e, ig)];
|
||||
u32 v = i_map[target(e, ig)];
|
||||
fprintf(f, "%u -> %u [label=\"", u, v);
|
||||
if (ig[e].graph) {
|
||||
if (!contains(graph_ids, &*ig[e].graph)) {
|
||||
size_t id = graph_ids.size();
|
||||
graph_ids[&*ig[e].graph] = id;
|
||||
}
|
||||
fprintf(f, "graph %zu", graph_ids[&*ig[e].graph]);
|
||||
}
|
||||
if (ig[e].haig) {
|
||||
fprintf(f, "haig ");
|
||||
}
|
||||
fprintf(f, "\"]\n");
|
||||
}
|
||||
|
||||
for (const auto &e : graph_ids) {
|
||||
NGHolder *h = e.first;
|
||||
size_t id = e.second;
|
||||
|
||||
ostringstream name;
|
||||
name << grey.dumpPath << "pre_rose_" << id << ".dot";
|
||||
dumpGraph(name.str().c_str(), h->g);
|
||||
assert(allMatchStatesHaveReports(*h));
|
||||
}
|
||||
|
||||
fprintf(f, "}\n");
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
}
|
49
src/rose/rose_in_dump.h
Normal file
49
src/rose/rose_in_dump.h
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_IN_DUMP_H
|
||||
#define ROSE_IN_DUMP_H
|
||||
|
||||
#include "rose_in_graph.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct Grey;
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
void dumpPreRoseGraph(const RoseInGraph &ig, const Grey &grey,
|
||||
const char *filename = nullptr);
|
||||
#else
|
||||
static UNUSED
|
||||
void dumpPreRoseGraph(const RoseInGraph &, const Grey &,
|
||||
const char * = nullptr) { }
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#endif
|
187
src/rose/rose_in_graph.h
Normal file
187
src/rose/rose_in_graph.h
Normal file
@@ -0,0 +1,187 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Rose Input Graph: Used for ng_rose -> rose_build_add communication.
|
||||
*
|
||||
* The input graph MUST be a DAG.
|
||||
* There MUST be exactly 1 START or ANCHORED_START vertex.
|
||||
* The edges MUST be of the form START->LITERAL, LITERAL->LITERAL,
|
||||
* LITERAL->ACCEPT or LITERAL->ACCEPT_EOD.
|
||||
* Every non START/ANCHORED_START vertex MUST have an in-edge.
|
||||
* Every non ACCEPT/ACCEPT_EOD vertex MUST have an out-edge.
|
||||
*
|
||||
* Edges are either a graph or have bounds associated with them.
|
||||
* Graphs on edges to accepts use their internal report ids.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_IN_GRAPH_H
|
||||
#define ROSE_IN_GRAPH_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "rose/rose_common.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/ue2string.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include <boost/graph/graph_traits.hpp>
|
||||
#include <boost/graph/adjacency_list.hpp>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class NGHolder;
|
||||
struct raw_som_dfa;
|
||||
|
||||
enum RoseInVertexType {
|
||||
RIV_LITERAL,
|
||||
RIV_START,
|
||||
RIV_ANCHORED_START,
|
||||
RIV_ACCEPT,
|
||||
RIV_ACCEPT_EOD
|
||||
};
|
||||
|
||||
struct RoseInVertexProps {
|
||||
RoseInVertexProps()
|
||||
: type(RIV_LITERAL), delay(0), min_offset(0),
|
||||
max_offset(ROSE_BOUND_INF) {}
|
||||
|
||||
private:
|
||||
template <class ReportContainer>
|
||||
RoseInVertexProps(RoseInVertexType type_in, const ue2_literal &s_in,
|
||||
const ReportContainer &reports_in, u32 min_offset_in,
|
||||
u32 max_offset_in)
|
||||
: type(type_in), s(s_in), delay(0),
|
||||
reports(begin(reports_in), end(reports_in)),
|
||||
min_offset(min_offset_in), max_offset(max_offset_in) {}
|
||||
|
||||
// Constructor for a vertex with no reports.
|
||||
RoseInVertexProps(RoseInVertexType type_in, const ue2_literal &s_in,
|
||||
u32 min_offset_in, u32 max_offset_in)
|
||||
: type(type_in), s(s_in), delay(0), min_offset(min_offset_in),
|
||||
max_offset(max_offset_in) {}
|
||||
|
||||
public:
|
||||
static RoseInVertexProps makeLiteral(const ue2_literal &lit) {
|
||||
DEBUG_PRINTF("making literal %s\n", dumpString(lit).c_str());
|
||||
return RoseInVertexProps(RIV_LITERAL, lit, 0, ROSE_BOUND_INF);
|
||||
}
|
||||
|
||||
template <class ReportContainer>
|
||||
static RoseInVertexProps makeAccept(const ReportContainer &rep) {
|
||||
DEBUG_PRINTF("making accept for %zu reports\n", rep.size());
|
||||
return RoseInVertexProps(RIV_ACCEPT, ue2_literal(), rep, 0,
|
||||
ROSE_BOUND_INF);
|
||||
}
|
||||
|
||||
template <class ReportContainer>
|
||||
static RoseInVertexProps makeAcceptEod(const ReportContainer &rep) {
|
||||
DEBUG_PRINTF("making accept-eod for %zu reports\n", rep.size());
|
||||
return RoseInVertexProps(RIV_ACCEPT_EOD, ue2_literal(), rep, 0,
|
||||
ROSE_BOUND_INF);
|
||||
}
|
||||
|
||||
static RoseInVertexProps makeStart(bool anchored) {
|
||||
DEBUG_PRINTF("making %s\n", anchored ? "anchored start" : "start");
|
||||
if (anchored) {
|
||||
return RoseInVertexProps(RIV_ANCHORED_START, ue2_literal(), 0, 0);
|
||||
} else {
|
||||
return RoseInVertexProps(RIV_START, ue2_literal(), 0,
|
||||
ROSE_BOUND_INF);
|
||||
}
|
||||
}
|
||||
|
||||
RoseInVertexType type; /* polymorphic vertices are probably a bad idea */
|
||||
ue2_literal s; /**< for RIV_LITERAL */
|
||||
u32 delay; /**< for RIV_LITERAL, delay applied to literal. */
|
||||
flat_set<ReportID> reports; /**< for RIV_ACCEPT/RIV_ACCEPT_EOD */
|
||||
u32 min_offset; /**< Minimum offset at which this vertex can match. */
|
||||
u32 max_offset; /**< Maximum offset at which this vertex can match. */
|
||||
};
|
||||
|
||||
struct RoseInEdgeProps {
|
||||
RoseInEdgeProps()
|
||||
: minBound(0), maxBound(0), graph(), haig(), graph_lag(0) {}
|
||||
|
||||
RoseInEdgeProps(u32 min_in, u32 max_in)
|
||||
: minBound(min_in), maxBound(max_in), graph(), graph_lag(0) {
|
||||
assert(minBound <= maxBound);
|
||||
assert(minBound != ROSE_BOUND_INF);
|
||||
}
|
||||
|
||||
/* haig rosefixes (prefix/infix) require their corresponding holders */
|
||||
RoseInEdgeProps(std::shared_ptr<NGHolder> g, std::shared_ptr<raw_som_dfa> h,
|
||||
u32 lag)
|
||||
: minBound(0), maxBound(ROSE_BOUND_INF), graph(g), haig(h),
|
||||
graph_lag(lag) {
|
||||
assert(graph);
|
||||
assert(haig);
|
||||
}
|
||||
|
||||
/* haig suffixes do not require their corresponding holders */
|
||||
explicit RoseInEdgeProps(std::shared_ptr<raw_som_dfa> h)
|
||||
: minBound(0), maxBound(ROSE_BOUND_INF), haig(h), graph_lag(0) {
|
||||
assert(haig);
|
||||
}
|
||||
|
||||
RoseInEdgeProps(std::shared_ptr<NGHolder> g, u32 lag)
|
||||
: minBound(0), maxBound(ROSE_BOUND_INF), graph(g), graph_lag(lag) {
|
||||
assert(graph);
|
||||
}
|
||||
|
||||
/** \brief Minimum bound on 'dot' repeat between literals. ie pred end ->
|
||||
* succ begin. */
|
||||
u32 minBound;
|
||||
|
||||
/** \brief Maximum bound on 'dot' repeat between literals. */
|
||||
u32 maxBound;
|
||||
|
||||
/** \brief Prefix graph. Graph is end to (end - lag). */
|
||||
std::shared_ptr<NGHolder> graph;
|
||||
|
||||
/** \brief Haig version of graph, if required. */
|
||||
std::shared_ptr<raw_som_dfa> haig;
|
||||
|
||||
u32 graph_lag;
|
||||
|
||||
/** \brief Escape characters, can be used instead of graph.
|
||||
*
|
||||
* currently must not intersect with succ literal and must be a literal -
|
||||
* literal edge, TODO: handle */
|
||||
CharReach escapes;
|
||||
};
|
||||
|
||||
typedef boost::adjacency_list<boost::listS, boost::listS, boost::bidirectionalS,
|
||||
RoseInVertexProps,
|
||||
RoseInEdgeProps> RoseInGraph;
|
||||
typedef RoseInGraph::vertex_descriptor RoseInVertex;
|
||||
typedef RoseInGraph::edge_descriptor RoseInEdge;
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
267
src/rose/rose_in_util.cpp
Normal file
267
src/rose/rose_in_util.cpp
Normal file
@@ -0,0 +1,267 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "rose_in_util.h"
|
||||
|
||||
#include "rose_build_util.h"
|
||||
#include "nfa/goughcompile.h"
|
||||
#include "nfagraph/ng_depth.h"
|
||||
#include "nfagraph/ng_util.h"
|
||||
#include "nfagraph/ng_width.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <boost/graph/copy.hpp>
|
||||
#include <boost/graph/reverse_graph.hpp>
|
||||
#include <boost/graph/topological_sort.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
void populateIndexMap(const RoseInGraph &in,
|
||||
map<RoseInVertex, size_t> *index_map) {
|
||||
size_t i = 0;
|
||||
for (auto v : vertices_range(in)) {
|
||||
(*index_map)[v] = i++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns a topological ordering of the vertices in g. That is the starts are
|
||||
* at the front and all the predecessors of a vertex occur earlier in the list
|
||||
* than the vertex. */
|
||||
vector<RoseInVertex> topo_order(const RoseInGraph &g) {
|
||||
map<RoseInVertex, size_t> index_map;
|
||||
populateIndexMap(g, &index_map);
|
||||
|
||||
vector<RoseInVertex> v_order;
|
||||
v_order.reserve(index_map.size());
|
||||
|
||||
topological_sort(g, back_inserter(v_order),
|
||||
vertex_index_map(boost::make_assoc_property_map(index_map)));
|
||||
|
||||
reverse(v_order.begin(), v_order.end()); /* put starts at the front */
|
||||
|
||||
return v_order;
|
||||
}
|
||||
|
||||
namespace {
/** Edge-copy functor for boost::copy_graph. Copies all edge properties and
 * then substitutes the shared graph/haig pointers with their pre-built
 * clones, looked up in the supplied maps. */
struct RoseEdgeCopier {
    typedef unordered_map<const NGHolder *, shared_ptr<NGHolder>> GraphMap;
    typedef unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> HaigMap;

    RoseEdgeCopier(const RoseInGraph &g1, RoseInGraph &g2,
                   const GraphMap &graph_map_in, const HaigMap &haig_map_in)
        : ig(g1), out(g2), graph_map(graph_map_in), haig_map(haig_map_in) {}

    void operator()(const RoseInEdge &e1, RoseInEdge &e2) {
        // Clone all properties.
        put(boost::edge_all, out, e2, get(boost::edge_all, ig, e1));
        // Substitute in cloned graphs.
        if (ig[e1].graph) {
            out[e2].graph = graph_map.at(ig[e1].graph.get());
        }
        if (ig[e1].haig) {
            out[e2].haig = haig_map.at(ig[e1].haig.get());
        }
    }

private:
    const RoseInGraph &ig;   // source graph
    RoseInGraph &out;        // destination graph
    const GraphMap &graph_map;
    const HaigMap &haig_map;
};
}
|
||||
|
||||
/** Deep-copies a RoseInGraph. Each distinct NGHolder graph and raw_som_dfa
 * (haig) attached to an edge is cloned exactly once, so components shared
 * between edges remain shared in the copy. */
unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig) {
    unique_ptr<RoseInGraph> out = make_unique<RoseInGraph>();

    // Pre-clone every distinct graph/haig so the edge copier can just look
    // them up (and shared components stay shared in the output).
    unordered_map<const NGHolder *, shared_ptr<NGHolder>> graph_map;
    unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> haig_map;

    for (const auto &e : edges_range(ig)) {
        const RoseInEdgeProps &ep = ig[e];
        if (ep.graph && !contains(graph_map, ep.graph.get())) {
            graph_map[ep.graph.get()] = cloneHolder(*ep.graph);
        }
        if (ep.haig && !contains(haig_map, ep.haig.get())) {
            haig_map[ep.haig.get()] = make_shared<raw_som_dfa>(*ep.haig);
        }
    }

    // copy_graph needs an external vertex index map for this graph type.
    map<RoseInVertex, size_t> index_map;
    populateIndexMap(ig, &index_map);

    copy_graph(ig, *out,
               boost::edge_copy(RoseEdgeCopier(ig, *out, graph_map, haig_map))
                   .vertex_index_map(boost::make_assoc_property_map(index_map)));
    return out;
}
|
||||
|
||||
/** Computes [min_offset, max_offset] for every vertex in g by propagating
 * bounds from predecessors in topological order. Start vertices seed the
 * recurrence; for other vertices the bound is the min/max over all in-edges
 * of the predecessor's bound plus the edge's contribution (graph width +
 * lag if the edge carries a graph, otherwise the explicit bounds plus the
 * literal length). Finally, any per-vertex literal delay is added on. */
void calcVertexOffsets(RoseInGraph &g) {
    vector<RoseInVertex> v_order = topo_order(g);

    for (RoseInVertex v : v_order) {
        if (g[v].type == RIV_START) {
            // Unanchored start: can match anywhere.
            g[v].min_offset = 0;
            g[v].max_offset = ROSE_BOUND_INF;
            continue;
        } else if (g[v].type == RIV_ANCHORED_START) {
            // Anchored start: only at offset zero.
            g[v].min_offset = 0;
            g[v].max_offset = 0;
            continue;
        }

        DEBUG_PRINTF("vertex '%s'\n", dumpString(g[v].s).c_str());

        // Min and max predecessor depths.
        u32 min_d = ROSE_BOUND_INF;
        u32 max_d = 0;

        for (const auto &e : in_edges_range(v, g)) {
            RoseInVertex u = source(e, g);
            u32 e_min = g[u].min_offset;
            u32 e_max = g[u].max_offset;

            DEBUG_PRINTF("in-edge from u with offsets [%u,%u]\n", e_min, e_max);

            if (g[e].graph) {
                // Edge carries a graph: its width range (plus any lag) is
                // the distance contributed by this edge.
                const NGHolder &h = *g[e].graph;
                depth g_min_width = findMinWidth(h);
                // An unanchored graph has no finite max width.
                depth g_max_width =
                    isAnchored(h) ? findMaxWidth(h) : depth::infinity();
                u32 graph_lag = g[e].graph_lag;

                DEBUG_PRINTF("edge has graph, depths [%s,%s] and lag %u\n",
                             g_min_width.str().c_str(),
                             g_max_width.str().c_str(), graph_lag);
                g_min_width += graph_lag;
                g_max_width += graph_lag;
                e_min = add_rose_depth(e_min, g_min_width);
                if (g_max_width.is_finite()) {
                    e_max = add_rose_depth(e_max, g_max_width);
                } else {
                    e_max = ROSE_BOUND_INF;
                }
            } else {
                // Plain edge: explicit bounds, plus the literal's own length
                // (offsets are measured at the end of the literal match).
                DEBUG_PRINTF("edge has bounds [%u,%u]\n", g[e].minBound,
                             g[e].maxBound);
                e_min = add_rose_depth(e_min, g[e].minBound);
                e_max = add_rose_depth(e_max, g[e].maxBound);
                if (g[v].type == RIV_LITERAL) {
                    u32 len = g[v].s.length();
                    DEBUG_PRINTF("lit len %u\n", len);
                    e_min = add_rose_depth(e_min, len);
                    e_max = add_rose_depth(e_max, len);
                }
            }

            min_d = min(min_d, e_min);
            max_d = max(max_d, e_max);
        }

        DEBUG_PRINTF("vertex depths [%u,%u]\n", min_d, max_d);

        assert(max_d >= min_d);
        g[v].min_offset = min_d;
        g[v].max_offset = max_d;
    }

    // It's possible that we may have literal delays assigned to vertices here
    // as well. If so, these need to be added to the min/max offsets.
    for (RoseInVertex v : v_order) {
        const u32 delay = g[v].delay;
        g[v].min_offset = add_rose_depth(g[v].min_offset, delay);
        g[v].max_offset = add_rose_depth(g[v].max_offset, delay);
    }
}
|
||||
|
||||
/** Classifies the engine kind required by edge e: prefix (from a start),
 * suffix (into an accept), infix (neither), or outfix (both, which can
 * only happen for an EOD accept). */
nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e) {
    RoseInVertex u = source(e, in);
    RoseInVertex v = target(e, in);

    bool from_start =
        in[u].type == RIV_START || in[u].type == RIV_ANCHORED_START;
    bool to_accept =
        in[v].type == RIV_ACCEPT || in[v].type == RIV_ACCEPT_EOD;

    if (from_start) {
        if (!to_accept) {
            return NFA_PREFIX;
        }
        // Start straight into an accept: whole-pattern engine.
        assert(in[v].type == RIV_ACCEPT_EOD);
        return NFA_OUTFIX;
    }

    return to_accept ? NFA_SUFFIX : NFA_INFIX;
}
|
||||
|
||||
/** Removes vertices that cannot lie on a path from some start vertex to some
 * accept vertex. Temporary dummy source/sink vertices are added so that a
 * single forward and a single backward reachability sweep suffice; the
 * dummies themselves are then removed along with the dead vertices. */
void pruneUseless(RoseInGraph &g) {
    DEBUG_PRINTF("pruning useless vertices\n");

    set<RoseInVertex> dead;
    // Dummy super-source and super-sink; inserted into 'dead' up front so
    // they are always removed at the end.
    RoseInVertex dummy_start
        = add_vertex(RoseInVertexProps::makeStart(true), g);
    RoseInVertex dummy_end
        = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), g);
    dead.insert(dummy_start);
    dead.insert(dummy_end);
    for (auto v : vertices_range(g)) {
        if (v == dummy_start || v == dummy_end) {
            continue;
        }
        switch (g[v].type) {
        case RIV_ANCHORED_START:
        case RIV_START:
            // Wire all starts under the dummy source.
            add_edge(dummy_start, v, g);
            break;
        case RIV_ACCEPT:
        case RIV_ACCEPT_EOD:
            // Wire all accepts into the dummy sink.
            add_edge(v, dummy_end, g);
            break;
        default:
            break;
        }
    }

    // Forward sweep: vertices unreachable from any start.
    find_unreachable(g, vector<RoseInVertex>(1, dummy_start), &dead);
    // Backward sweep (on the reversed graph): vertices that reach no accept.
    find_unreachable(boost::reverse_graph<RoseInGraph, RoseInGraph &>(g),
                     vector<RoseInVertex>(1, dummy_end), &dead);

    for (auto v : dead) {
        clear_vertex(v, g);
        remove_vertex(v, g);
    }
}
|
||||
|
||||
}
|
51
src/rose/rose_in_util.h
Normal file
51
src/rose/rose_in_util.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_IN_UTIL_H
#define ROSE_IN_UTIL_H

#include "rose_in_graph.h"
#include "nfa/nfa_kind.h"

#include <memory> /* std::unique_ptr is used below; previously relied on a
                   * transitive include. */
#include <vector>

namespace ue2 {

/* Returns a topological ordering of the vertices in g. That is the starts are
 * at the front and all the predecessors of a vertex occur earlier in the list
 * than the vertex. */
std::vector<RoseInVertex> topo_order(const RoseInGraph &g);

/* Deep-copies ig; graph/haig components attached to edges are cloned too. */
std::unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig);

/* Computes min_offset/max_offset for every vertex in ig. */
void calcVertexOffsets(RoseInGraph &ig);

/* Classifies the engine kind (prefix/infix/suffix/outfix) needed by edge e. */
enum nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e);

/* Removes vertices not on any path from a start to an accept. */
void pruneUseless(RoseInGraph &g);

}

#endif
|
831
src/rose/rose_internal.h
Normal file
831
src/rose/rose_internal.h
Normal file
@@ -0,0 +1,831 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Rose data structures.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_INTERNAL_H
|
||||
#define ROSE_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "rose_common.h"
|
||||
#include "util/scatter.h"
|
||||
|
||||
#define ROSE_OFFSET_INVALID 0xffffffff
|
||||
|
||||
// Group constants
|
||||
typedef u64a rose_group;
|
||||
|
||||
// Delayed literal stuff
|
||||
#define DELAY_BITS 5
|
||||
#define DELAY_SLOT_COUNT (1U << DELAY_BITS)
|
||||
#define MAX_DELAY (DELAY_SLOT_COUNT - 1)
|
||||
#define DELAY_MASK (DELAY_SLOT_COUNT - 1)
|
||||
|
||||
#define DELAY_FLOAT_DIRTY (1U << 7) /* delay literal matched in history */
|
||||
|
||||
// Direct report stuff
|
||||
#define LITERAL_DR_FLAG (1U << 31)
|
||||
#define LITERAL_MDR_FLAG ((1U << 30) | (1U << 31))
|
||||
|
||||
/** \brief Non-zero if the literal id has the direct report flag set, i.e. it
 * is either a direct report or a multi-direct report (MDR ids also carry the
 * DR bit). */
static really_inline
u32 isLiteralDR(u32 id) {
    return id & LITERAL_DR_FLAG;
}
|
||||
|
||||
/** \brief True if the literal id is a multi-direct report (both MDR flag bits
 * set). */
static really_inline
u32 isLiteralMDR(u32 id) {
    return (id & LITERAL_MDR_FLAG) == LITERAL_MDR_FLAG;
}
|
||||
|
||||
/** \brief Converts a direct report literal id into its report id by clearing
 * the DR flag bit. Must only be called on plain direct report ids, not
 * multi-direct reports (asserted). */
static really_inline
ReportID literalToReport(u32 id) {
    assert(id & LITERAL_DR_FLAG);
    /* must not be a multi-direct report id */
    assert(!(id & (LITERAL_MDR_FLAG ^ LITERAL_DR_FLAG)));
    return id & ~LITERAL_DR_FLAG;
}
|
||||
|
||||
// Structure representing a literal. Each literal may have many roles.
struct RoseLiteral {
    u32 rootRoleOffset; /**< If rootRoleCount == 1, this is an offset relative
                         * to the rose engine to the root role associated with
                         * the literal.
                         * If rootRoleCount > 1, this is the first index into
                         * the rootRoleTable indicating the root roles.
                         */
    u32 rootRoleCount; // number of root roles
    u32 iterOffset; // offset of sparse iterator, relative to rose
    u32 iterMapOffset; // offset of the iter mapping table, relative to rose
    rose_group groups; // bitset of groups that cause this literal to fire.
    u8 minDepth; // the minimum of this literal's roles' depths (for depths > 1)
    u8 squashesGroup; /**< literal switches off its group behind it if it sets a
                       * role */
    u8 requires_side; // need to catch up sidecar for this literal
    u32 delay_mask; /**< bit set indicates that the literal inserts a delayed
                     * match at the given offset */
    u32 delayIdsOffset; // offset to array of ids to poke in the delay structure
};
|
||||
|
||||
/* Properties for sidecar entries: squash information applied when the
 * corresponding sidecar literal matches. */
struct RoseSide {
    u32 squashIterOffset; // offset of the squash sparse iterator, rose relative
    rose_group squashGroupMask; // squash literal squash masks
};
|
||||
|
||||
/* Allocation of Rose literal ids
|
||||
*
|
||||
* The rose literal id space is segmented:
|
||||
*
|
||||
* ---- 0
|
||||
* | | Normal undelayed literals in the e, or f tables which require a
|
||||
* | | manual benefits confirm on match [a table never requires benefits]
|
||||
* | |
|
||||
* ---- nonbenefits_base_id
|
||||
* | | 'Normal' undelayed literals in either e or f tables
|
||||
* | |
|
||||
* | |
|
||||
* | |
|
||||
* ---- anchored_base_id
|
||||
* | | literals from the a table
|
||||
* | |
|
||||
* ---- delay_base_id
|
||||
* | | Delayed version of normal literals
|
||||
* | |
|
||||
* ---- literalCount
|
||||
* ...
|
||||
* ...
|
||||
* ...
|
||||
* ---- LITERAL_DR_FLAG
|
||||
* | | Direct Report literals: immediately raise an internal report with id
|
||||
* | | given by (lit_id & ~LITERAL_DR_FLAG). Raised by a or f tables (or e??).
|
||||
* | | No RoseLiteral structure
|
||||
* | |
|
||||
* | |
|
||||
* ----
|
||||
*
|
||||
* Note: sidecar 'literals' are in a complete separate space
|
||||
*/
|
||||
|
||||
/* Rose Literal Sources
|
||||
*
|
||||
* Rose currently gets events (mainly roseProcessMatch calls) from 8 sources:
|
||||
* 1) The floating table
|
||||
* 2) The anchored table
|
||||
* 3) Delayed literals
|
||||
* 4) Sidecar literal matcher
|
||||
* 5) suffixes NFAs
|
||||
* 6) masksv2 (literals with benefits)
|
||||
* 7) End anchored table
|
||||
* 8) prefix / infix nfas
|
||||
*
|
||||
* Care is required to ensure that events appear to come into Rose in order
|
||||
* (or sufficiently ordered for Rose to cope). Generally the progress of the
|
||||
* floating table is considered the canonical position in the buffer.
|
||||
*
|
||||
* Anchored table:
|
||||
* The anchored table is run before the floating table as nothing in it can
|
||||
* depend on a floating literal. Order is achieved by two measures:
|
||||
* a) user matches^1 are logged and held until the floating matcher passes that
|
||||
* point;
|
||||
* b) any floating role with an anchored predecessor has a history relationship
|
||||
* to enforce the ordering.
|
||||
*
|
||||
* Delayed literals:
|
||||
* Delayed literal ordering is handled by delivering any pending delayed
|
||||
* literals before processing any floating match.
|
||||
*
|
||||
* Sidecar:
|
||||
* The sidecar matcher is unique in that it does not return match
|
||||
* location information. Sidecar literals are escapes between two normal
|
||||
* roles. The sidecar matcher is caught up to the floating matcher
|
||||
* before any possible predecessor role, any possible successor role, and
|
||||
* at stream boundaries^3.
|
||||
*
|
||||
* Suffix:
|
||||
* Suffixes are always pure terminal roles. Prior to raising a match^2, pending
|
||||
* NFA queues are run to the current point (floating or delayed literal) as
|
||||
* appropriate.
|
||||
*
|
||||
* Maskv2:
|
||||
* These are triggered from either floating literals or delayed literals and
|
||||
* inspect the data behind them. Matches are raised at the same location as the
|
||||
* trigger literal so there are no ordering issues. Masks are always pure
|
||||
* terminal roles.
|
||||
*
|
||||
* Lookaround:
|
||||
* These are tests run on receipt of a role that "look around" the match,
|
||||
* checking characters at nearby offsets against reachability masks. Each role
|
||||
* can have a list of these lookaround offset/reach pairs, ordered in offset
|
||||
* order, and any failure will prevent the role from being switched on. Offsets
|
||||
* are relative to the byte after a literal match, and can be negative.
|
||||
*
|
||||
* Prefix / Infix:
|
||||
* TODO: remember / discuss
|
||||
*
|
||||
* End anchored table:
|
||||
* All user matches occur at the last byte. We do this last, so no problems
|
||||
* (yippee)
|
||||
*
|
||||
* ^1 User matches which occur before any possible match from the other tables
|
||||
* are not delayed.
|
||||
* ^2 Queues may also be run to the current location if a queue is full and
|
||||
* needs to be emptied.
|
||||
* ^3 There is no need to catch up at the end of a block scan as it contains no
|
||||
* terminals.
|
||||
*/
|
||||
|
||||
// We have different types of role history storage, used when checking the
// bounds between a role and its predecessor.
enum RoseRoleHistory {
    ROSE_ROLE_HISTORY_NONE, // no history required for this pred relationship
    ROSE_ROLE_HISTORY_ANCH, // used when previous role is at a fixed offset
    ROSE_ROLE_HISTORY_LAST_BYTE, /* used when previous role can only match at the
                                  * last byte of a stream */
    ROSE_ROLE_HISTORY_INVALID // history not yet assigned
};
|
||||
|
||||
/* Description of a "counting miracle" check: a character (or shufti class)
 * whose repeated occurrence behind the scan point kills the leftfix. */
struct RoseCountingMiracle {
    char shufti; /**< 1: count shufti class; 0: count a single character */
    u8 count; /**< minimum number of occurrences for the counting
               * miracle char to kill the leftfix. */
    u8 c; /**< character to look for if not shufti */
    u8 poison; /**< character not in the shufti mask */
    m128 lo; /**< shufti lo mask */
    m128 hi; /**< shufti hi mask */
};
|
||||
|
||||
/* Per-queue information for a leftfix (prefix/infix) NFA. */
struct LeftNfaInfo {
    u32 maxQueueLen;
    u32 maxLag; // maximum of successor roles' lag
    u32 lagIndex; // iff lag != 0, index into leftfixLagTable
    u32 stopTable; // stop table index, or ROSE_OFFSET_INVALID
    u8 transient; /**< 0 if not transient, else max width of transient prefix */
    char infix; /* TODO: make flags */
    char eod_check; /**< nfa is used by the event eod literal */
    u32 countingMiracleOffset; /** if not 0, offset to RoseCountingMiracle. */
    rose_group squash_mask; /* & mask applied when rose nfa dies */
};
|
||||
|
||||
// A list of these is used to trigger prefix/infix roses.
struct RoseTrigger {
    u32 queue; // queue index of leftfix
    u32 event; // from enum mqe_event
    u8 cancel_prev_top; // if set, cancel the previous top event on this queue
};
|
||||
|
||||
/* Per-queue information about an NFA engine (offsets and behaviour flags). */
struct NfaInfo {
    u32 nfaOffset; // offset of the NFA structure, relative to rose
    u32 stateOffset; // offset of this NFA's stream state
    u32 fullStateOffset; /* offset in scratch, relative to ??? */
    u32 ekeyListOffset; /* suffix, relative to base of rose, 0 if no ekeys */
    u8 no_retrigger; /* TODO */
    u8 only_external; /**< does not raise any som internal events or chained
                       * rose events */
    u8 in_sbmatcher; /**< this outfix should not be run in small-block
                      * execution, as it will be handled by the sbmatcher
                      * HWLM table. */
    u8 eod; /* suffix is triggered by the etable --> can only produce eod
             * matches */
};
|
||||
|
||||
#define ROSE_ROLE_FLAG_ANCHOR_TABLE (1U << 0) /**< role is triggered from
|
||||
* anchored table */
|
||||
#define ROSE_ROLE_FLAG_ACCEPT_EOD (1U << 2) /**< "fake" role, fires callback
|
||||
* at EOD */
|
||||
#define ROSE_ROLE_FLAG_ONLY_AT_END (1U << 3) /**< role can only be switched on
|
||||
* at end of block */
|
||||
#define ROSE_ROLE_FLAG_PRED_OF_EOD (1U << 4) /**< eod is a successor literal
|
||||
* of the role */
|
||||
#define ROSE_ROLE_FLAG_EOD_TABLE (1U << 5) /**< role is triggered from eod
|
||||
* table */
|
||||
#define ROSE_ROLE_FLAG_ROSE (1U << 6) /**< rose style prefix nfa for
|
||||
* role */
|
||||
#define ROSE_ROLE_FLAG_SOM_REPORT (1U << 7) /**< report id is only used to
|
||||
* manipulate som */
|
||||
#define ROSE_ROLE_FLAG_REPORT_START (1U << 8) /**< som som som som */
|
||||
#define ROSE_ROLE_FLAG_CHAIN_REPORT (1U << 9) /**< report id is only used to
|
||||
* start an outfix engine */
|
||||
#define ROSE_ROLE_FLAG_SOM_ADJUST (1U << 10) /**< som value to use is offset
|
||||
* from match end location */
|
||||
#define ROSE_ROLE_FLAG_SOM_ROSEFIX (1U << 11) /**< som value to use is provided
|
||||
* by prefix/infix */
|
||||
|
||||
/* We allow different types of role-predecessor relationships. These are stored
|
||||
* in with the flags */
|
||||
#define ROSE_ROLE_PRED_NONE (1U << 20) /**< the only pred is the root,
|
||||
* [0, inf] bounds */
|
||||
#define ROSE_ROLE_PRED_SIMPLE (1U << 21) /**< single [0,inf] pred, no
|
||||
* offset tracking */
|
||||
#define ROSE_ROLE_PRED_ROOT (1U << 22) /**< pred is root or anchored
|
||||
* root, and we have bounds */
|
||||
#define ROSE_ROLE_PRED_ANY (1U << 23) /**< any of our preds can match */
|
||||
|
||||
#define ROSE_ROLE_PRED_CLEAR_MASK (~(ROSE_ROLE_PRED_NONE \
|
||||
| ROSE_ROLE_PRED_SIMPLE \
|
||||
| ROSE_ROLE_PRED_ROOT \
|
||||
| ROSE_ROLE_PRED_ANY))
|
||||
|
||||
#define MAX_STORED_LEFTFIX_LAG 127 /* max leftfix lag that we can store in one
|
||||
* whole byte (OWB) (streaming only). Other
|
||||
* values in OWB are reserved for zombie
|
||||
* status */
|
||||
#define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose
|
||||
* prefix checks */
|
||||
|
||||
// Structure representing a literal role.
struct RoseRole {
    u32 flags; // ROSE_ROLE_FLAG_* / ROSE_ROLE_PRED_* bits -- NOTE(review):
               // inferred from the defines above; confirm against builder
    u32 predOffset; // either offset of pred sparse iterator, or
                    // (for ROSE_ROLE_PRED_ROOT) index of single RosePred.
    rose_group groups; /**< groups to enable when role is set (groups of succ
                        * literals) */
    ReportID reportId; // report ID, or MO_INVALID_IDX
    u32 stateIndex; /**< index into state multibit, or MMB_INVALID. Roles do not
                     * require a state bit if they are terminal */
    u32 suffixEvent; // from enum mqe_event
    u8 depth; /**< depth of this vertex from root in the tree, or 255 if greater.
               */
    u32 suffixOffset; /**< suffix nfa: 0 if no suffix associated with the role,
                       * relative to base of the rose. */
    ReportID leftfixReport; // (pre|in)fix report to check, or MO_INVALID_IDX.
    u32 leftfixLag; /**< distance behind match where we need to check the
                     * leftfix engine status */
    u32 leftfixQueue; /**< queue index of the prefix/infix before role */
    u32 infixTriggerOffset; /* offset to list of infix roses to trigger */
    u32 sidecarEnableOffset; /**< offset to list of sidecar literals to enable
                              */
    u32 somAdjust; /**< som for the role is offset from end match offset */

    u32 lookaroundIndex; /**< index of lookaround offset/reach in table, or
                          * MO_INVALID_IDX. */
    u32 lookaroundCount; /**< number of lookaround entries. */
};
|
||||
|
||||
// Structure representing a predecessor relationship
struct RosePred {
    u32 role; // index of predecessor role
    u32 minBound; // min bound on distance from pred (_ANCH ->absolute offset)
    u32 maxBound; /* max bound on distance from pred, or ROSE_BOUND_INF
                   * (_ANCH -> absolute offset ) */
    u8 historyCheck; // from enum RoseRoleHistory
};
|
||||
|
||||
// Structure mapping between the dense index produced by the literal sparse
// iterator and a list of roles.
struct RoseIterMapping {
    u32 offset; // offset into iter role table
    u32 count; // number of roles
};
|
||||
|
||||
// One (role, pred) pair in the iter role table referenced by RoseIterMapping.
struct RoseIterRole {
    u32 role; // role index
    u32 pred; // predecessor index
};
|
||||
|
||||
/**
 * \brief Rose state offsets.
 *
 * Stores pre-calculated offsets (in bytes) to MOST of the state structures
 * used by Rose, relative to the start of stream state.
 *
 * State not covered by this structure includes:
 *
 * -# the RoseRuntimeState structure
 * -# the role state multibit
 */
struct RoseStateOffsets {
    /** History buffer.
     *
     * First byte is an 8-bit count of the number of valid history bytes
     * available, followed by the history itself. Max size of history is
     * RoseEngine::historyRequired. */
    u32 history;

    /** Exhausted bitvector.
     *
     * 1 bit per exhaustible key (used by Highlander mode). If a bit is set,
     * reports with that ekey should not be delivered to the user. */
    u32 exhausted;

    /** Sidecar state. */
    u32 sidecar;

    /** Size of sidecar state, in bytes. */
    u32 sidecar_size;

    /** Multibit for active suffix/outfix engines. */
    u32 activeLeafArray;

    /** Multibit for active Rose (prefix/infix) engines. */
    u32 activeLeftArray;

    /** Size of the active Rose array multibit, in bytes. */
    u32 activeLeftArray_size;

    /** Table of lag information (stored as one byte per engine) for active
     * Rose leftfix engines. */
    u32 leftfixLagTable;

    /** State for anchored matchers (McClellan DFAs). */
    u32 anchorState;

    /** Packed Rose groups value. */
    u32 groups;

    /** Size of packed Rose groups value, in bytes. */
    u32 groups_size;

    /** State for floating literal matcher (managed by HWLM). */
    u32 floatingMatcherState;

    /** Packed SOM location slots. */
    u32 somLocation;

    /** Multibit guarding SOM location slots (validity). */
    u32 somValid;

    /** Multibit guarding SOM location slots (writability). */
    u32 somWritable;

    /** Total size of Rose state, in bytes. */
    u32 end;
};
|
||||
|
||||
/* Offsets to lists of reports to raise at stream boundaries (EOD and/or
 * offset 0). Each list is MO_INVALID_IDX terminated; 0 means no list. */
struct RoseBoundaryReports {
    u32 reportEodOffset; /**< 0 if no reports lits, otherwise offset of
                          * MO_INVALID_IDX terminated list to report at EOD */
    u32 reportZeroOffset; /**< 0 if no reports lits, otherwise offset of
                           * MO_INVALID_IDX terminated list to report at offset
                           * 0 */
    u32 reportZeroEodOffset; /**< 0 if no reports lits, otherwise offset of
                              * MO_INVALID_IDX terminated list to report if eod
                              * is at offset 0. Superset of other lists. */
};
|
||||
|
||||
/* NFA Queue Assignment
|
||||
*
|
||||
* --- 0
|
||||
* (|) chained mpv (if present)
|
||||
* #
|
||||
* --- outfixBeginQueue -
|
||||
* | outfixes. enabled at offset 0.
|
||||
* |
|
||||
* #
|
||||
* --- outfixEndQueue -
|
||||
* | suffixes. enabled by rose roles.
|
||||
* |
|
||||
* #
|
||||
* --- leftfixBeginQueue -
|
||||
* | prefixes
|
||||
* |
|
||||
* #
|
||||
* --- ?
|
||||
* | infixes
|
||||
* |
|
||||
* #
|
||||
*/
|
||||
|
||||
#define ROSE_RUNTIME_FULL_ROSE 0
|
||||
#define ROSE_RUNTIME_PURE_LITERAL 1
|
||||
#define ROSE_RUNTIME_SINGLE_OUTFIX 2
|
||||
|
||||
// Runtime structure header for Rose.
|
||||
// In memory, we follow this with:
|
||||
// 1a. anchored 'literal' matcher table
|
||||
// 1b. floating literal matcher table
|
||||
// 1c. sidecar 'literal' matcher table
|
||||
// 1d. eod-anchored literal matcher table
|
||||
// 1e. small block table
|
||||
// 2. array of RoseLiteral (literalCount entries)
|
||||
// 3. array of RoseRole (roleCount entries)
|
||||
// 4. array of RosePred (predCount entries)
|
||||
// 8. array of NFA offsets, one per queue
|
||||
// 9. array of state offsets, one per queue (+)
|
||||
// 10. array of role ids for the set of all root roles
|
||||
// 12. multi-direct report array
|
||||
/*
|
||||
* (+) stateOffset array note: Offsets in the array are either into the stream
|
||||
* state (normal case) or into the tstate region of scratch (for transient rose
|
||||
* nfas). Rose nfa info table can distinguish the cases.
|
||||
*/
|
||||
struct RoseEngine {
    u8 hasFloatingDirectReports; // has at least one floating direct report literal
    u8 noFloatingRoots; /* only need to run the anchored table if something
                         * matched in the anchored table */
    u8 requiresEodCheck; /* stuff happens at eod time */
    u8 requiresEodSideCatchup; /* we need to do a sidecar catchup before eod
                                * checks */
    u8 hasEodEventLiteral; // fires a ROSE_EVENT literal at eod time.
    u8 hasOutfixesInSmallBlock; /**< has at least one outfix that must run even
                                     in small block scans. */
    u8 runtimeImpl; /**< can we just run the floating table or a single outfix?
                     * or do we need a full rose? (ROSE_RUNTIME_* above) */
    u8 mpvTriggeredByLeaf; /**< need to check (suf|out)fixes for mpv trigger */
    u8 canExhaust; /**< every pattern has an exhaustion key */
    u8 hasSom; /**< has at least one pattern which tracks SOM. */
    u8 somHorizon; /**< width in bytes of SOM offset storage (governed by
                        SOM precision) */
    u8 simpleCallback; /**< has only external reports with no bounds checks,
                            plus no exhaustion keys */
    u32 mode; /**< scanning mode, one of HS_MODE_{BLOCK,STREAM,VECTORED} */
    u32 historyRequired; /**< max amount of history required for streaming */
    u32 ekeyCount; /**< number of exhaustion keys */
    u32 dkeyCount; /**< number of dedupe keys */
    u32 invDkeyOffset; /**< offset to table mapping from dkeys to the external
                        * report ids */
    u32 somLocationCount; /**< number of som locations required */
    u32 rolesWithStateCount; // number of roles with entries in state bitset
    u32 stateSize; /* size of the state bitset
                    * WARNING: not the size of the rose state */
    u32 anchorStateSize; /* size of the state for the anchor dfas */
    u32 nfaStateSize; /* total size of the state for the mask/rose nfas */
    u32 tStateSize; /* total size of the state for transient rose nfas */
    u32 scratchStateSize; /**< uncompressed state req'd for NFAs in scratch;
                           * used for sizing scratch only. */
    u32 smallWriteOffset; /**< offset of small-write matcher */
    u32 amatcherOffset; // offset of the anchored literal matcher (bytes)
    u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes)
    u32 fmatcherOffset; // offset of the floating literal matcher (bytes)
    u32 smatcherOffset; // offset of the sidecar literal matcher (bytes)
    u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes)
    u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern
                           * involved with the anchored table to produce a full
                           * match. */
    u32 fmatcherMinWidth; /**< minimum number of bytes required for a pattern
                           * involved with the floating table to produce a full
                           * match. */
    u32 eodmatcherMinWidth; /**< minimum number of bytes required for a pattern
                             * involved with the eod table to produce a full
                             * match. */
    u32 amatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still
                                     * produce a match for a pattern involved
                                     * with the anchored table. */
    u32 fmatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still
                                     * produce a match for a pattern involved
                                     * with the floating table. */
    u32 intReportOffset; /**< offset of array of internal_report structures */
    u32 intReportCount; /**< number of internal_report structures */
    u32 literalOffset; // offset of RoseLiteral array (bytes)
    u32 literalCount; // number of RoseLiteral entries [NOT number of literals]
    u32 sideOffset; /**< offset of RoseSide array (bytes), indexed by
                     * sidecar ids */
    u32 sideCount; /**< number of RoseSide entries */
    u32 multidirectOffset; /**< offset of multi-direct report list. */
    u32 activeArrayCount; //number of nfas tracked in the active array
    u32 activeLeftCount; //number of nfas tracked in the active rose array
    u32 queueCount; /**< number of nfa queues */
    u32 roleOffset; // offset of RoseRole array (bytes)
    u32 roleCount; // number of RoseRole entries
    u32 predOffset; // offset of RosePred array (bytes)
    u32 predCount; // number of RosePred entries
    u32 rootRoleOffset; // offset of root role id table (u32 entries, bytes)
    u32 rootRoleCount; // number of entries in the root role table

    u32 leftOffset; // offset of LeftNfaInfo array (bytes)
    u32 roseCount; /* NOTE(review): presumably the number of LeftNfaInfo
                    * entries at leftOffset -- confirm */
    u32 lookaroundTableOffset; //!< base of lookaround offset list (of s8 values)
    u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32
                                * bytes each) */

    u32 eodIterOffset; // or 0 if no eod iterator
    u32 eodIterMapOffset;

    u32 lastByteHistoryIterOffset; // if non-zero

    /** \brief Minimum number of bytes required to match. */
    u32 minWidth;

    /** \brief Minimum number of bytes required to match, excluding boundary
     * reports. */
    u32 minWidthExcludingBoundaries;

    u32 maxBiAnchoredWidth; /* ROSE_BOUND_INF if any non bianchored patterns
                             * present */
    u32 anchoredDistance; // region to run the anchored table over
    u32 anchoredMinDistance; /* start of region to run anchored table over */
    u32 floatingDistance; /* end of region to run the floating table over
                             ROSE_BOUND_INF if not bounded */
    u32 floatingMinDistance; /* start of region to run floating table over */
    u32 smallBlockDistance; /* end of region to run the floating table over
                               ROSE_BOUND_INF if not bounded */
    u32 maxSafeAnchoredDROffset; /* the maximum offset that we can safely raise
                                  * a direct report from the anchored table
                                  * without delaying it */
    u32 floatingMinLiteralMatchOffset; /* the minimum offset that we can get a
                                        * 'valid' match from the floating
                                        * table */
    u32 nfaInfoOffset; /* offset to the nfa info offset array */
    u32 anchoredReportMapOffset; /* am_log index --> reportid */
    u32 anchoredReportInverseMapOffset; /* reportid --> am_log index */
    rose_group initialGroups; // group mask enabled at start of scan
    u32 size; // (bytes)
    u32 anchoredMatches; /* number of anchored roles generating matches */
    u32 delay_count; /* number of delayed literal ids. */
    u32 delay_slot_size; /* size of delay slot mmbit. */
    u32 delay_base_id; /* literal id of the first delayed literal.
                        * delayed literal ids are contiguous */
    u32 anchored_count; /* number of anchored literal ids */
    u32 anchored_base_id; /* literal id of the first literal in the A table.
                           * anchored literal ids are contiguous */
    u32 nonbenefits_base_id; /* first literal id without benefit conf.
                              * contiguous, blah, blah */
    u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can
                                  * usefully be reported */
    u32 delayRebuildLength; /* length of the history region which needs to be
                             * rescanned when we are doing a delayed literal
                             * rebuild scan. */
    struct RoseStateOffsets stateOffsets; // layout of the stream state regions
    struct RoseBoundaryReports boundary; // reports to fire at boundaries
    u32 totalNumLiterals; /* total number of literals including dr */
    u32 asize; /* size of the atable */
    u32 initSideEnableOffset; /* sidecar literals enabled initially */
    u32 outfixBeginQueue; /* first outfix queue */
    u32 outfixEndQueue; /* one past the last outfix queue */
    u32 leftfixBeginQueue; /* first prefix/infix queue */
    u32 initMpvNfa; /* (allegedly chained) mpv to force on at init */
    u32 rosePrefixCount; /* number of rose prefixes */
    u32 activeLeftIterOffset; /* mmbit_sparse_iter over non-transient roses */
    u32 ematcherRegionSize; /* max region size to pass to ematcher */
    u32 literalBenefitsOffsets; /* offset to array of benefits indexed by lit
                                   id */
    u32 somRevCount; /**< number of som reverse nfas */
    u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */
    u32 nfaRegionBegin; /* start of the nfa region, debugging only */
    u32 nfaRegionEnd; /* end of the nfa region, debugging only */
    u32 group_weak_end; /* end of weak groups, debugging only */
    u32 floatingStreamState; // size in bytes
    u32 eodLiteralId; // literal ID for eod ROSE_EVENT if used, otherwise 0.

    /* scatter plan used to initialise stream state -- NOTE(review): assumed
     * from the name; confirm against the init code */
    struct scatter_full_plan state_init;
};
|
||||
|
||||
/** \brief Benefit masks for a literal: a byte is checked by AND-ing with
 * and_mask and comparing against expected. Layout is significant; both
 * unions give word-wise and byte-wise views of the same mask bytes. */
struct lit_benefits {
    union {
        u64a a64[MAX_MASK2_WIDTH/sizeof(u64a)];
        u8 a8[MAX_MASK2_WIDTH];
    } and_mask; /* bytes to AND with the input */
    union {
        u64a e64[MAX_MASK2_WIDTH/sizeof(u64a)];
        u8 e8[MAX_MASK2_WIDTH];
    } expected; /* value the masked input must equal */
};
|
||||
|
||||
#if defined(_WIN32)
#pragma pack(push, 1)
#endif
// Rose runtime state. Lives at the start of the stream state; must be
// packed identically on all compilers (pragma pack on MSVC, packed
// attribute elsewhere).
struct RoseRuntimeState {
    u8 stored_depth; /* depth at stream boundary */
    u8 flags; /* high bit true if delay rebuild needed */
    u8 broken; /* user has requested that we stop matching */
#if defined(_WIN32)
};
#pragma pack(pop)
#else
} __attribute__((packed));
#endif
|
||||
|
||||
/* Header for one anchored DFA in the anchored matcher table; the NFA
 * structure itself immediately follows this header (cache-line aligned). */
struct ALIGN_CL_DIRECTIVE anchored_matcher_info {
    u32 next_offset; /* relative to this, 0 for end */
    u32 state_offset; /* relative to anchorState */
    u32 anchoredMinDistance; /* start of region to run anchored table over */
};
|
||||
|
||||
static really_inline
|
||||
const struct anchored_matcher_info *getALiteralMatcher(
|
||||
const struct RoseEngine *t) {
|
||||
if (!t->amatcherOffset) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const char *lt = (const char *)t + t->amatcherOffset;
|
||||
assert(ISALIGNED_CL(lt));
|
||||
return (const struct anchored_matcher_info *)lt;
|
||||
}
|
||||
|
||||
struct HWLM;
|
||||
|
||||
static really_inline
|
||||
const struct HWLM *getFLiteralMatcher(const struct RoseEngine *t) {
|
||||
if (!t->fmatcherOffset) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const char *lt = (const char *)t + t->fmatcherOffset;
|
||||
assert(ISALIGNED_CL(lt));
|
||||
return (const struct HWLM *)lt;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const void *getSLiteralMatcher(const struct RoseEngine *t) {
|
||||
if (!t->smatcherOffset) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const char *st = (const char *)t + t->smatcherOffset;
|
||||
assert(ISALIGNED_N(st, 8));
|
||||
return st;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const void *getELiteralMatcher(const struct RoseEngine *t) {
|
||||
if (!t->ematcherOffset) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const char *et = (const char *)t + t->ematcherOffset;
|
||||
assert(ISALIGNED_N(et, 8));
|
||||
return et;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const void *getSBLiteralMatcher(const struct RoseEngine *t) {
|
||||
if (!t->sbmatcherOffset) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const char *matcher = (const char *)t + t->sbmatcherOffset;
|
||||
assert(ISALIGNED_N(matcher, 8));
|
||||
return matcher;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct RoseLiteral *getLiteralTable(const struct RoseEngine *t) {
|
||||
const struct RoseLiteral *tl
|
||||
= (const struct RoseLiteral *)((const char *)t + t->literalOffset);
|
||||
assert(ISALIGNED_N(tl, 4));
|
||||
return tl;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct RoseSide *getSideEntryTable(const struct RoseEngine *t) {
|
||||
const struct RoseSide *rs
|
||||
= (const struct RoseSide *)((const char *)t + t->sideOffset);
|
||||
assert(ISALIGNED(rs));
|
||||
return rs;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct RoseRole *getRoleTable(const struct RoseEngine *t) {
|
||||
const struct RoseRole *r
|
||||
= (const struct RoseRole *)((const char *)t + t->roleOffset);
|
||||
assert(ISALIGNED_N(r, 4));
|
||||
return r;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct RosePred *getPredTable(const struct RoseEngine *t) {
|
||||
const struct RosePred *p
|
||||
= (const struct RosePred *)((const char *)t + t->predOffset);
|
||||
assert(ISALIGNED_N(p, 4));
|
||||
return p;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct LeftNfaInfo *getLeftTable(const struct RoseEngine *t) {
|
||||
const struct LeftNfaInfo *r
|
||||
= (const struct LeftNfaInfo *)((const char *)t + t->leftOffset);
|
||||
assert(ISALIGNED_N(r, 4));
|
||||
return r;
|
||||
}
|
||||
|
||||
struct mmbit_sparse_iter; // forward decl
|
||||
|
||||
static really_inline
|
||||
const struct mmbit_sparse_iter *getActiveLeftIter(const struct RoseEngine *t) {
|
||||
assert(t->activeLeftIterOffset);
|
||||
const struct mmbit_sparse_iter *it = (const struct mmbit_sparse_iter *)
|
||||
((const char *)t + t->activeLeftIterOffset);
|
||||
assert(ISALIGNED_N(it, 4));
|
||||
return it;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u32 *getRootRoleTable(const struct RoseEngine *t) {
|
||||
const u32 *r = (const u32 *)((const char *)t + t->rootRoleOffset);
|
||||
assert(ISALIGNED_N(r, 4));
|
||||
return r;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct lit_benefits *getLiteralBenefitsTable(
|
||||
const struct RoseEngine *t) {
|
||||
return (const struct lit_benefits *)
|
||||
((const char *)t + t->literalBenefitsOffsets);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct NfaInfo *getNfaInfoByQueue(const struct RoseEngine *t, u32 qi) {
|
||||
const struct NfaInfo *infos
|
||||
= (const struct NfaInfo *)((const char *)t + t->nfaInfoOffset);
|
||||
assert(ISALIGNED_N(infos, sizeof(u32)));
|
||||
|
||||
return &infos[qi];
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct NFA *getNfaByInfo(const struct RoseEngine *t,
|
||||
const struct NfaInfo *info) {
|
||||
return (const struct NFA *)((const char *)t + info->nfaOffset);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct NFA *getNfaByQueue(const struct RoseEngine *t, u32 qi) {
|
||||
const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
|
||||
return getNfaByInfo(t, info);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 queueToLeftIndex(const struct RoseEngine *t, u32 qi) {
|
||||
assert(qi >= t->leftfixBeginQueue);
|
||||
return qi - t->leftfixBeginQueue;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct LeftNfaInfo *getLeftInfoByQueue(const struct RoseEngine *t,
|
||||
u32 qi) {
|
||||
const struct LeftNfaInfo *infos = getLeftTable(t);
|
||||
return &infos[queueToLeftIndex(t, qi)];
|
||||
}
|
||||
|
||||
struct SmallWriteEngine;
|
||||
|
||||
static really_inline
|
||||
const struct SmallWriteEngine *getSmallWrite(const struct RoseEngine *t) {
|
||||
if (!t->smallWriteOffset) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const struct SmallWriteEngine *smwr =
|
||||
(const struct SmallWriteEngine *)((const char *)t + t->smallWriteOffset);
|
||||
return smwr;
|
||||
}
|
||||
|
||||
#endif // ROSE_INTERNAL_H
|
101
src/rose/rose_sidecar_runtime.h
Normal file
101
src/rose/rose_sidecar_runtime.h
Normal file
@@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_SIDECAR_RUNTIME_H_1F746F6F237176
|
||||
#define ROSE_SIDECAR_RUNTIME_H_1F746F6F237176
|
||||
|
||||
#include "hwlm/hwlm.h"
|
||||
#include "scratch.h"
|
||||
#include "sidecar/sidecar.h"
|
||||
#include "rose_common.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
// Callback defined in match.c
|
||||
void roseSidecarCallback(u64a offset, u32 side_id, void *context);
|
||||
|
||||
/** \brief Runs the sidecar matcher over the region [tctxt->side_curr, end).
 *
 * Scans either the current buffer or (at EOD time) the history buffer,
 * depending on where side_curr falls relative to buf_offset, then advances
 * side_curr to \p end. No-op if there is no sidecar or nothing to scan.
 */
static really_inline
void catchup_sidecar(struct RoseContext *tctxt, u64a end) {
    DEBUG_PRINTF("catching up the sidecar from %llu to %llu\n",
                 tctxt->side_curr, end);
    const struct sidecar *sidecar = getSLiteralMatcher(tctxt->t);
    struct hs_scratch *scratch = tctxtToScratch(tctxt);
    struct core_info *ci = &scratch->core_info;

    if (!sidecar || tctxt->side_curr == end) {
        return;
    }

    const u8 *start;
    if (tctxt->side_curr >= ci->buf_offset) {
        /* scan region lies within the current buffer */
        start = ci->buf + tctxt->side_curr - ci->buf_offset;
        assert(end <= ci->buf_offset + ci->len);
    } else {
        /* at eod time we are called running over the history */
        start = ci->hbuf + tctxt->side_curr - ci->buf_offset + ci->hlen;
        assert(end <= ci->buf_offset);
    }
    size_t len = end - tctxt->side_curr;

    DEBUG_PRINTF("enabled-->%02hhx\n", *(u8 *)&scratch->side_enabled.arb);
    sidecarExec(sidecar, start, len, &scratch->side_enabled.arb,
                scratch->side_scratch, tctxt->side_curr, roseSidecarCallback,
                tctxt);
    tctxt->side_curr = end;

    DEBUG_PRINTF("finished catching up the sidecar to %llu\n", end);
}
|
||||
|
||||
static rose_inline
|
||||
void enable_sidecar(struct RoseContext *tctxt, const struct RoseRole *tr) {
|
||||
assert(tr->sidecarEnableOffset);
|
||||
const struct sidecar *sidecar = getSLiteralMatcher(tctxt->t);
|
||||
assert(sidecar);
|
||||
struct hs_scratch *scratch = tctxtToScratch(tctxt);
|
||||
DEBUG_PRINTF("welcome to the sidecar\n");
|
||||
sidecarEnabledUnion(sidecar, &scratch->side_enabled.arb,
|
||||
(const void *)((const char *)tctxt->t + tr->sidecarEnableOffset));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void sidecar_enabled_populate(const struct RoseEngine *t,
|
||||
struct hs_scratch *scratch, const u8 *state) {
|
||||
DEBUG_PRINTF("enabled-->%02hhx\n", *(state + t->stateOffsets.sidecar));
|
||||
memcpy(&scratch->side_enabled, state + t->stateOffsets.sidecar,
|
||||
t->stateOffsets.sidecar_size);
|
||||
DEBUG_PRINTF("enabled-->%02hhx\n", *(u8 *)&scratch->side_enabled.arb);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void sidecar_enabled_preserve(const struct RoseEngine *t,
|
||||
const struct hs_scratch *scratch, u8 *state) {
|
||||
memcpy(state + t->stateOffsets.sidecar, &scratch->side_enabled,
|
||||
t->stateOffsets.sidecar_size);
|
||||
}
|
||||
|
||||
|
||||
#endif /* ROSE_SIDECAR_RUNTIME_H_1F746F6F237176 */
|
41
src/rose/rose_types.h
Normal file
41
src/rose/rose_types.h
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_TYPES_H
|
||||
#define ROSE_TYPES_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
struct RoseEngine;
|
||||
|
||||
// Note: identical signature to NfaCallback
|
||||
typedef int (*RoseCallback)(u64a offset, ReportID id, void *context);
|
||||
typedef int (*RoseCallbackSom)(u64a from_offset, u64a to_offset, ReportID id,
|
||||
void *context);
|
||||
|
||||
#endif
|
217
src/rose/runtime.h
Normal file
217
src/rose/runtime.h
Normal file
@@ -0,0 +1,217 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Runtime functions shared between various Rose runtime code.
|
||||
*/
|
||||
|
||||
#ifndef ROSE_RUNTIME_H
|
||||
#define ROSE_RUNTIME_H
|
||||
|
||||
#include "scratch.h"
|
||||
#include "rose_internal.h"
|
||||
#include "util/exhaust.h" // for isExhausted
|
||||
#include "util/internal_report.h"
|
||||
#include "util/partial_store.h"
|
||||
|
||||
/*
|
||||
* ROSE STATE LAYOUT:
|
||||
* state multibit
|
||||
* runtime state structure
|
||||
* full history table
|
||||
* last history table
|
||||
* short history table
|
||||
* short queues (two multibits)
|
||||
* last queues (two multibits)
|
||||
* active array
|
||||
* delay rb dirty
|
||||
* nfa state
|
||||
*/
|
||||
|
||||
#define rose_inline really_inline
|
||||
|
||||
/** \brief Fetch runtime state ptr. */
|
||||
static really_inline
|
||||
struct RoseRuntimeState *getRuntimeState(u8 *state) {
|
||||
struct RoseRuntimeState *rs = (struct RoseRuntimeState *)(state);
|
||||
assert(ISALIGNED_N(rs, 8));
|
||||
return rs;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const void *getByOffset(const struct RoseEngine *t, u32 offset) {
|
||||
assert(offset < t->size);
|
||||
return (const u8 *)t + offset;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void *getRoleState(u8 *state) {
|
||||
return state + sizeof(struct RoseRuntimeState);
|
||||
}
|
||||
|
||||
/** \brief Fetch the active array for suffix nfas. */
|
||||
static really_inline
|
||||
u8 *getActiveLeafArray(const struct RoseEngine *t, u8 *state) {
|
||||
return state + t->stateOffsets.activeLeafArray;
|
||||
}
|
||||
|
||||
/** \brief Fetch the active array for rose nfas. */
|
||||
static really_inline
|
||||
u8 *getActiveLeftArray(const struct RoseEngine *t, u8 *state) {
|
||||
return state + t->stateOffsets.activeLeftArray;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u32 *getAnchoredInverseMap(const struct RoseEngine *t) {
|
||||
return (const u32 *)(((const u8 *)t) + t->anchoredReportInverseMapOffset);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u32 *getAnchoredMap(const struct RoseEngine *t) {
|
||||
return (const u32 *)(((const u8 *)t) + t->anchoredReportMapOffset);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
rose_group loadGroups(const struct RoseEngine *t, const u8 *state) {
|
||||
return partial_load_u64a(state + t->stateOffsets.groups,
|
||||
t->stateOffsets.groups_size);
|
||||
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void storeGroups(const struct RoseEngine *t, u8 *state, rose_group groups) {
|
||||
partial_store_u64a(state + t->stateOffsets.groups, groups,
|
||||
t->stateOffsets.groups_size);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u8 * getFloatingMatcherState(const struct RoseEngine *t, u8 *state) {
|
||||
return state + t->stateOffsets.floatingMatcherState;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u8 *getLeftfixLagTable(const struct RoseEngine *t, u8 *state) {
|
||||
return state + t->stateOffsets.leftfixLagTable;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 *getLeftfixLagTableConst(const struct RoseEngine *t, const u8 *state) {
|
||||
return state + t->stateOffsets.leftfixLagTable;
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
char roseSuffixInfoIsExhausted(const struct RoseEngine *t,
|
||||
const struct NfaInfo *info,
|
||||
const char *exhausted) {
|
||||
if (!info->ekeyListOffset) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("check exhaustion -> start at %u\n", info->ekeyListOffset);
|
||||
|
||||
/* END_EXHAUST terminated list */
|
||||
const u32 *ekeys = (const u32 *)((const char *)t + info->ekeyListOffset);
|
||||
while (*ekeys != END_EXHAUST) {
|
||||
DEBUG_PRINTF("check %u\n", *ekeys);
|
||||
if (!isExhausted(exhausted, *ekeys)) {
|
||||
DEBUG_PRINTF("not exhausted -> alive\n");
|
||||
return 0;
|
||||
}
|
||||
++ekeys;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("all ekeys exhausted -> dead\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
char roseSuffixIsExhausted(const struct RoseEngine *t, u32 qi,
|
||||
const char *exhausted) {
|
||||
DEBUG_PRINTF("check queue %u\n", qi);
|
||||
const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
|
||||
return roseSuffixInfoIsExhausted(t, info, exhausted);
|
||||
}
|
||||
|
||||
/* Non-zero if any chained (mpv) nfas are present: queues below
 * outfixBeginQueue are reserved for the chained mpv (see the queue
 * assignment diagram in rose_internal.h). */
static really_inline
u32 has_chained_nfas(const struct RoseEngine *t) {
    return t->outfixBeginQueue;
}
|
||||
|
||||
/** \brief Fetch \ref internal_report structure for this internal ID. */
|
||||
static really_inline
|
||||
const struct internal_report *getInternalReport(const struct RoseEngine *t,
|
||||
ReportID intId) {
|
||||
const struct internal_report *reports =
|
||||
(const struct internal_report *)((const u8 *)t + t->intReportOffset);
|
||||
assert(intId < t->intReportCount);
|
||||
return reports + intId;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct RoseRole *getRoleByOffset(const struct RoseEngine *t, u32 offset) {
|
||||
const struct RoseRole *tr = (const void *)((const char *)t + offset);
|
||||
|
||||
assert((size_t)(tr - getRoleTable(t)) < t->roleCount);
|
||||
DEBUG_PRINTF("get root role %zu\n", tr - getRoleTable(t));
|
||||
return tr;
|
||||
}
|
||||
|
||||
#define ANCHORED_MATCH_SENTINEL (~0U)
|
||||
|
||||
/** \brief Records \p offset as the most recent match offset; must be
 * monotonically non-decreasing and never behind minMatchOffset. */
static really_inline
void updateLastMatchOffset(struct RoseContext *tctxt, u64a offset) {
    DEBUG_PRINTF("match @%llu, last match @%llu\n", offset,
                 tctxt->lastMatchOffset);

    assert(offset >= tctxt->minMatchOffset);
    assert(offset >= tctxt->lastMatchOffset);
    tctxt->lastMatchOffset = offset;
}
|
||||
|
||||
/** \brief Advances both the overall and non-mpv minimum match offsets to
 * \p offset; must be monotonically non-decreasing. */
static really_inline
void updateMinMatchOffset(struct RoseContext *tctxt, u64a offset) {
    DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset,
                 tctxt->minMatchOffset);

    assert(offset >= tctxt->minMatchOffset);
    assert(offset >= tctxt->minNonMpvMatchOffset);
    tctxt->minMatchOffset = offset;
    tctxt->minNonMpvMatchOffset = offset;
}
|
||||
|
||||
/** \brief Advances the minimum match offset on behalf of the mpv; the
 * non-mpv minimum is only advanced if it is currently behind \p offset. */
static really_inline
void updateMinMatchOffsetFromMpv(struct RoseContext *tctxt, u64a offset) {
    DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset,
                 tctxt->minMatchOffset);

    assert(offset >= tctxt->minMatchOffset);
    assert(tctxt->minNonMpvMatchOffset >= tctxt->minMatchOffset);
    tctxt->minMatchOffset = offset;
    tctxt->minNonMpvMatchOffset = MAX(tctxt->minNonMpvMatchOffset, offset);
}
|
||||
#endif
|
582
src/rose/stream.c
Normal file
582
src/rose/stream.c
Normal file
@@ -0,0 +1,582 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "catchup.h"
|
||||
#include "counting_miracle.h"
|
||||
#include "infix.h"
|
||||
#include "match.h"
|
||||
#include "miracle.h"
|
||||
#include "hwlm/hwlm.h"
|
||||
#include "nfa/mcclellan.h"
|
||||
#include "nfa/nfa_api.h"
|
||||
#include "nfa/nfa_api_queue.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "util/fatbit.h"
|
||||
#include "rose_sidecar_runtime.h"
|
||||
#include "rose.h"
|
||||
|
||||
/**
 * \brief Run every DFA in the anchored literal matcher table over the first
 * \p alen bytes of the current stream write.
 *
 * The table is a linked sequence of anchored_matcher_info headers, each
 * immediately followed by its (McClellan) NFA; \c next_offset of zero
 * terminates the chain. Per-DFA stream state lives at a fixed offset from
 * the Rose anchored-state region.
 */
static rose_inline
void runAnchoredTableStream(const struct RoseEngine *t, const void *atable,
                            size_t alen, u64a offset,
                            struct hs_scratch *scratch) {
    // Base of the anchored matcher state inside the full Rose stream state.
    char *state_base
        = (char *)scratch->tctxt.state + t->stateOffsets.anchorState;

    const struct anchored_matcher_info *curr = atable;

    do {
        DEBUG_PRINTF("--anchored nfa (+%u) no %u so %u\n",
                     curr->anchoredMinDistance, curr->next_offset,
                     curr->state_offset);
        // The NFA is laid out directly after its header, cacheline-aligned.
        const struct NFA *nfa
            = (const struct NFA *)((const char *)curr + sizeof(*curr));
        assert(ISALIGNED_CL(nfa));
        assert(isMcClellanType(nfa->type));

        char *state = state_base + curr->state_offset;

        char start = 0;   // 1 => (re)start this DFA from its initial state
        size_t adj = 0;   // bytes to skip before this DFA may begin matching

        if (offset <= curr->anchoredMinDistance) {
            // This write reaches (or contains) the DFA's minimum start
            // distance: begin from the initial state, skipping up to it.
            adj = curr->anchoredMinDistance - offset;
            if (adj >= alen) {
                goto next_nfa; // min distance lies beyond this write
            }

            start = 1;
        } else {
            // Resuming: a stored state of zero means this DFA is dead.
            // (No state decompress necessary.)
            if (nfa->type == MCCLELLAN_NFA_8) {
                if (!*(u8 *)state) {
                    goto next_nfa;
                }
            } else {
                if (!*(u16 *)state) {
                    goto next_nfa;
                }
            }
        }

        // Dispatch on state width (8-bit vs 16-bit McClellan).
        if (nfa->type == MCCLELLAN_NFA_8) {
            nfaExecMcClellan8_SimpStream(nfa, state, scratch->core_info.buf,
                                         start, adj, alen, roseAnchoredCallback,
                                         &scratch->tctxt);
        } else {
            nfaExecMcClellan16_SimpStream(nfa, state, scratch->core_info.buf,
                                          start, adj, alen, roseAnchoredCallback,
                                          &scratch->tctxt);
        }

    next_nfa:
        if (!curr->next_offset) {
            break; // end of table
        }

        curr = (const void *)((const char *)curr + curr->next_offset);
    } while (1);
}
|
||||
|
||||
|
||||
/**
 * \brief Compress the state of \p nfa at location \p loc into its stream
 * state region. Thin wrapper around nfaQueueCompressState with debug trace.
 */
static really_inline
void saveStreamState(const struct NFA *nfa, struct mq *q, s64a loc) {
    DEBUG_PRINTF("offset=%llu, length=%zu, hlength=%zu, loc=%lld\n",
                 q->offset, q->length, q->hlength, loc);
    nfaQueueCompressState(nfa, q, loc);
}
|
||||
|
||||
static really_inline
|
||||
u8 getByteBefore(const struct core_info *ci, s64a sp) {
|
||||
if (sp > 0) { // in main buffer
|
||||
assert(sp <= (s64a)ci->len);
|
||||
return ci->buf[sp - 1];
|
||||
}
|
||||
// in history buffer
|
||||
assert(-sp < (s64a)ci->hlen);
|
||||
return ci->hbuf[ci->hlen + sp - 1];
|
||||
}
|
||||
|
||||
/** \brief Return value for \ref roseScanForMiracles. */
enum MiracleAction {
    MIRACLE_DEAD,    //!< kill off this engine
    MIRACLE_SAVED,   //!< engine has been caught up and state saved
    MIRACLE_CONTINUE //!< continue running and catch up engine
};
|
||||
|
||||
/**
 * \brief Look for a "miracle" (a byte that forces this leftfix back to a
 * known state) in the region to be caught up, and exploit it if found.
 *
 * If a miracle is found the engine can be re-initialised at the miracle
 * location instead of being run over the whole region. Returns one of
 * \ref MiracleAction: MIRACLE_DEAD (engine can be torn down), MIRACLE_SAVED
 * (prefix state already compressed and stored), or MIRACLE_CONTINUE (caller
 * must still run the engine to the stream boundary).
 */
static really_inline
enum MiracleAction roseScanForMiracles(const struct RoseEngine *t, u8 *state,
                                       struct hs_scratch *scratch, u32 qi,
                                       const struct LeftNfaInfo *left,
                                       const struct NFA *nfa) {
    struct core_info *ci = &scratch->core_info;
    const u32 qCount = t->queueCount;
    struct mq *q = scratch->queues + qi;

    const char q_active = fatbit_isset(scratch->aqa, qCount, qi);
    DEBUG_PRINTF("q_active=%d\n", q_active);

    // Scan from the queue's current location (if active) to end of buffer.
    const s64a begin_loc = q_active ? q_cur_loc(q) : 0;
    const s64a end_loc = ci->len;

    s64a miracle_loc;
    if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) {
        goto found_miracle;
    }

    if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc,
                                  &miracle_loc)) {
        goto found_miracle;
    }

    DEBUG_PRINTF("no miracle\n");
    return MIRACLE_CONTINUE;

found_miracle:
    DEBUG_PRINTF("miracle at %lld\n", miracle_loc);

    if (left->infix) {
        // Infixes with no active queue have no pending tops: the miracle
        // kills them outright.
        if (!q_active) {
            DEBUG_PRINTF("killing infix\n");
            return MIRACLE_DEAD;
        }

        DEBUG_PRINTF("skip q forward, %lld to %lld\n", begin_loc, miracle_loc);
        q_skip_forward_to(q, miracle_loc);
        // If only the START item survives, no top can arrive after the
        // miracle, so the infix is dead.
        if (q->items[q->end - 1].type == MQE_START) {
            DEBUG_PRINTF("miracle caused infix to die\n");
            return MIRACLE_DEAD;
        }

        DEBUG_PRINTF("re-init infix state\n");
        assert(q->items[q->cur].type == MQE_START);
        q->items[q->cur].location = miracle_loc;
        nfaQueueInitState(q->nfa, q);
    } else {
        // Prefix case. If the miracle is close enough to the stream
        // boundary, we can compress a fresh initial state directly and skip
        // running the engine at all.
        if (miracle_loc > end_loc - t->historyRequired) {
            u8 *streamState = state + getNfaInfoByQueue(t, qi)->stateOffset;
            u64a offset = ci->buf_offset + miracle_loc;
            // Key byte used to seed the compressed state; 0 at stream start.
            u8 key = offset ? getByteBefore(ci, miracle_loc) : 0;
            DEBUG_PRINTF("init state, key=0x%02x, offset=%llu\n", key, offset);
            if (!nfaInitCompressedState(nfa, offset, streamState, key)) {
                return MIRACLE_DEAD;
            }
            storeRoseDelay(t, state, left, (s64a)ci->len - miracle_loc);
            return MIRACLE_SAVED;
        }

        // Otherwise, rebuild the queue to start the prefix fresh at the
        // miracle location; the caller will run it to the boundary.
        DEBUG_PRINTF("re-init prefix (skip %lld->%lld)\n", begin_loc,
                     miracle_loc);
        if (!q_active) {
            fatbit_set(scratch->aqa, qCount, qi);
            initRoseQueue(t, qi, left, &scratch->tctxt);
        }
        q->cur = q->end = 0;
        pushQueueAt(q, 0, MQE_START, miracle_loc);
        pushQueueAt(q, 1, MQE_TOP, miracle_loc);
        nfaQueueInitState(q->nfa, q);
    }

    return MIRACLE_CONTINUE;
}
|
||||
|
||||
|
||||
/**
 * \brief Catch a single non-transient leftfix engine up to (near) the end of
 * the current stream write and save its stream state.
 *
 * Returns 1 if the engine is still alive after catch-up, 0 if it has died
 * and should be removed from the active-left array.
 */
static really_inline
char roseCatchUpLeftfix(const struct RoseEngine *t, u8 *state,
                        struct hs_scratch *scratch, u32 qi,
                        const struct LeftNfaInfo *left) {
    assert(!left->transient); // active roses only

    struct core_info *ci = &scratch->core_info;
    const u32 qCount = t->queueCount;
    struct mq *q = scratch->queues + qi;
    const struct NFA *nfa = getNfaByQueue(t, qi);

    // A zombie prefix with no work queued this write needs no catch-up.
    if (nfaSupportsZombie(nfa)
        && ci->buf_offset /* prefix can be alive with no q */
        && !fatbit_isset(scratch->aqa, qCount, qi)
        && isZombie(t, state, left)) {
        DEBUG_PRINTF("yawn - zombie\n");
        return 1;
    }

    // A stop table means miracles are possible: try to shortcut catch-up.
    if (left->stopTable) {
        enum MiracleAction mrv =
            roseScanForMiracles(t, state, scratch, qi, left, nfa);
        switch (mrv) {
        case MIRACLE_DEAD:
            return 0;
        case MIRACLE_SAVED:
            return 1;
        default:
            assert(mrv == MIRACLE_CONTINUE);
            break;
        }
    }

    if (!fatbit_set(scratch->aqa, qCount, qi)) {
        // Queue was not active this write: initialise it and work out the
        // start position from the stored rose delay.
        initRoseQueue(t, qi, left, &scratch->tctxt);

        s32 sp; // start position; <= 0 means inside the history buffer
        if (ci->buf_offset) {
            sp = -(s32)loadRoseDelay(t, state, left);
        } else {
            sp = 0;
        }

        DEBUG_PRINTF("ci->len=%zu, sp=%d, historyRequired=%u\n", ci->len, sp,
                     t->historyRequired);

        if ( ci->len - sp + 1 < t->historyRequired) {
            // we'll end up safely in the history region.
            DEBUG_PRINTF("safely in history, skipping\n");
            storeRoseDelay(t, state, left, (s64a)ci->len - sp);
            return 1;
        }

        pushQueueAt(q, 0, MQE_START, sp);
        if (left->infix || ci->buf_offset + sp > 0) {
            // Resume from compressed stream state.
            loadStreamState(nfa, q, sp);
        } else {
            // Stream start: push the initial top and build fresh state.
            pushQueueAt(q, 1, MQE_TOP, sp);
            nfaQueueInitState(nfa, q);
        }
    } else {
        DEBUG_PRINTF("queue already active\n");
        // If the queue holds only a START item, we may be able to just save
        // state and defer work to the next write.
        if (q->end - q->cur == 1 && q_cur_type(q) == MQE_START) {
            DEBUG_PRINTF("empty queue, start loc=%lld\n", q_cur_loc(q));
            s64a last_loc = q_cur_loc(q);
            if (ci->len - last_loc + 1 < t->historyRequired) {
                // we'll end up safely in the history region.
                DEBUG_PRINTF("safely in history, saving state and skipping\n");
                saveStreamState(nfa, q, last_loc);
                storeRoseDelay(t, state, left, (s64a)ci->len - last_loc);
                return 1;
            }
        }
    }

    // Determine whether the byte before last_loc will be in the history
    // buffer on the next stream write.
    s64a last_loc = q_last_loc(q);
    s64a leftovers = ci->len - last_loc;
    if (leftovers + 1 >= t->historyRequired) {
        // Not in history next time: run right up to the stream boundary,
        // less the engine's allowed lag.
        u32 catchup_offset = left->maxLag ? left->maxLag - 1 : 0;
        last_loc = (s64a)ci->len - catchup_offset;
    }

    if (left->infix) {
        if (infixTooOld(q, last_loc)) {
            DEBUG_PRINTF("infix died of old age\n");
            return 0;
        }
        // Trim stale events so the queue stays within its bounds.
        reduceQueue(q, last_loc, left->maxQueueLen, q->nfa->maxWidth);
    }

    DEBUG_PRINTF("end scan at %lld\n", last_loc);
    pushQueueNoMerge(q, MQE_END, last_loc);

#ifdef DEBUG
    debugQueue(q);
#endif

    char rv = nfaQueueExecRose(nfa, q, MO_INVALID_IDX);
    if (!rv) { /* nfa is dead */
        DEBUG_PRINTF("died catching up to stream boundary\n");
        return 0;
    } else {
        DEBUG_PRINTF("alive, saving stream state\n");
        if (nfaSupportsZombie(nfa) &&
            nfaGetZombieStatus(nfa, q, last_loc) == NFA_ZOMBIE_ALWAYS_YES) {
            // Zombie engines need only the zombie flag, not full state.
            DEBUG_PRINTF("not so fast - zombie\n");
            setAsZombie(t, state, left);
        } else {
            saveStreamState(nfa, q, last_loc);
            storeRoseDelay(t, state, left, (s64a)ci->len - last_loc);
        }
    }

    return 1;
}
|
||||
|
||||
/**
 * \brief Catch up all active non-transient leftfix engines at the end of a
 * stream write, removing any that die from the active-left array.
 */
static rose_inline
void roseCatchUpLeftfixes(const struct RoseEngine *t, u8 *state,
                          struct hs_scratch *scratch) {
    if (!t->activeLeftIterOffset) {
        // No sparse iter, no non-transient roses.
        return;
    }

    // As per UE-1629, we catch up leftfix engines to:
    // * current position (last location in the queue, or last location we
    //   executed to if the queue is empty) if that position (and the byte
    //   before so we can decompress the stream state) will be in the history
    //   buffer on the next stream write; OR
    // * (stream_boundary - max_delay) otherwise.

    u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into
                                             * left_table */
    const u32 arCount = t->activeLeftCount;
    const struct LeftNfaInfo *left_table = getLeftTable(t);
    const struct mmbit_sparse_iter *it = getActiveLeftIter(t);
    struct mmbit_sparse_state *s = scratch->sparse_iter_state;

    // Walk the set bits of the active-left multibit via the sparse iterator.
    u32 idx = 0;
    u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, s);
    for (; ri != MMB_INVALID;
         ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, s)) {
        const struct LeftNfaInfo *left = left_table + ri;
        u32 qi = ri + t->leftfixBeginQueue;
        DEBUG_PRINTF("leftfix %u of %u, maxLag=%u, infix=%d\n", ri, arCount,
                     left->maxLag, (int)left->infix);
        if (!roseCatchUpLeftfix(t, state, scratch, qi, left)) {
            // Engine died: squash its literal groups and deactivate it.
            DEBUG_PRINTF("removing rose %u from active list\n", ri);
            DEBUG_PRINTF("groups old=%016llx mask=%016llx\n",
                         scratch->tctxt.groups, left->squash_mask);
            scratch->tctxt.groups &= left->squash_mask;
            mmbit_unset(ara, arCount, ri);
        }
    }
}
|
||||
|
||||
// Saves out stream state for all our active suffix NFAs.
static rose_inline
void roseSaveNfaStreamState(const struct RoseEngine *t, u8 *state,
                            struct hs_scratch *scratch) {
    struct mq *queues = scratch->queues;
    u8 *aa = getActiveLeafArray(t, state);
    u32 aaCount = t->activeArrayCount;

    // An inactive MPV must not be saved; clear its active bit first.
    if (scratch->tctxt.mpv_inactive) {
        DEBUG_PRINTF("mpv is dead as a doornail\n");
        /* mpv if it exists is queue 0 */
        mmbit_unset(aa, aaCount, 0);
    }

    // Compress state for every queue still marked active.
    for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID;
         qi = mmbit_iterate(aa, aaCount, qi)) {
        DEBUG_PRINTF("saving stream state for qi=%u\n", qi);

        struct mq *q = queues + qi;

        // If it's active, it should have an active queue (as we should have
        // done some work!)
        assert(fatbit_isset(scratch->aqa, t->queueCount, qi));

        const struct NFA *nfa = getNfaByQueue(t, qi);
        saveStreamState(nfa, q, q_cur_loc(q));
    }
}
|
||||
|
||||
/**
 * \brief Final bookkeeping at the end of a stream write: catch up all
 * engines, save their stream state, and persist runtime flags/groups.
 *
 * Steps are order-sensitive: report catch-up must run before any state is
 * compressed, and suffixes are saved before leftfixes are caught up.
 */
static rose_inline
void ensureStreamNeatAndTidy(const struct RoseEngine *t, u8 *state,
                             struct hs_scratch *scratch, size_t length,
                             u64a offset, u8 delay_rb_status) {
    struct RoseContext *tctxt = &scratch->tctxt;

    // Flush outstanding matches up to the stream boundary first.
    if (roseCatchUpTo(t, state, length + scratch->core_info.buf_offset, scratch,
                      0)
        == HWLM_TERMINATE_MATCHING) {
        return; /* dead; no need to clean up state. */
    }
    roseSaveNfaStreamState(t, state, scratch);
    roseCatchUpLeftfixes(t, state, scratch);
    roseFlushLastByteHistory(t, state, offset + length, tctxt);
    tctxt->lastEndOffset = offset + length;
    catchup_sidecar(tctxt, offset + length);
    sidecar_enabled_preserve(t, scratch, state);
    // Persist groups and runtime flags for the next stream write.
    storeGroups(t, state, tctxt->groups);
    struct RoseRuntimeState *rstate = getRuntimeState(state);
    rstate->stored_depth = tctxt->depth;
    rstate->flags = delay_rb_status;
}
|
||||
|
||||
static really_inline
|
||||
void do_rebuild(const struct RoseEngine *t, const struct HWLM *ftable,
|
||||
struct hs_scratch *scratch) {
|
||||
assert(!can_stop_matching(scratch));
|
||||
size_t len = MIN(scratch->core_info.hlen, t->delayRebuildLength);
|
||||
const u8 *buf = scratch->core_info.hbuf + scratch->core_info.hlen - len;
|
||||
DEBUG_PRINTF("BEGIN FLOATING REBUILD over %zu bytes\n", len);
|
||||
|
||||
hwlmExec(ftable, buf, len, 0, roseDelayRebuildCallback, scratch,
|
||||
scratch->tctxt.groups);
|
||||
assert(!can_stop_matching(scratch));
|
||||
}
|
||||
|
||||
/**
 * \brief Streaming-mode scan entry point for the Rose engine.
 *
 * Runs the anchored table (if within anchored distance), then the floating
 * literal matcher (with delayed-match rebuild over history as needed),
 * flushes delayed literals, and finally catches up and saves all engine
 * state for the next stream write.
 */
void roseStreamExec(const struct RoseEngine *t, u8 *state,
                    struct hs_scratch *scratch, RoseCallback callback,
                    RoseCallbackSom som_callback, void *ctx) {
    DEBUG_PRINTF("OH HAI\n");
    assert(t);
    assert(state);
    assert(scratch->core_info.hbuf);
    assert(scratch->core_info.buf);

    assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount)
           < MAX_SPARSE_ITER_STATES);

    size_t length = scratch->core_info.len;
    u64a offset = scratch->core_info.buf_offset;

    // We may have a maximum width (for engines constructed entirely
    // of bi-anchored patterns). If this write would result in us progressing
    // beyond this point, we cannot possibly match.
    if (t->maxBiAnchoredWidth != ROSE_BOUND_INF
        && offset + length > t->maxBiAnchoredWidth) {
        DEBUG_PRINTF("bailing, write would progress beyond maxBAWidth\n");
        return;
    }

    struct RoseRuntimeState *rstate = getRuntimeState(state);

    // Initialise the per-write scan context from the persisted stream state.
    struct RoseContext *tctxt = &scratch->tctxt;
    tctxt->t = t;
    tctxt->depth = rstate->stored_depth;
    tctxt->mpv_inactive = 0;
    tctxt->groups = loadGroups(t, state);
    tctxt->lit_offset_adjust = offset + 1; // index after last byte
    tctxt->delayLastEndOffset = offset;
    tctxt->lastEndOffset = offset;
    tctxt->filledDelayedSlots = 0;
    tctxt->state = state;
    tctxt->cb = callback;
    tctxt->cb_som = som_callback;
    tctxt->userCtx = ctx;
    tctxt->lastMatchOffset = 0;
    tctxt->minMatchOffset = offset;
    tctxt->minNonMpvMatchOffset = offset;
    tctxt->next_mpv_offset = 0;
    tctxt->curr_anchored_loc = MMB_INVALID;
    tctxt->curr_row_offset = 0;
    tctxt->side_curr = offset;

    DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu\n",
                 scratch->core_info.hlen, scratch->core_info.len);

    // Reset per-write scratch structures.
    fatbit_clear(scratch->aqa);
    scratch->am_log_sum = 0; /* clear the anchored logs */
    scratch->al_log_sum = 0;
    scratch->catchup_pq.qm_size = 0;

    if (t->outfixBeginQueue != t->outfixEndQueue) {
        streamInitSufPQ(t, state, scratch);
    }

    sidecar_enabled_populate(t, scratch, state);

    u8 delay_rb_status = rstate->flags;

    // Number of bytes of this write still within the anchored distance.
    u32 alen = t->anchoredDistance > offset ?
        MIN(length + offset, t->anchoredDistance) - offset : 0;

    const struct anchored_matcher_info *atable = getALiteralMatcher(t);
    if (atable && alen) {
        DEBUG_PRINTF("BEGIN ANCHORED %zu/%u\n", scratch->core_info.hlen, alen);
        runAnchoredTableStream(t, atable, alen, offset, scratch);

        if (can_stop_matching(scratch)) {
            goto exit;
        }

        resetAnchoredLog(t, scratch);
    }

    const struct HWLM *ftable = getFLiteralMatcher(t);
    if (ftable) {
        // depth == 1 here indicates no in-flight matches; with no floating
        // roots there is nothing for the floating matcher to do.
        if (t->noFloatingRoots && tctxt->depth == 1) {
            DEBUG_PRINTF("skip FLOATING: no inflight matches\n");
            goto flush_delay_and_exit;
        }

        // Clip the floating scan length to the floating distance.
        size_t flen = length;
        if (t->floatingDistance != ROSE_BOUND_INF) {
            flen = t->floatingDistance > offset ?
                MIN(t->floatingDistance, length + offset) - offset : 0;
        }

        size_t hlength = scratch->core_info.hlen;

        // Rebuild delayed-match state from history only while it is dirty
        // and delayed matches are still possible at this offset.
        char rebuild = hlength && (delay_rb_status & DELAY_FLOAT_DIRTY)
            && (t->maxFloatingDelayedMatch == ROSE_BOUND_INF
                || offset < t->maxFloatingDelayedMatch);
        DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n",
                     rebuild, delay_rb_status, t->maxFloatingDelayedMatch,
                     offset);

        if (!flen) {
            if (rebuild) { /* rebuild floating delayed match stuff */
                do_rebuild(t, ftable, scratch);
            }
            goto flush_delay_and_exit;
        }

        if (rebuild) { /* rebuild floating delayed match stuff */
            do_rebuild(t, ftable, scratch);
        }

        if (flen + offset <= t->floatingMinDistance) {
            DEBUG_PRINTF("skip FLOATING: before floating min\n");
            goto flush_delay_and_exit;
        }

        size_t start = 0;
        if (offset < t->floatingMinDistance) {
            // This scan crosses the floating min distance, so we can use that
            // to set HWLM's "start" offset.
            start = t->floatingMinDistance - offset;
        }
        DEBUG_PRINTF("start=%zu\n", start);

        u8 *stream_state;
        if (t->floatingStreamState) {
            stream_state = getFloatingMatcherState(t, state);
        } else {
            stream_state = NULL;
        }

        DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length);
        hwlmExecStreaming(ftable, scratch, flen, start, roseCallback, tctxt,
                          tctxt->groups, stream_state);
    }

flush_delay_and_exit:
    DEBUG_PRINTF("flushing floating\n");
    if (cleanUpDelayed(length, offset, tctxt, &delay_rb_status)
        == HWLM_TERMINATE_MATCHING) {
        return;
    }

exit:
    DEBUG_PRINTF("CLEAN UP TIME\n");
    if (!can_stop_matching(scratch)) {
        ensureStreamNeatAndTidy(t, state, scratch, length, offset,
                                delay_rb_status);
    }
    DEBUG_PRINTF("DONE STREAMING SCAN, dirty = %hhu\n", delay_rb_status);
    return;
}
|
Reference in New Issue
Block a user