Initial commit of Hyperscan

Matthew Barr
2015-10-20 09:13:35 +11:00
commit 904e436f11
610 changed files with 213627 additions and 0 deletions

src/rose/block.c (new file, 293 lines)

@@ -0,0 +1,293 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "catchup.h"
#include "init.h"
#include "match.h"
#include "nfa/nfa_api.h"
#include "nfa/nfa_internal.h"
#include "nfa/nfa_rev_api.h"
#include "nfa/mcclellan.h"
#include "util/fatbit.h"
#include "rose_sidecar_runtime.h"
#include "rose.h"
#include "rose_common.h"
static rose_inline
void runAnchoredTableBlock(const struct RoseEngine *t, const void *atable,
struct hs_scratch *scratch) {
const u8 *buffer = scratch->core_info.buf;
size_t length = scratch->core_info.len;
size_t alen = MIN(length, t->anchoredDistance);
const struct anchored_matcher_info *curr = atable;
DEBUG_PRINTF("BEGIN ANCHORED (over %zu/%zu)\n", alen, length);
do {
const struct NFA *nfa
= (const struct NFA *)((const char *)curr + sizeof(*curr));
assert(t->anchoredDistance > curr->anchoredMinDistance);
if (length >= curr->anchoredMinDistance) {
size_t local_alen = alen - curr->anchoredMinDistance;
const u8 *local_buffer = buffer + curr->anchoredMinDistance;
DEBUG_PRINTF("--anchored nfa (+%u)\n", curr->anchoredMinDistance);
assert(isMcClellanType(nfa->type));
if (nfa->type == MCCLELLAN_NFA_8) {
nfaExecMcClellan8_B(nfa, curr->anchoredMinDistance,
local_buffer, local_alen,
roseAnchoredCallback, &scratch->tctxt);
} else {
nfaExecMcClellan16_B(nfa, curr->anchoredMinDistance,
local_buffer, local_alen,
roseAnchoredCallback, &scratch->tctxt);
}
}
if (!curr->next_offset) {
break;
}
curr = (const void *)((const char *)curr + curr->next_offset);
} while (1);
}
static really_inline
void init_sidecar(const struct RoseEngine *t, struct hs_scratch *scratch) {
if (!t->smatcherOffset) {
return;
}
DEBUG_PRINTF("welcome to the sidecar\n");
assert(t->initSideEnableOffset);
// We have to enable some sidecar literals
const char *template = (const char *)t + t->initSideEnableOffset;
memcpy(&scratch->side_enabled, template, t->stateOffsets.sidecar_size);
}
static really_inline
void init_state_for_block(const struct RoseEngine *t, u8 *state) {
assert(t);
assert(state);
DEBUG_PRINTF("init for Rose %p with %u roles (%u with state indices)\n",
t, t->roleCount, t->rolesWithStateCount);
// Rose is guaranteed 8-aligned state
assert(ISALIGNED_N(state, 8));
init_state(t, state);
}
static really_inline
void init_outfixes_for_block(const struct RoseEngine *t,
struct hs_scratch *scratch, u8 *state,
char is_small_block) {
/* active leaf array has been cleared by the init scatter */
if (t->initMpvNfa != MO_INVALID_IDX) {
assert(t->initMpvNfa == 0);
const struct NFA *nfa = getNfaByQueue(t, 0);
DEBUG_PRINTF("testing minwidth %u > len %zu\n", nfa->minWidth,
scratch->core_info.len);
size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf,
scratch->core_info.len);
if (len) {
struct RoseContext *tctxt = &scratch->tctxt;
u8 *activeArray = getActiveLeafArray(t, state);
const u32 activeArraySize = t->activeArrayCount;
const u32 qCount = t->queueCount;
mmbit_set(activeArray, activeArraySize, 0);
fatbit_set(scratch->aqa, qCount, 0);
struct mq *q = scratch->queues;
initQueue(q, 0, t, tctxt);
q->length = len; /* adjust for rev_accel */
nfaQueueInitState(nfa, q);
pushQueueAt(q, 0, MQE_START, 0);
pushQueueAt(q, 1, MQE_TOP, 0);
}
}
if (is_small_block && !t->hasOutfixesInSmallBlock) {
DEBUG_PRINTF("all outfixes in small block table\n");
return;
}
if (t->outfixBeginQueue != t->outfixEndQueue) {
blockInitSufPQ(t, state, scratch, is_small_block);
}
}
static really_inline
void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch,
RoseCallback callback, RoseCallbackSom som_callback,
void *ctxt, u8 *state, char is_small_block) {
init_state_for_block(t, state);
struct RoseContext *tctxt = &scratch->tctxt;
tctxt->t = t;
tctxt->depth = 1;
tctxt->groups = t->initialGroups;
tctxt->lit_offset_adjust = 1; // index after last byte
tctxt->delayLastEndOffset = 0;
tctxt->lastEndOffset = 0;
tctxt->filledDelayedSlots = 0;
tctxt->state = state;
tctxt->cb = callback;
tctxt->cb_som = som_callback;
tctxt->userCtx = ctxt;
tctxt->lastMatchOffset = 0;
tctxt->minMatchOffset = 0;
tctxt->minNonMpvMatchOffset = 0;
tctxt->next_mpv_offset = 0;
tctxt->curr_anchored_loc = MMB_INVALID;
tctxt->curr_row_offset = 0;
tctxt->side_curr = 0;
scratch->am_log_sum = 0; /* clear the anchored logs */
scratch->al_log_sum = 0;
fatbit_clear(scratch->aqa);
init_sidecar(t, scratch); /* Init the sidecar enabled state */
scratch->catchup_pq.qm_size = 0;
init_outfixes_for_block(t, scratch, state, is_small_block);
}
void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch,
RoseCallback callback, RoseCallbackSom som_callback,
void *ctx) {
assert(t);
assert(scratch);
assert(scratch->core_info.buf);
assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount)
< MAX_SPARSE_ITER_STATES);
const size_t length = scratch->core_info.len;
// We have optimizations for small block scans: we run a single coalesced
// HWLM scan instead of running the anchored and floating matchers. Some
// outfixes are disabled as well (for SEP scans of single-byte literals,
// which are also run in the HWLM scan).
const char is_small_block =
(length < ROSE_SMALL_BLOCK_LEN && t->sbmatcherOffset);
u8 *state = (u8 *)scratch->core_info.state;
init_for_block(t, scratch, callback, som_callback, ctx, state,
is_small_block);
struct RoseContext *tctxt = &scratch->tctxt;
if (is_small_block) {
const void *sbtable = getSBLiteralMatcher(t);
assert(sbtable);
size_t sblen = MIN(length, t->smallBlockDistance);
DEBUG_PRINTF("BEGIN SMALL BLOCK (over %zu/%zu)\n", sblen, length);
DEBUG_PRINTF("-- %016llx\n", tctxt->groups);
hwlmExec(sbtable, scratch->core_info.buf, sblen, 0, roseCallback,
tctxt, tctxt->groups);
goto exit;
}
const void *atable = getALiteralMatcher(t);
if (atable) {
if (t->amatcherMaxBiAnchoredWidth != ROSE_BOUND_INF
&& length > t->amatcherMaxBiAnchoredWidth) {
goto skip_atable;
}
if (length < t->amatcherMinWidth) {
goto skip_atable;
}
runAnchoredTableBlock(t, atable, scratch);
if (can_stop_matching(scratch)) {
goto exit;
}
resetAnchoredLog(t, scratch);
skip_atable:;
}
const struct HWLM *ftable = getFLiteralMatcher(t);
if (ftable) {
DEBUG_PRINTF("ftable fd=%u fmd %u\n", t->floatingDistance,
t->floatingMinDistance);
if (t->noFloatingRoots && tctxt->depth == 1) {
DEBUG_PRINTF("skip FLOATING: no inflight matches\n");
goto exit;
}
if (t->fmatcherMaxBiAnchoredWidth != ROSE_BOUND_INF
&& length > t->fmatcherMaxBiAnchoredWidth) {
goto exit;
}
if (length < t->fmatcherMinWidth) {
goto exit;
}
const u8 *buffer = scratch->core_info.buf;
size_t flen = length;
if (t->floatingDistance != ROSE_BOUND_INF) {
flen = MIN(t->floatingDistance, length);
}
if (flen <= t->floatingMinDistance) {
goto exit;
}
DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length);
DEBUG_PRINTF("-- %016llx\n", tctxt->groups);
hwlmExec(ftable, buffer, flen, t->floatingMinDistance,
roseCallback, tctxt, tctxt->groups);
}
exit:;
u8 dummy_delay_mask = 0;
if (cleanUpDelayed(length, 0, tctxt, &dummy_delay_mask)
== HWLM_TERMINATE_MATCHING) {
return;
}
assert(!can_stop_matching(scratch));
roseCatchUpTo(t, state, length, scratch, 0);
}
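
For context, the block-mode path above is what ultimately services a single-buffer hs_scan() call from the public API. A minimal caller sketch follows; the pattern, flags and data in it are illustrative assumptions, not anything this file prescribes.

#include <stdio.h>
#include <hs/hs.h>

// Match callback: fires for each match; returning non-zero halts the scan.
static int onMatch(unsigned int id, unsigned long long from,
                   unsigned long long to, unsigned int flags, void *ctx) {
    printf("pattern %u matched, ending at offset %llu\n", id, to);
    return 0;
}

int main(void) {
    hs_database_t *db = NULL;
    hs_compile_error_t *err = NULL;
    if (hs_compile("fo+bar", HS_FLAG_DOTALL, HS_MODE_BLOCK, NULL, &db,
                   &err) != HS_SUCCESS) {
        fprintf(stderr, "compile failed: %s\n", err->message);
        hs_free_compile_error(err);
        return 1;
    }
    hs_scratch_t *scratch = NULL;
    if (hs_alloc_scratch(db, &scratch) != HS_SUCCESS) {
        hs_free_database(db);
        return 1;
    }
    const char data[] = "xxfooobarxx";
    // Block-mode scan: one shot over a complete in-memory buffer.
    hs_scan(db, data, sizeof(data) - 1, 0, scratch, onMatch, NULL);
    hs_free_scratch(scratch);
    hs_free_database(db);
    return 0;
}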

src/rose/catchup.c (new file, 1281 lines; diff suppressed because it is too large)

src/rose/catchup.h (new file, 229 lines)

@@ -0,0 +1,229 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_CATCHUP_H
#define ROSE_CATCHUP_H
#include "hwlm/hwlm.h"
#include "runtime.h"
#include "scratch.h"
#include "rose_common.h"
#include "rose_internal.h"
#include "ue2common.h"
#include "nfa/nfa_internal.h"
#include "util/bitutils.h"
#include "util/multibit.h"
/*
* Rose has several components which run behind the main (floating table) clock
* and need to be caught up before we report matches.
*
* Currently we have to deal with:
* 1) Stored matches from the anchored matcher
* 2) Suffix/Outfix nfas
* 3) a single MPV nfa (chained) (which may also be triggered by (1) and (2)).
*
* The approach is to:
* A) build a priority queue of the suffix/outfixes based on their first match
* location
* B) process the matches from the anchored matcher in order
* C) As we report a match from (B) we interleave matches from the suffixes
* D) As we report matches from (B) and (C) we interleave matches from the
* mpv if it exists.
*/
/* Callbacks, defined in catchup.c */
hwlmcb_rv_t roseCatchUpSufAndChains(s64a loc, struct hs_scratch *scratch);
hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch);
hwlmcb_rv_t roseCatchUpAnchoredOnly(s64a loc, struct hs_scratch *scratch);
/* will only catch the mpv up to the last reported external match */
hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch);
/* will only catch the mpv up to the last reported external match */
hwlmcb_rv_t roseCatchUpAnchoredAndSuf(s64a loc, struct hs_scratch *scratch);
hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, u8 *state, s64a loc,
struct hs_scratch *scratch);
void blockInitSufPQ(const struct RoseEngine *t, u8 *state,
struct hs_scratch *scratch, char is_small_block);
void streamInitSufPQ(const struct RoseEngine *t, u8 *state,
struct hs_scratch *scratch);
static really_inline
hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, u8 *state,
s64a loc, struct hs_scratch *scratch) {
u64a cur_offset = loc + scratch->core_info.buf_offset;
assert(cur_offset >= scratch->tctxt.minMatchOffset);
if (0) {
quick_exit:
updateMinMatchOffsetFromMpv(&scratch->tctxt, cur_offset);
return HWLM_CONTINUE_MATCHING;
}
if (!has_chained_nfas(t)) {
goto quick_exit;
}
/* note: we may have to run at less than tctxt.minMatchOffset as we may
* have a full queue of postponed events that we need to flush */
if (cur_offset < scratch->tctxt.next_mpv_offset) {
DEBUG_PRINTF("skipping cur_offset %lld min %lld, mpv %lld\n",
cur_offset, scratch->tctxt.minMatchOffset,
scratch->tctxt.next_mpv_offset);
goto quick_exit;
}
assert(t->activeArrayCount);
DEBUG_PRINTF("cur offset offset: %lld\n", cur_offset);
DEBUG_PRINTF("min match offset %llu\n", scratch->tctxt.minMatchOffset);
DEBUG_PRINTF("roseCatchUpMPV to %lld\n", loc);
assert(t->outfixBeginQueue == 1); /* if it exists mpv is queue 0 */
u8 *aa = getActiveLeafArray(t, state);
u32 aaCount = t->activeArrayCount;
if (!mmbit_isset(aa, aaCount, 0)) {
goto quick_exit;
}
/* Note: chained tails MUST not participate in the priority queue as
* they may have events pushed on during this process which may be before
* the catch up point */
return roseCatchUpMPV_i(t, state, loc, scratch);
}
static really_inline
u64a currentAnchoredEnd(const struct RoseEngine *t, struct RoseContext *tctxt) {
if (tctxt->curr_anchored_loc == MMB_INVALID) {
return ANCHORED_MATCH_SENTINEL;
} else {
return tctxt->curr_anchored_loc + t->maxSafeAnchoredDROffset + 1;
}
}
/* catches up nfas, anchored matches and the mpv */
static rose_inline
hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, u8 *state, u64a end,
struct hs_scratch *scratch, char in_anchored) {
/* no need to catch up if we are at the same offset as last time */
if (end <= scratch->tctxt.minMatchOffset) {
/* we must already be up to date */
DEBUG_PRINTF("skip\n");
return HWLM_CONTINUE_MATCHING;
}
s64a loc = end - scratch->core_info.buf_offset;
if (end <= scratch->tctxt.minNonMpvMatchOffset) {
/* only need to catch up the mpv */
return roseCatchUpMPV(t, state, loc, scratch);
}
assert(t == scratch->tctxt.t);
assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset);
u64a curr_anchored_end = currentAnchoredEnd(t, &scratch->tctxt);
hwlmcb_rv_t rv;
if (in_anchored
|| curr_anchored_end == ANCHORED_MATCH_SENTINEL
|| curr_anchored_end > end) {
if (!t->activeArrayCount
|| !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
updateMinMatchOffset(&scratch->tctxt, end);
rv = HWLM_CONTINUE_MATCHING;
} else {
rv = roseCatchUpSufAndChains(loc, scratch);
}
} else {
if (!t->activeArrayCount) {
rv = roseCatchUpAnchoredOnly(loc, scratch);
} else {
rv = roseCatchUpAll(loc, scratch);
}
}
assert(rv != HWLM_CONTINUE_MATCHING
|| scratch->tctxt.minMatchOffset == end);
assert(rv != HWLM_CONTINUE_MATCHING
|| scratch->tctxt.minNonMpvMatchOffset == end);
return rv;
}
/* Catches up anything which may add triggers on the mpv: anchored matches
* and suf/outfixes. The MPV will be run only to intersperse matches in
* the output match stream if external matches are raised. */
static rose_inline
hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t, u8 *state,
u64a end, struct hs_scratch *scratch,
char in_anchored) {
/* no need to catch up if we are at the same offset as last time */
if (end <= scratch->tctxt.minNonMpvMatchOffset) {
/* we must already be up to date */
DEBUG_PRINTF("skip\n");
return HWLM_CONTINUE_MATCHING;
}
s64a loc = end - scratch->core_info.buf_offset;
assert(t == scratch->tctxt.t);
assert(t->activeArrayCount); /* mpv is in active array */
assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset);
u64a curr_anchored_end = currentAnchoredEnd(t, &scratch->tctxt);
if (in_anchored
|| curr_anchored_end == ANCHORED_MATCH_SENTINEL
|| curr_anchored_end > end) {
if (!t->mpvTriggeredByLeaf) {
/* no need to check as they never put triggers onto the mpv */
return HWLM_CONTINUE_MATCHING;
}
/* sadly, this branch rarely gets taken as the mpv itself is usually
* alive. */
if (!mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
scratch->tctxt.minNonMpvMatchOffset = end;
return HWLM_CONTINUE_MATCHING;
}
return roseCatchUpSuf(loc, scratch);
} else {
return roseCatchUpAnchoredAndSuf(loc, scratch);
}
}
#endif
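
The A-D approach described in the header comment above is essentially a k-way merge of per-engine match streams, keyed on each engine's next match offset. A toy model of that discipline (illustrative only; the real catchup.c must also handle queue exhaustion, the MPV and early termination):

#include <stdio.h>

struct stream { const unsigned *offsets; unsigned len, cur; };

/* Sift-down for a min-heap of streams keyed on each stream's next offset. */
static void heap_sift(struct stream **h, unsigned n, unsigned i) {
    for (;;) {
        unsigned l = 2 * i + 1, r = l + 1, m = i;
        if (l < n && h[l]->offsets[h[l]->cur] < h[m]->offsets[h[m]->cur]) m = l;
        if (r < n && h[r]->offsets[h[r]->cur] < h[m]->offsets[h[m]->cur]) m = r;
        if (m == i) return;
        struct stream *tmp = h[i]; h[i] = h[m]; h[m] = tmp;
        i = m;
    }
}

int main(void) {
    unsigned a[] = {2, 9, 12}, b[] = {5, 6}, c[] = {1, 30};
    struct stream s[] = {{a, 3, 0}, {b, 2, 0}, {c, 2, 0}};
    struct stream *heap[] = {&s[0], &s[1], &s[2]};
    unsigned n = 3;
    for (unsigned i = n; i-- > 0;) heap_sift(heap, n, i); /* heapify */
    while (n) {
        struct stream *top = heap[0];
        /* emit the globally earliest pending match */
        printf("match at offset %u\n", top->offsets[top->cur]);
        if (++top->cur == top->len) heap[0] = heap[--n]; /* stream done */
        heap_sift(heap, n, 0);
    }
    return 0;
}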

src/rose/counting_miracle.h (new file, 264 lines)

@@ -0,0 +1,264 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_COUNTING_MIRACLE_H
#define ROSE_COUNTING_MIRACLE_H
#include "ue2common.h"
#include "runtime.h"
#include "rose_internal.h"
#include "nfa/nfa_api_queue.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
/** \brief Maximum number of bytes to scan when looking for a "counting miracle"
* stop character. */
#define COUNTING_MIRACLE_LEN_MAX 256
static really_inline
char roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end,
u32 target_count, u32 *count_inout,
const u8 **d_out) {
assert(d <= d_end);
u32 count = *count_inout;
m128 chars = set16x8(c);
for (; d + 16 <= d_end; d_end -= 16) {
m128 data = loadu128(d_end - 16);
u32 z1 = movemask128(eq128(chars, data));
count += popcount32(z1);
if (count >= target_count) {
*d_out = d_end - 16;
*count_inout = count;
return 1;
}
}
if (d != d_end) {
char temp[sizeof(m128)];
assert(d + sizeof(temp) > d_end);
memset(temp, c + 1, sizeof(temp));
memcpy(temp, d, d_end - d);
m128 data = loadu128(temp);
u32 z1 = movemask128(eq128(chars, data));
count += popcount32(z1);
if (count >= target_count) {
*d_out = d;
*count_inout = count;
return 1;
}
}
*count_inout = count;
return 0;
}
#define GET_LO_4(chars) and128(chars, low4bits)
#define GET_HI_4(chars) rshift2x64(andnot128(low4bits, chars), 4)
static really_inline
u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison,
const u8 *d, const u8 *d_end,
u32 target_count, u32 *count_inout,
const u8 **d_out) {
assert(d <= d_end);
u32 count = *count_inout;
const m128 zeroes = zeroes128();
const m128 low4bits = _mm_set1_epi8(0xf);
for (; d + 16 <= d_end; d_end -= 16) {
m128 data = loadu128(d_end - 16);
m128 c_lo = pshufb(mask_lo, GET_LO_4(data));
m128 c_hi = pshufb(mask_hi, GET_HI_4(data));
m128 t = and128(c_lo, c_hi);
u32 z1 = movemask128(eq128(t, zeroes));
count += popcount32(z1 ^ 0xffff);
if (count >= target_count) {
*d_out = d_end - 16;
*count_inout = count;
return 1;
}
}
if (d != d_end) {
char temp[sizeof(m128)];
assert(d + sizeof(temp) > d_end);
memset(temp, poison, sizeof(temp));
memcpy(temp, d, d_end - d);
m128 data = loadu128(temp);
m128 c_lo = pshufb(mask_lo, GET_LO_4(data));
m128 c_hi = pshufb(mask_hi, GET_HI_4(data));
m128 t = and128(c_lo, c_hi);
u32 z1 = movemask128(eq128(t, zeroes));
count += popcount32(z1 ^ 0xffff);
if (count >= target_count) {
*d_out = d;
*count_inout = count;
return 1;
}
}
*count_inout = count;
return 0;
}
/**
* \brief "Counting Miracle" scan: If we see more than N instances of a
* particular character class we know that the engine must be dead.
*
* Scans the buffer/history between relative locations \a begin_loc and \a
* end_loc, and returns a miracle location (if any) that appears in the stream
* after \a begin_loc.
*
* Returns 1 if some bytes can be skipped and sets \a miracle_loc
* appropriately, 0 otherwise.
*/
static never_inline
int roseCountingMiracleOccurs(const struct RoseEngine *t,
const struct LeftNfaInfo *left,
const struct core_info *ci, s64a begin_loc,
const s64a end_loc, s64a *miracle_loc) {
if (!left->countingMiracleOffset) {
return 0;
}
const struct RoseCountingMiracle *cm
= (const void *)((const char *)t + left->countingMiracleOffset);
assert(!left->transient);
assert(cm->count > 1); /* a count of 1 would just be a normal miracle */
DEBUG_PRINTF("looking for counting miracle over [%lld,%lld], maxLag=%u\n",
begin_loc, end_loc, left->maxLag);
DEBUG_PRINTF("ci->len=%zu, ci->hlen=%zu\n", ci->len, ci->hlen);
assert(begin_loc <= end_loc);
assert(begin_loc >= -(s64a)ci->hlen);
assert(end_loc <= (s64a)ci->len);
const s64a scan_end_loc = end_loc - left->maxLag;
if (scan_end_loc <= begin_loc) {
DEBUG_PRINTF("nothing to scan\n");
return 0;
}
const s64a start = MAX(begin_loc, scan_end_loc - COUNTING_MIRACLE_LEN_MAX);
DEBUG_PRINTF("scan [%lld..%lld]\n", start, scan_end_loc);
u32 count = 0;
s64a m_loc = start;
if (!cm->shufti) {
u8 c = cm->c;
// Scan buffer.
const s64a buf_scan_start = MAX(0, start);
if (scan_end_loc > buf_scan_start) {
const u8 *buf = ci->buf;
const u8 *d = buf + scan_end_loc;
const u8 *d_start = buf + buf_scan_start;
const u8 *d_out;
if (roseCountingMiracleScan(c, d_start, d, cm->count, &count,
&d_out)) {
assert(d_out >= d_start);
m_loc = (d_out - d_start) + buf_scan_start;
goto success;
}
}
// Scan history.
if (start < 0) {
const u8 *hbuf_end = ci->hbuf + ci->hlen;
const u8 *d = hbuf_end + MIN(0, scan_end_loc);
const u8 *d_start = hbuf_end + start;
const u8 *d_out;
if (roseCountingMiracleScan(c, d_start, d, cm->count, &count,
&d_out)) {
assert(d_out >= d_start);
m_loc = (d_out - d_start) + start;
goto success;
}
}
} else {
m128 lo = cm->lo;
m128 hi = cm->hi;
u8 poison = cm->poison;
// Scan buffer.
const s64a buf_scan_start = MAX(0, start);
if (scan_end_loc > buf_scan_start) {
const u8 *buf = ci->buf;
const u8 *d = buf + scan_end_loc;
const u8 *d_start = buf + buf_scan_start;
const u8 *d_out;
if (roseCountingMiracleScanShufti(lo, hi, poison, d_start, d,
cm->count, &count, &d_out)) {
assert(d_out >= d_start);
m_loc = (d_out - d_start) + buf_scan_start;
goto success;
}
}
// Scan history.
if (start < 0) {
const u8 *hbuf_end = ci->hbuf + ci->hlen;
const u8 *d = hbuf_end + MIN(0, scan_end_loc);
const u8 *d_start = hbuf_end + start;
const u8 *d_out;
if (roseCountingMiracleScanShufti(lo, hi, poison, d_start, d,
cm->count, &count, &d_out)) {
assert(d_out >= d_start);
m_loc = (d_out - d_start) + start;
goto success;
}
}
}
DEBUG_PRINTF("found %u/%u\n", count, cm->count);
return 0;
success:
DEBUG_PRINTF("found %u/%u\n", count, cm->count);
assert(count >= cm->count);
assert(m_loc < scan_end_loc);
assert(m_loc >= start);
*miracle_loc = m_loc;
return 1;
}
#endif
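
The SIMD routines above scan backwards in 16-byte chunks; a plain scalar equivalent of roseCountingMiracleScan may make the contract clearer. This is an illustrative sketch only (the real code reports d_out at chunk granularity rather than per byte):

/* Walk backwards from d_end, counting bytes equal to c; once the running
 * count reaches target_count, the engine is known to be dead at every
 * location at or after the reported byte. */
char countingMiracleScanScalar(unsigned char c, const unsigned char *d,
                               const unsigned char *d_end,
                               unsigned target_count, unsigned *count_inout,
                               const unsigned char **d_out) {
    unsigned count = *count_inout;
    for (const unsigned char *p = d_end; p > d;) {
        if (*--p == c && ++count >= target_count) {
            *d_out = p;
            *count_inout = count;
            return 1;
        }
    }
    *count_inout = count;
    return 0;
}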

src/rose/eod.c (new file, 390 lines)

@@ -0,0 +1,390 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "catchup.h"
#include "match.h"
#include "rose_sidecar_runtime.h"
#include "rose.h"
#include "util/fatbit.h"
static really_inline
void initContext(const struct RoseEngine *t, u8 *state, u64a offset,
struct hs_scratch *scratch, RoseCallback callback,
RoseCallbackSom som_callback, void *ctx) {
struct RoseRuntimeState *rstate = getRuntimeState(state);
struct RoseContext *tctxt = &scratch->tctxt;
tctxt->t = t;
tctxt->depth = rstate->stored_depth;
tctxt->groups = loadGroups(t, state); /* TODO: diff groups for eod */
tctxt->lit_offset_adjust = scratch->core_info.buf_offset
- scratch->core_info.hlen
+ 1; // index after last byte
tctxt->delayLastEndOffset = offset;
tctxt->lastEndOffset = offset;
tctxt->filledDelayedSlots = 0;
tctxt->state = state;
tctxt->cb = callback;
tctxt->cb_som = som_callback;
tctxt->userCtx = ctx;
tctxt->lastMatchOffset = 0;
tctxt->minMatchOffset = 0;
tctxt->minNonMpvMatchOffset = 0;
tctxt->next_mpv_offset = 0;
tctxt->curr_anchored_loc = MMB_INVALID;
tctxt->curr_row_offset = 0;
scratch->catchup_pq.qm_size = 0;
scratch->al_log_sum = 0; /* clear the anchored logs */
fatbit_clear(scratch->aqa);
}
static rose_inline
hwlmcb_rv_t roseEodRunMatcher(const struct RoseEngine *t, u64a offset,
struct hs_scratch *scratch,
const char is_streaming) {
assert(t->ematcherOffset);
size_t eod_len;
const u8 *eod_data;
if (!is_streaming) { /* Block */
eod_data = scratch->core_info.buf;
eod_len = scratch->core_info.len;
} else { /* Streaming */
eod_len = scratch->core_info.hlen;
eod_data = scratch->core_info.hbuf;
}
assert(eod_data);
assert(eod_len);
// If we don't have enough bytes to produce a match from an EOD table scan,
// there's no point scanning.
if (eod_len < t->eodmatcherMinWidth) {
DEBUG_PRINTF("len=%zu < eodmatcherMinWidth=%u\n", eod_len,
t->eodmatcherMinWidth);
return MO_CONTINUE_MATCHING;
}
// Ensure that we only need to scan the last N bytes, where N is the length
// of the eod-anchored matcher region.
size_t adj = eod_len - MIN(eod_len, t->ematcherRegionSize);
DEBUG_PRINTF("eod offset=%llu, eod length=%zu\n", offset, eod_len);
struct RoseContext *tctxt = &scratch->tctxt;
/* update side_curr for eod_len */
tctxt->side_curr = offset - eod_len;
/* no need to enable any sidecar groups as they are for .*A.* constructs
* not allowed in the eod table */
const struct HWLM *etable = getELiteralMatcher(t);
hwlmExec(etable, eod_data, eod_len, adj, roseCallback, tctxt, tctxt->groups);
// We may need to fire delayed matches
u8 dummy_delay_mask = 0;
return cleanUpDelayed(0, offset, tctxt, &dummy_delay_mask);
}
static rose_inline
int roseEodRunIterator(const struct RoseEngine *t, u8 *state, u64a offset,
struct hs_scratch *scratch) {
if (!t->eodIterOffset) {
return MO_CONTINUE_MATCHING;
}
const struct RoseRole *roleTable = getRoleTable(t);
const struct RosePred *predTable = getPredTable(t);
const struct RoseIterMapping *iterMapBase
= getByOffset(t, t->eodIterMapOffset);
const struct mmbit_sparse_iter *it = getByOffset(t, t->eodIterOffset);
assert(ISALIGNED(iterMapBase));
assert(ISALIGNED(it));
// Sparse iterator state was allocated earlier
struct mmbit_sparse_state *s = scratch->sparse_iter_state;
struct fatbit *handled_roles = scratch->handled_roles;
const u32 numStates = t->rolesWithStateCount;
void *role_state = getRoleState(state);
u32 idx = 0;
u32 i = mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s);
fatbit_clear(handled_roles);
for (; i != MMB_INVALID;
i = mmbit_sparse_iter_next(role_state, numStates, i, &idx, it, s)) {
DEBUG_PRINTF("pred state %u (iter idx=%u) is on\n", i, idx);
const struct RoseIterMapping *iterMap = iterMapBase + idx;
const struct RoseIterRole *roles = getByOffset(t, iterMap->offset);
assert(ISALIGNED(roles));
DEBUG_PRINTF("%u roles to consider\n", iterMap->count);
for (u32 j = 0; j != iterMap->count; j++) {
u32 role = roles[j].role;
assert(role < t->roleCount);
DEBUG_PRINTF("checking role %u, pred %u:\n", role, roles[j].pred);
const struct RoseRole *tr = roleTable + role;
if (fatbit_isset(handled_roles, t->roleCount, role)) {
DEBUG_PRINTF("role %u already handled by the walk, skip\n",
role);
continue;
}
// Special case: if this role is a trivial case (pred type simple)
// we don't need to check any history and we already know the pred
// role is on.
if (tr->flags & ROSE_ROLE_PRED_SIMPLE) {
DEBUG_PRINTF("pred type is simple, no need for checks\n");
} else {
assert(roles[j].pred < t->predCount);
const struct RosePred *tp = predTable + roles[j].pred;
if (!roseCheckPredHistory(tp, offset)) {
continue;
}
}
/* mark role as handled so we don't touch it again in this walk */
fatbit_set(handled_roles, t->roleCount, role);
DEBUG_PRINTF("fire report for role %u, report=%u\n", role,
tr->reportId);
int rv = scratch->tctxt.cb(offset, tr->reportId,
scratch->tctxt.userCtx);
if (rv == MO_HALT_MATCHING) {
return MO_HALT_MATCHING;
}
}
}
return MO_CONTINUE_MATCHING;
}
static rose_inline
void roseCheckNfaEod(const struct RoseEngine *t, u8 *state,
struct hs_scratch *scratch, u64a offset,
const char is_streaming) {
/* data/len are used for state decompression; they should cover the full
 * available data */
const u8 *aa = getActiveLeafArray(t, state);
const u32 aaCount = t->activeArrayCount;
u8 key = 0;
if (is_streaming) {
const u8 *eod_data = scratch->core_info.hbuf;
size_t eod_len = scratch->core_info.hlen;
key = eod_len ? eod_data[eod_len - 1] : 0;
}
for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID;
qi = mmbit_iterate(aa, aaCount, qi)) {
const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
const struct NFA *nfa = getNfaByInfo(t, info);
if (!nfaAcceptsEod(nfa)) {
DEBUG_PRINTF("nfa %u does not accept eod\n", qi);
continue;
}
DEBUG_PRINTF("checking nfa %u\n", qi);
char *fstate = scratch->fullState + info->fullStateOffset;
const char *sstate = (const char *)state + info->stateOffset;
if (is_streaming) {
// Decompress stream state.
nfaExpandState(nfa, fstate, sstate, offset, key);
}
nfaCheckFinalState(nfa, fstate, sstate, offset, scratch->tctxt.cb,
scratch->tctxt.cb_som, scratch->tctxt.userCtx);
}
}
static rose_inline
void cleanupAfterEodMatcher(const struct RoseEngine *t, u8 *state, u64a offset,
struct hs_scratch *scratch) {
struct RoseContext *tctxt = &scratch->tctxt;
// Flush history to make sure it's consistent.
roseFlushLastByteHistory(t, state, offset, tctxt);
// Catch up the sidecar to cope with matches raised in the etable.
catchup_sidecar(tctxt, offset);
}
static rose_inline
void roseCheckEodSuffixes(const struct RoseEngine *t, u8 *state, u64a offset,
struct hs_scratch *scratch) {
const u8 *aa = getActiveLeafArray(t, state);
const u32 aaCount = t->activeArrayCount;
UNUSED u32 qCount = t->queueCount;
for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID;
qi = mmbit_iterate(aa, aaCount, qi)) {
const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
const struct NFA *nfa = getNfaByInfo(t, info);
assert(nfaAcceptsEod(nfa));
DEBUG_PRINTF("checking nfa %u\n", qi);
assert(fatbit_isset(scratch->aqa, qCount, qi)); /* we have just been
triggered */
char *fstate = scratch->fullState + info->fullStateOffset;
const char *sstate = (const char *)state + info->stateOffset;
struct mq *q = scratch->queues + qi;
pushQueueNoMerge(q, MQE_END, scratch->core_info.len);
q->context = NULL;
/* rose exec is used as we don't want to (and can't) raise matches in the
* history buffer. */
char rv = nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX);
if (rv) { /* nfa is still alive */
nfaCheckFinalState(nfa, fstate, sstate, offset, scratch->tctxt.cb,
scratch->tctxt.cb_som, scratch->tctxt.userCtx);
}
}
}
static really_inline
void roseEodExec_i(const struct RoseEngine *t, u8 *state, u64a offset,
struct hs_scratch *scratch, const char is_streaming) {
assert(t);
assert(scratch->core_info.buf || scratch->core_info.hbuf);
assert(!scratch->core_info.buf || !scratch->core_info.hbuf);
assert(!can_stop_matching(scratch));
// Fire the special EOD event literal.
if (t->hasEodEventLiteral) {
DEBUG_PRINTF("firing eod event id %u at offset %llu\n",
t->eodLiteralId, offset);
const struct core_info *ci = &scratch->core_info;
size_t len = ci->buf ? ci->len : ci->hlen;
assert(len || !ci->buf); /* len may be 0 if no history is required
* (only bounds checks can lead to this) */
roseRunEvent(len, t->eodLiteralId, &scratch->tctxt);
if (can_stop_matching(scratch)) {
DEBUG_PRINTF("user told us to stop\n");
return;
}
}
roseCheckNfaEod(t, state, scratch, offset, is_streaming);
if (!t->eodIterOffset && !t->ematcherOffset) {
DEBUG_PRINTF("no eod accepts\n");
return;
}
// Handle pending EOD reports.
int itrv = roseEodRunIterator(t, state, offset, scratch);
if (itrv == MO_HALT_MATCHING) {
return;
}
// Run the EOD anchored matcher if there is one.
if (t->ematcherOffset) {
assert(t->ematcherRegionSize);
// Unset the reports we just fired so we don't fire them again below.
mmbit_clear(getRoleState(state), t->rolesWithStateCount);
mmbit_clear(getActiveLeafArray(t, state), t->activeArrayCount);
sidecar_enabled_populate(t, scratch, state);
hwlmcb_rv_t rv = roseEodRunMatcher(t, offset, scratch, is_streaming);
if (rv == HWLM_TERMINATE_MATCHING) {
return;
}
cleanupAfterEodMatcher(t, state, offset, scratch);
// Fire any new EOD reports.
roseEodRunIterator(t, state, offset, scratch);
roseCheckEodSuffixes(t, state, offset, scratch);
}
}
void roseEodExec(const struct RoseEngine *t, u8 *state, u64a offset,
struct hs_scratch *scratch, RoseCallback callback,
RoseCallbackSom som_callback, void *context) {
assert(state);
assert(scratch);
assert(callback);
assert(context);
assert(t->requiresEodCheck);
DEBUG_PRINTF("ci buf %p/%zu his %p/%zu\n", scratch->core_info.buf,
scratch->core_info.len, scratch->core_info.hbuf,
scratch->core_info.hlen);
if (t->maxBiAnchoredWidth != ROSE_BOUND_INF
&& offset > t->maxBiAnchoredWidth) {
DEBUG_PRINTF("bailing, we are beyond max width\n");
/* also some of the history/state may be stale */
return;
}
initContext(t, state, offset, scratch, callback, som_callback, context);
roseEodExec_i(t, state, offset, scratch, 1);
}
static rose_inline
void prepForEod(const struct RoseEngine *t, u8 *state, size_t length,
struct RoseContext *tctxt) {
roseFlushLastByteHistory(t, state, length, tctxt);
tctxt->lastEndOffset = length;
if (t->requiresEodSideCatchup) {
catchup_sidecar(tctxt, length);
}
}
void roseBlockEodExec(const struct RoseEngine *t, u64a offset,
struct hs_scratch *scratch) {
assert(t->requiresEodCheck);
assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF
|| offset <= t->maxBiAnchoredWidth);
assert(!can_stop_matching(scratch));
u8 *state = (u8 *)scratch->core_info.state;
// Ensure that history is correct before we look for EOD matches
prepForEod(t, state, scratch->core_info.len, &scratch->tctxt);
roseEodExec_i(t, state, offset, scratch, 0);
}
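
In streaming mode, the EOD work above runs when the stream is closed rather than on any individual write. A hedged usage sketch against the public API (database and scratch setup as in the block-mode sketch earlier, but compiled with HS_MODE_STREAM; the pattern is an illustrative assumption):

#include <stdio.h>
#include <hs/hs.h>

static int onMatch(unsigned int id, unsigned long long from,
                   unsigned long long to, unsigned int flags, void *ctx) {
    printf("pattern %u matched, ending at offset %llu\n", id, to);
    return 0;
}

/* Assumes db holds a pattern like "foo$", which can only be confirmed once
 * the data is known to have ended. */
void scan_in_two_pieces(const hs_database_t *db, hs_scratch_t *scratch) {
    hs_stream_t *stream = NULL;
    hs_open_stream(db, 0, &stream);
    hs_scan_stream(stream, "xxf", 3, 0, scratch, onMatch, NULL);
    hs_scan_stream(stream, "oo", 2, 0, scratch, onMatch, NULL);
    /* "foo$" is reported here: the EOD path runs inside hs_close_stream,
     * not inside the individual hs_scan_stream calls. */
    hs_close_stream(stream, scratch, onMatch, NULL);
}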

src/rose/infix.h (new file, 154 lines)

@@ -0,0 +1,154 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef INFIX_H
#define INFIX_H
#include "ue2common.h"
#include "nfa/nfa_api.h"
#include "nfa/nfa_api_queue.h"
static really_inline
int infixTooOld(struct mq *q, s64a curr_loc) {
u32 maxAge = q->nfa->maxWidth;
if (!maxAge) {
return 0;
}
return q_last_loc(q) + maxAge < curr_loc;
}
/**
* Removes tops which are known not to affect the final state from the queue.
* May also reinitialise the engine state if it is unneeded.
*
* maxAge is the maximum width of the infix. Any tops/state before this can be
* ignored. 0 is used to indicate that there is no upper bound on the width of
* the pattern.
*
* maxTops is the maximum number of tops that can affect the final state.
* It is only possible for the last maxTops tops to affect the final state -
* earlier ones can be safely removed. Also, any state before the max tops may
* be ignored.
*
* This code assumes/requires that there are not multiple tops at the same
* location in the queue. This code also assumes that it is not a multitop
* engine.
*/
static really_inline
void reduceQueue(struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) {
assert(q->end > q->cur);
assert(maxTops);
u32 qlen = q->end - q->cur; /* includes MQE_START */
DEBUG_PRINTF("q=%p, len=%u, maxTops=%u maxAge=%u\n", q, qlen, maxTops,
maxAge);
char any_work = 0;
if (maxAge && q->items[q->cur].location + maxAge < curr_loc) {
any_work = 1;
}
if (qlen - 1 > maxTops) {
any_work = 1;
}
if (qlen - 1 == maxTops
&& q->items[q->cur].location != q->items[q->cur + 1].location) {
/* we can advance start to the first top location */
any_work = 1;
}
if (!any_work) {
DEBUG_PRINTF("nothing to do\n");
return;
}
#ifdef DEBUG
debugQueue(q);
#endif
char drop_state = qlen - 1 >= maxTops
|| (maxAge && q->items[q->cur].location + maxAge < curr_loc);
LIMIT_TO_AT_MOST(&maxTops, qlen - 1);
// We leave our START where it is, at the front of the queue.
assert(q->items[q->cur].type == MQE_START);
// We want to shuffle maxTops items from the end of the queue to just
// after the start, effectively dequeuing old items. We could use memmove
// for this, but it's probably not a good idea to take the cost of the
// function call.
const struct mq_item *src = &q->items[q->cur + qlen - maxTops];
q->items[0] = q->items[q->cur]; /* shift start event to 0 slot */
q->cur = 0;
q->end = 1;
struct mq_item *dst = &q->items[1];
u32 i = 0;
if (maxAge) {
/* any event which is older than maxAge can be dropped */
for (; i < maxTops; i++, src++) {
if (src->location >= curr_loc - maxAge) {
break;
}
}
}
for (; i < maxTops; i++) {
*dst = *src;
src++;
dst++;
q->end++;
}
if (drop_state) {
/* clear state and shift start up to first top */
s64a new_loc;
if (q->end > 1) {
new_loc = q->items[1].location;
} else {
DEBUG_PRINTF("no tops\n");
new_loc = curr_loc;
}
DEBUG_PRINTF("advancing start from %lld to %lld\n",
q->items[0].location, new_loc);
assert(new_loc > q->items[0].location);
q->items[0].location = new_loc;
nfaQueueInitState(q->nfa, q);
}
DEBUG_PRINTF("reduced queue to len=%u\n", q->end - q->cur);
#ifdef DEBUG
debugQueue(q);
#endif
}
#endif
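
The trimming rule reduceQueue implements can be stated on its own: of the queued tops (in increasing location order), only the last maxTops can matter, and of those only the ones within maxAge of the current location. A toy model under those assumptions:

#include <stdio.h>

/* Returns the index of the first top that can still affect the final state;
 * everything before it can be dropped. max_age == 0 means unbounded width. */
static unsigned trim_tops(long long *loc, unsigned n, long long curr_loc,
                          unsigned max_tops, unsigned max_age) {
    unsigned start = n > max_tops ? n - max_tops : 0;
    if (max_age) {
        while (start < n && loc[start] + max_age < curr_loc) {
            start++; /* too old to still matter */
        }
    }
    return start;
}

int main(void) {
    long long tops[] = {10, 40, 70, 95};
    unsigned drop = trim_tops(tops, 4, 100, 3, 50);
    printf("first %u top(s) can be dropped\n", drop); /* drops 10 and 40 */
    return 0;
}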

src/rose/init.c (new file, 120 lines)

@@ -0,0 +1,120 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "init.h"
#include "match.h"
#include "runtime.h"
#include "scratch.h"
#include "rose.h"
#include "rose_common.h"
#include "rose_internal.h"
#include "ue2common.h"
#include "nfa/mcclellan.h"
#include "nfa/nfa_api_util.h"
#include "nfa/nfa_internal.h"
#include "sidecar/sidecar.h"
#include "sidecar/sidecar_internal.h"
#include "util/multibit.h"
#include <string.h>
static really_inline
void init_rstate(const struct RoseEngine *t, u8 *state) {
// Set runtime state: initial depth is 1 and we take our initial groups
// from the RoseEngine.
DEBUG_PRINTF("setting initial groups to 0x%016llx\n", t->initialGroups);
struct RoseRuntimeState *rstate = getRuntimeState(state);
rstate->stored_depth = 1;
storeGroups(t, state, t->initialGroups);
rstate->flags = 0;
rstate->broken = NOT_BROKEN;
}
static really_inline
void init_sidecar(const struct RoseEngine *t, u8 *state) {
assert(getSLiteralMatcher(t));
struct sidecar_enabled *enabled_state
= (struct sidecar_enabled *)(state + t->stateOffsets.sidecar);
DEBUG_PRINTF("welcome to the sidecar\n");
assert(t->initSideEnableOffset);
// We have to enable some sidecar literals
const char *template = (const char *)t + t->initSideEnableOffset;
memcpy(enabled_state, template, t->stateOffsets.sidecar_size);
}
static really_inline
void init_outfixes(const struct RoseEngine *t, u8 *state) {
/* The active leaf array has been init'ed by the scatter with outfix
* bits set on */
// Init the NFA state for each outfix.
for (u32 qi = t->outfixBeginQueue; qi < t->outfixEndQueue; qi++) {
const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
const struct NFA *nfa = getNfaByInfo(t, info);
nfaInitCompressedState(nfa, 0, state + info->stateOffset,
0 /* assume NUL at start */);
}
if (t->initMpvNfa != MO_INVALID_IDX) {
const struct NfaInfo *info = getNfaInfoByQueue(t, t->initMpvNfa);
const struct NFA *nfa = getNfaByInfo(t, info);
nfaInitCompressedState(nfa, 0, state + info->stateOffset,
0 /* assume NUL at start */);
mmbit_set(getActiveLeafArray(t, state), t->activeArrayCount,
t->initMpvNfa);
}
}
void roseInitState(const struct RoseEngine *t, u8 *state) {
assert(t);
assert(state);
DEBUG_PRINTF("init for Rose %p with %u roles (%u with state indices)\n",
t, t->roleCount, t->rolesWithStateCount);
// Rose is guaranteed 8-aligned state
assert(ISALIGNED_N(state, 8));
init_rstate(t, state);
// Init the sidecar state
if (t->smatcherOffset) {
init_sidecar(t, state);
}
init_state(t, state);
init_outfixes(t, state);
// Clear the floating matcher state, if any.
DEBUG_PRINTF("clearing %u bytes of floating matcher state\n",
t->floatingStreamState);
memset(getFloatingMatcherState(t, state), 0, t->floatingStreamState);
}

src/rose/init.h (new file, 46 lines)

@@ -0,0 +1,46 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_INIT_H
#define ROSE_INIT_H
#include "rose_internal.h"
#include "ue2common.h"
#include "util/scatter_runtime.h"
/*
* Initialisation code common to streaming mode Rose (init.c) and block mode
* Rose (block.c).
*/
static really_inline
void init_state(const struct RoseEngine *t, u8 *state) {
scatter(state, t, &t->state_init);
}
#endif // ROSE_INIT_H
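
The scatter in init_state replays a compiler-built list of typed (offset, value) writes over the state region (see util/scatter_runtime.h). A toy model with a single illustrative unit type; the field names here are assumptions, not the real layout:

#include <string.h>

/* Hypothetical single-width unit; the real plan carries separate u8/u16/
 * u32/u64 unit lists so that each write is done at its natural width. */
struct toy_scatter_unit {
    unsigned offset; /* byte offset into the state region */
    unsigned value;  /* value to write there */
};

/* Replay the precomputed writes: one pass, no branching on state layout. */
void toy_scatter(unsigned char *state, const struct toy_scatter_unit *units,
                 unsigned count) {
    for (unsigned i = 0; i < count; i++) {
        memcpy(state + units[i].offset, &units[i].value,
               sizeof(units[i].value));
    }
}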

src/rose/match.c (new file, 2127 lines; diff suppressed because it is too large)

src/rose/match.h (new file, 326 lines)

@@ -0,0 +1,326 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_MATCH_H
#define ROSE_MATCH_H
#include "hwlm/hwlm.h"
#include "runtime.h"
#include "scratch.h"
#include "rose_common.h"
#include "rose_internal.h"
#include "ue2common.h"
#include "nfa/nfa_api.h"
#include "nfa/nfa_api_queue.h"
#include "nfa/nfa_api_util.h"
#include "som/som_runtime.h"
#include "util/bitutils.h"
#include "util/internal_report.h"
#include "util/multibit.h"
/* Callbacks, defined in catchup.c */
int roseNfaAdaptor(u64a offset, ReportID id, void *context);
int roseNfaAdaptorNoInternal(u64a offset, ReportID id, void *context);
int roseNfaSomAdaptor(u64a from_offset, u64a offset, ReportID id, void *context);
/* Callbacks, defined in match.c */
hwlmcb_rv_t roseCallback(size_t start, size_t end, u32 id, void *ctx);
hwlmcb_rv_t roseDelayRebuildCallback(size_t start, size_t end, u32 id,
void *ctx);
int roseAnchoredCallback(u64a end, u32 id, void *ctx);
void roseRunEvent(size_t end, u32 id, struct RoseContext *tctxt);
/* Common code, used all over Rose runtime */
static rose_inline
void resetAnchoredLog(const struct RoseEngine *t, struct hs_scratch *scratch) {
u8 **anchoredRows = getAnchoredLog(scratch);
u32 region_width = t->anchoredMatches;
struct RoseContext *tctxt = &scratch->tctxt;
tctxt->curr_anchored_loc = bf64_iterate(scratch->am_log_sum, MMB_INVALID);
if (tctxt->curr_anchored_loc != MMB_INVALID) {
assert(tctxt->curr_anchored_loc < scratch->anchored_region_len);
u8 *curr_row = anchoredRows[tctxt->curr_anchored_loc];
tctxt->curr_row_offset = mmbit_iterate(curr_row, region_width,
MMB_INVALID);
assert(tctxt->curr_row_offset != MMB_INVALID);
}
DEBUG_PRINTF("AL reset --> %u, %u\n", tctxt->curr_anchored_loc,
tctxt->curr_row_offset);
}
hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r,
u64a end, struct RoseContext *tctxt,
char in_anchored, char in_catchup);
static really_inline
void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t,
struct RoseContext *tctxt) {
struct hs_scratch *scratch = tctxtToScratch(tctxt);
const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
assert(scratch->fullState);
q->nfa = getNfaByInfo(t, info);
q->end = 0;
q->cur = 0;
q->state = scratch->fullState + info->fullStateOffset;
q->streamState = (char *)tctxt->state + info->stateOffset;
q->offset = scratch->core_info.buf_offset;
q->buffer = scratch->core_info.buf;
q->length = scratch->core_info.len;
q->history = scratch->core_info.hbuf;
q->hlength = scratch->core_info.hlen;
if (info->only_external) {
q->cb = roseNfaAdaptorNoInternal;
} else {
q->cb = roseNfaAdaptor;
}
q->som_cb = roseNfaSomAdaptor;
q->context = tctxt;
q->report_current = 0;
DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
"state=%u\n", qi, q->offset, info->fullStateOffset,
info->stateOffset, *(u32 *)q->state);
}
static really_inline
void initRoseQueue(const struct RoseEngine *t, u32 qi,
const struct LeftNfaInfo *left,
struct RoseContext *tctxt) {
struct hs_scratch *scratch = tctxtToScratch(tctxt);
struct mq *q = scratch->queues + qi;
const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
q->nfa = getNfaByInfo(t, info);
q->end = 0;
q->cur = 0;
q->state = scratch->fullState + info->fullStateOffset;
// Transient roses don't have stream state; we use tstate in scratch
// instead. The only reason we need this at ALL is for LimEx extended
// regions, which assume that they have access to q->streamState +
// compressedStateSize.
if (left->transient) {
q->streamState = (char *)scratch->tstate + info->stateOffset;
} else {
q->streamState = (char *)tctxt->state + info->stateOffset;
}
q->offset = scratch->core_info.buf_offset;
q->buffer = scratch->core_info.buf;
q->length = scratch->core_info.len;
q->history = scratch->core_info.hbuf;
q->hlength = scratch->core_info.hlen;
q->cb = NULL;
q->context = NULL;
q->report_current = 0;
DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
"state=%u\n", qi, q->offset, info->fullStateOffset,
info->stateOffset, *(u32 *)q->state);
}
/** returns 0 if there is space for two items (top and end) on the queue */
static really_inline
char isQueueFull(const struct mq *q) {
return q->end + 2 > MAX_MQE_LEN;
}
static really_inline
void loadStreamState(const struct NFA *nfa, struct mq *q, s64a loc) {
DEBUG_PRINTF("offset=%llu, length=%zu, hlength=%zu, loc=%lld\n",
q->offset, q->length, q->hlength, loc);
nfaExpandState(nfa, q->state, q->streamState, q->offset + loc,
queue_prev_byte(q, loc));
}
static really_inline
void storeRoseDelay(const struct RoseEngine *t, u8 *state,
const struct LeftNfaInfo *left, u32 loc) {
u32 di = left->lagIndex;
if (di == ROSE_OFFSET_INVALID) {
return;
}
assert(loc < 256); // ONE WHOLE BYTE!
DEBUG_PRINTF("storing rose delay %u in slot %u\n", loc, di);
u8 *leftfixDelay = getLeftfixLagTable(t, state);
assert(loc <= MAX_STORED_LEFTFIX_LAG);
leftfixDelay[di] = loc;
}
static really_inline
void setAsZombie(const struct RoseEngine *t, u8 *state,
const struct LeftNfaInfo *left) {
u32 di = left->lagIndex;
assert(di != ROSE_OFFSET_INVALID);
if (di == ROSE_OFFSET_INVALID) {
return;
}
u8 *leftfixDelay = getLeftfixLagTable(t, state);
leftfixDelay[di] = OWB_ZOMBIE_ALWAYS_YES;
}
/* loadRoseDelay MUST NOT be called on the first stream write as it is only
* initialized for running nfas on stream boundaries */
static really_inline
u32 loadRoseDelay(const struct RoseEngine *t, const u8 *state,
const struct LeftNfaInfo *left) {
u32 di = left->lagIndex;
if (di == ROSE_OFFSET_INVALID) {
return 0;
}
const u8 *leftfixDelay = getLeftfixLagTableConst(t, state);
u32 loc = leftfixDelay[di];
DEBUG_PRINTF("read rose delay %u from slot %u\n", loc, di);
return loc;
}
static really_inline
char isZombie(const struct RoseEngine *t, const u8 *state,
const struct LeftNfaInfo *left) {
u32 di = left->lagIndex;
assert(di != ROSE_OFFSET_INVALID);
if (di == ROSE_OFFSET_INVALID) {
return 0;
}
const u8 *leftfixDelay = getLeftfixLagTableConst(t, state);
DEBUG_PRINTF("read owb %hhu from slot %u\n", leftfixDelay[di], di);
return leftfixDelay[di] == OWB_ZOMBIE_ALWAYS_YES;
}
hwlmcb_rv_t flushQueuedLiterals_i(struct RoseContext *tctxt, u64a end);
static really_inline
hwlmcb_rv_t flushQueuedLiterals(struct RoseContext *tctxt, u64a end) {
if (tctxt->delayLastEndOffset == end) {
DEBUG_PRINTF("no progress, no flush\n");
return HWLM_CONTINUE_MATCHING;
}
if (!tctxt->filledDelayedSlots && !tctxtToScratch(tctxt)->al_log_sum) {
tctxt->delayLastEndOffset = end;
return HWLM_CONTINUE_MATCHING;
}
return flushQueuedLiterals_i(tctxt, end);
}
static really_inline
hwlmcb_rv_t cleanUpDelayed(size_t length, u64a offset, struct RoseContext *tctxt,
u8 *status) {
if (can_stop_matching(tctxtToScratch(tctxt))) {
return HWLM_TERMINATE_MATCHING;
}
if (flushQueuedLiterals(tctxt, length + offset)
== HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
if (tctxt->filledDelayedSlots) {
DEBUG_PRINTF("dirty\n");
*status |= DELAY_FLOAT_DIRTY;
} else {
*status &= ~DELAY_FLOAT_DIRTY;
}
tctxt->filledDelayedSlots = 0;
tctxt->delayLastEndOffset = offset;
return HWLM_CONTINUE_MATCHING;
}
static really_inline
void update_depth(struct RoseContext *tctxt, const struct RoseRole *tr) {
u8 d = MAX(tctxt->depth, tr->depth + 1);
assert(d >= tctxt->depth);
DEBUG_PRINTF("depth now %hhu was %hhu\n", d, tctxt->depth);
tctxt->depth = d;
}
static really_inline
int roseCheckHistoryAnch(const struct RosePred *tp, u64a end) {
DEBUG_PRINTF("end %llu min %u max %u\n", end, tp->minBound, tp->maxBound);
if (tp->maxBound == ROSE_BOUND_INF) {
return end >= tp->minBound;
} else {
return end >= tp->minBound && end <= tp->maxBound;
}
}
// Check that a predecessor's history requirements are satisfied.
static really_inline
int roseCheckPredHistory(const struct RosePred *tp, u64a end) {
DEBUG_PRINTF("pred type %u\n", tp->historyCheck);
if (tp->historyCheck == ROSE_ROLE_HISTORY_ANCH) {
return roseCheckHistoryAnch(tp, end);
}
assert(tp->historyCheck == ROSE_ROLE_HISTORY_NONE ||
tp->historyCheck == ROSE_ROLE_HISTORY_LAST_BYTE);
return 1;
}
/* Note: uses the stashed sparse iter state; must not be called by anything
* else that is currently using that state */
static rose_inline
void roseFlushLastByteHistory(const struct RoseEngine *t, u8 *state,
u64a currEnd, struct RoseContext *tctxt) {
if (!t->lastByteHistoryIterOffset) {
return;
}
struct hs_scratch *scratch = tctxtToScratch(tctxt);
struct core_info *ci = &scratch->core_info;
/* currEnd is last byte of string + 1 */
if (tctxt->lastEndOffset == ci->buf_offset + ci->len
|| currEnd != ci->buf_offset + ci->len) {
/* already flushed or it is not yet time to flush */
return;
}
DEBUG_PRINTF("flushing\n");
const struct mmbit_sparse_iter *it
= (const void *)((const char *)t + t->lastByteHistoryIterOffset);
const u32 numStates = t->rolesWithStateCount;
void *role_state = getRoleState(state);
mmbit_sparse_iter_unset(role_state, numStates, it,
scratch->sparse_iter_state);
}
#endif

138
src/rose/miracle.h Normal file

@@ -0,0 +1,138 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_MIRACLE_H
#define ROSE_MIRACLE_H
#include "ue2common.h"
#include "runtime.h"
#include "rose_internal.h"
/** \brief Maximum number of bytes to scan when looking for a "miracle" stop
* character. */
#define MIRACLE_LEN_MAX 32
static really_inline
u64a roseMiracleScan(const u8 *stop, const u8 *d, const u8 *d_start) {
assert(d >= d_start);
// Note: unrolling this loop manually does appear to reduce its
// performance. I'm sick of tilting at this particular windmill.
u32 mshift = 0;
do {
u64a s = (u64a)stop[*d];
if (s) {
s <<= mshift;
return s;
}
mshift++;
} while (--d >= d_start);
return 0;
}
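/*
 * Worked example (illustrative, assuming single-bit stop table entries):
 * scan the bytes "xyz" with d at 'z' and d_start at 'x', where
 * stop['z'] == 0 and stop['y'] == 0x1. The first iteration sees no stop
 * character and bumps mshift to 1; the second returns 0x1 << 1 == 0x2, so
 * the index of the set bit encodes how far back from d the stop character
 * lies. A return of 0 means no stop character in [d_start, d].
 */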
/**
* \brief "Miracle" scan: uses stop table to check if we can skip forward to a
* location where we know that the given rose engine will be in a known state.
*
* Scans the buffer/history between relative locations \a begin_loc and \a
* end_loc, and returns a miracle location (if any) that appears in the stream
* after \a begin_loc.
*
* Returns 1 if some bytes can be skipped and sets \a miracle_loc
* appropriately, 0 otherwise.
*/
static rose_inline
char roseMiracleOccurs(const struct RoseEngine *t,
const struct LeftNfaInfo *left,
const struct core_info *ci, const s64a begin_loc,
const s64a end_loc, s64a *miracle_loc) {
assert(!left->transient);
assert(left->stopTable);
DEBUG_PRINTF("looking for miracle over [%lld,%lld], maxLag=%u\n",
begin_loc, end_loc, left->maxLag);
DEBUG_PRINTF("ci->len=%zu, ci->hlen=%zu\n", ci->len, ci->hlen);
assert(begin_loc <= end_loc);
assert(begin_loc >= -(s64a)ci->hlen);
assert(end_loc <= (s64a)ci->len);
const u8 *stop = getByOffset(t, left->stopTable);
const s64a scan_end_loc = end_loc - left->maxLag;
if (scan_end_loc <= begin_loc) {
DEBUG_PRINTF("nothing to scan\n");
return 0;
}
const s64a start = MAX(begin_loc, scan_end_loc - MIRACLE_LEN_MAX);
DEBUG_PRINTF("scan [%lld..%lld]\n", start, scan_end_loc);
u64a s = 0; // state, on bits are miracle locations
// Scan buffer.
const s64a buf_scan_start = MAX(0, start);
if (scan_end_loc > buf_scan_start) {
const u8 *buf = ci->buf;
const u8 *d = buf + scan_end_loc - 1;
const u8 *d_start = buf + buf_scan_start;
s = roseMiracleScan(stop, d, d_start);
if (s) {
goto miracle_found;
}
}
// Scan history.
if (start < 0) {
const u8 *hbuf_end = ci->hbuf + ci->hlen;
const u8 *d = hbuf_end + MIN(0, scan_end_loc) - 1;
const u8 *d_start = hbuf_end + start;
s = roseMiracleScan(stop, d, d_start);
if (scan_end_loc > 0) {
// Shift s over to account for the buffer scan above.
s <<= scan_end_loc;
}
}
if (s) {
miracle_found:
DEBUG_PRINTF("s=0x%llx, ctz=%u\n", s, ctz64(s));
s64a loc = end_loc - left->maxLag - ctz64(s) - 1;
if (loc > begin_loc) {
DEBUG_PRINTF("miracle at %lld\n", loc);
*miracle_loc = loc;
return 1;
}
}
DEBUG_PRINTF("no viable miraculous stop characters found\n");
return 0;
}
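/*
 * Usage sketch (hypothetical caller, not part of this header): when catching
 * up a non-transient leftfix, a miracle lets the caller ignore all history
 * before the miracle location, as the engine's state there is known:
 *
 *     s64a miracle_loc;
 *     if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) {
 *         begin_loc = miracle_loc; // bytes before this point are irrelevant
 *     }
 */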
#endif // ROSE_MIRACLE_H

110
src/rose/rose.h Normal file

@@ -0,0 +1,110 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_H
#define ROSE_H
#include "rose_types.h"
#include "rose_internal.h"
#include "runtime.h"
#include "scratch.h"
#include "ue2common.h"
#include "util/multibit.h"
// Initialise state space for engine use.
void roseInitState(const struct RoseEngine *t, u8 *state);
void roseBlockEodExec(const struct RoseEngine *t, u64a offset,
struct hs_scratch *scratch);
void roseBlockExec_i(const struct RoseEngine *t, struct hs_scratch *scratch,
RoseCallback callback, RoseCallbackSom som_callback,
void *context);
/* assumes core_info in scratch has been init to point to data */
static really_inline
void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch,
RoseCallback callback, RoseCallbackSom som_callback,
void *context) {
assert(t);
assert(scratch);
assert(scratch->core_info.buf);
// If this block is shorter than our minimum width, then no pattern in this
// RoseEngine could match.
/* minWidth checks should have already been performed by the caller */
const size_t length = scratch->core_info.len;
assert(length >= t->minWidth);
// Similarly, we may have a maximum width (for engines constructed entirely
// of bi-anchored patterns).
/* This check is now handled by the interpreter */
assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF
|| length <= t->maxBiAnchoredWidth);
roseBlockExec_i(t, scratch, callback, som_callback, context);
if (!t->requiresEodCheck) {
return;
}
if (can_stop_matching(scratch)) {
DEBUG_PRINTF("bailing, already halted\n");
return;
}
struct mmbit_sparse_state *s = scratch->sparse_iter_state;
const u32 numStates = t->rolesWithStateCount;
u8 *state = (u8 *)scratch->core_info.state;
void *role_state = getRoleState(state);
u32 idx = 0;
const struct mmbit_sparse_iter *it
= (const void *)((const u8 *)t + t->eodIterOffset);
if (!t->ematcherOffset && !t->hasEodEventLiteral
&& !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)
&& (!t->eodIterOffset
|| mmbit_sparse_iter_begin(role_state, numStates, &idx, it, s)
== MMB_INVALID)) {
return;
}
roseBlockEodExec(t, length, scratch);
}
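/*
 * Call-order sketch (illustrative): a block-mode caller is expected to have
 * initialised the state space and pointed scratch->core_info at the data
 * before scanning:
 *
 *     roseInitState(t, state);
 *     // scratch->core_info.buf/len must already describe the block
 *     roseBlockExec(t, scratch, callback, som_callback, context);
 */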
/* assumes core_info in scratch has been init to point to data */
void roseStreamExec(const struct RoseEngine *t, u8 *state,
struct hs_scratch *scratch, RoseCallback callback,
RoseCallbackSom som_callback, void *context);
void roseEodExec(const struct RoseEngine *t, u8 *state, u64a offset,
struct hs_scratch *scratch, RoseCallback callback,
RoseCallbackSom som_callback, void *context);
#define ROSE_CONTINUE_MATCHING_NO_EXHAUST 2
#endif // ROSE_H

151
src/rose/rose_build.h Normal file

@@ -0,0 +1,151 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Rose Build interface.
*
* Rose Build interface. Everything you ever needed to feed literals in and
* get a RoseEngine out. This header should be everything needed by the rest
* of UE2.
*/
#ifndef ROSE_BUILD_H
#define ROSE_BUILD_H
#include "ue2common.h"
#include "rose_common.h"
#include "rose_in_graph.h"
#include "util/alloc.h"
#include "util/charreach.h"
#include "util/ue2string.h"
#include <memory>
#include <set>
#include <utility>
#include <vector>
#include <boost/core/noncopyable.hpp>
struct NFA;
struct SmallWriteEngine;
struct RoseEngine;
namespace ue2 {
struct BoundaryReports;
struct CompileContext;
struct raw_puff;
struct raw_som_dfa;
class CharReach;
class NGHolder;
class ReportManager;
class SomSlotManager;
class RoseDedupeAux {
public:
virtual ~RoseDedupeAux();
/** \brief True if we cannot establish that at most a single callback will
 * be generated at a given offset from this set of reports. */
virtual bool requiresDedupeSupport(const std::set<ReportID> &reports) const
= 0;
};
/** \brief Abstract interface intended for callers from elsewhere in the tree,
* real underlying implementation is RoseBuildImpl in rose_build_impl.h. */
class RoseBuild : boost::noncopyable {
public:
virtual ~RoseBuild();
/** \brief Adds a single literal. */
virtual void add(bool anchored, bool eod, const ue2_literal &lit,
const ue2::flat_set<ReportID> &ids) = 0;
virtual bool addRose(const RoseInGraph &ig, bool prefilter,
bool finalChance = false) = 0;
virtual bool addSombeRose(const RoseInGraph &ig) = 0;
virtual bool addOutfix(const NGHolder &h) = 0;
virtual bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) = 0;
virtual bool addOutfix(const raw_puff &rp) = 0;
virtual bool addChainTail(const raw_puff &rp, u32 *queue_out,
u32 *event_out) = 0;
/** \brief Returns true if we were able to add it as a mask. */
virtual bool add(bool anchored, const std::vector<CharReach> &mask,
const ue2::flat_set<ReportID> &reports) = 0;
/** \brief Attempts to add the graph to the anchored acyclic table. Returns
* true on success. */
virtual bool addAnchoredAcyclic(const NGHolder &graph) = 0;
virtual bool validateMask(const std::vector<CharReach> &mask,
const ue2::flat_set<ReportID> &reports,
bool anchored, bool eod) const = 0;
virtual void addMask(const std::vector<CharReach> &mask,
const ue2::flat_set<ReportID> &reports, bool anchored,
bool eod) = 0;
/** \brief Construct a runtime implementation. */
virtual ue2::aligned_unique_ptr<RoseEngine> buildRose(u32 minWidth) = 0;
virtual std::unique_ptr<RoseDedupeAux> generateDedupeAux() const = 0;
/** Get a unique report identifier for a prefix|infix engine */
virtual ReportID getNewNfaReport() = 0;
/** Note that we have seen a SOM pattern. */
virtual void setSom() = 0;
};
// Construct a usable Rose builder.
std::unique_ptr<RoseBuild> makeRoseBuilder(ReportManager &rm,
SomSlotManager &ssm,
const CompileContext &cc,
const BoundaryReports &boundary);
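/* Usage sketch (illustrative; rm, ssm, cc, boundary and some_report are
 * placeholders assumed to be constructed elsewhere):
 *
 *     auto build = makeRoseBuilder(rm, ssm, cc, boundary);
 *     build->add(false, false, ue2_literal("foobar", false), {some_report});
 *     auto engine = build->buildRose(0); // minWidth
 */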
bool roseCheckRose(const RoseInGraph &ig, bool prefilter,
const ReportManager &rm, const CompileContext &cc);
size_t roseSize(const RoseEngine *t);
/* used by heuristics to decide whether to build a small write engine. High
 * numbers indicate a lightweight rose. */
u32 roseQuality(const RoseEngine *t);
ue2::aligned_unique_ptr<RoseEngine>
roseAddSmallWrite(const RoseEngine *t, const SmallWriteEngine *smwr);
bool roseIsPureLiteral(const RoseEngine *t);
size_t maxOverlap(const ue2_literal &a, const ue2_literal &b, u32 b_delay);
} // namespace ue2
#endif // ROSE_BUILD_H

1986
src/rose/rose_build_add.cpp Normal file

File diff suppressed because it is too large

48
src/rose/rose_build_add_internal.h Normal file

@@ -0,0 +1,48 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_BUILD_ADD_INTERNAL_H
#define ROSE_BUILD_ADD_INTERNAL_H
#include "rose_graph.h"
#include "ue2common.h"
#include <set>
namespace ue2 {
class RoseBuildImpl;
RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent,
u32 minBound, u32 maxBound, u32 literalId,
size_t literalLength,
const ue2::flat_set<ReportID> &reports);
} // namespace ue2
#endif

779
src/rose/rose_build_add_mask.cpp Normal file

@@ -0,0 +1,779 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "rose_build_impl.h"
#include "ue2common.h"
#include "grey.h"
#include "rose_build_add_internal.h"
#include "rose_build_anchored.h"
#include "rose_in_util.h"
#include "hwlm/hwlm_literal.h"
#include "nfagraph/ng_depth.h"
#include "nfagraph/ng_dump.h"
#include "nfagraph/ng_holder.h"
#include "nfagraph/ng_limex.h"
#include "nfagraph/ng_reports.h"
#include "nfagraph/ng_util.h"
#include "nfagraph/ng_width.h"
#include "util/charreach.h"
#include "util/charreach_util.h"
#include "util/compare.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/graph.h"
#include "util/make_unique.h"
#include "util/ue2string.h"
#include "util/verify_types.h"
#include <algorithm>
#include <map>
#include <set>
#include <string>
#include <vector>
#include <utility>
using namespace std;
namespace ue2 {
#define MIN_MASK_LIT_LEN 2
#define MAX_MASK_SIZE 255
#define MAX_MASK_LITS 30
static
void findMaskLiteral(const vector<CharReach> &mask, bool streaming,
ue2_literal *lit, u32 *offset, const Grey &grey) {
bool case_fixed = false;
bool nocase = false;
size_t best_begin = 0;
size_t best_end = 0;
size_t best_len = 0;
size_t begin = 0;
size_t end = 0;
for (size_t i = 0; i < mask.size(); i++) {
bool fail = false;
if (mask[i].count() != 1 && !mask[i].isCaselessChar()) {
DEBUG_PRINTF("hit non-literal char, resetting at %zu\n", i);
fail = true;
}
if (!fail && streaming && (end >= grey.maxHistoryAvailable + 1)) {
DEBUG_PRINTF("hit literal limit, resetting at %zu\n", i);
fail = true;
}
if (!fail && case_fixed && mask[i].isAlpha()) {
if (nocase && mask[i].count() != 2) {
fail = true;
}
if (!nocase && mask[i].count() != 1) {
fail = true;
}
}
if (fail) {
case_fixed = false;
nocase = false;
size_t len = end - begin;
bool better = len > best_len;
if (better) {
best_begin = begin;
best_end = end;
best_len = len;
}
begin = i + 1;
end = i + 1;
} else {
assert(end == i);
end = i + 1;
if (mask[i].isAlpha()) {
case_fixed = true;
nocase = mask[i].count() == 2;
}
}
}
size_t len = end - begin;
/* Everybody would rather trigger towards the end */
bool better = len >= best_len && mask.size() - end <= MAX_DELAY;
if (better) {
best_begin = begin;
best_end = end;
best_len = len;
}
for (size_t i = best_begin; i < best_end; i++) {
assert(mask[i].count() == 1 || mask[i].count() == 2);
lit->push_back(mask[i].find_first(), mask[i].count() > 1);
}
*offset = verify_u32(best_begin);
}
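/* Worked example (illustrative): for the mask {'a'}{'b'}{[0-9]}{'c'}{'d'},
 * both "ab" and "cd" are maximal literal runs; the tie is broken in favour
 * of the later run (we prefer triggers near the end), so *lit becomes "cd"
 * and *offset becomes 3. */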
static
bool initFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
char c = (char)i;
bool nocase = myisupper(c) && cr.test(mytolower(c));
if (myislower(c) && cr.test(mytoupper(c))) {
continue;
}
if (cand->size() >= MAX_MASK_LITS) {
DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
return false;
}
cand->emplace_back(c, nocase);
}
assert(cand->size() <= MAX_MASK_LITS);
return !cand->empty();
}
static
bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> *cand) {
DEBUG_PRINTF("expanding string with cr of %zu\n", cr.count());
DEBUG_PRINTF(" current cand list size %zu\n", cand->size());
vector<ue2_literal> curr;
for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
char c = (char)i;
bool nocase = myisupper(c) && cr.test(mytolower(c));
if (myislower(c) && cr.test(mytoupper(c))) {
continue;
}
for (const auto &lit : *cand) {
if (curr.size() >= MAX_MASK_LITS) {
DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
return false;
}
curr.emplace_back(c, nocase);
curr.back() += lit;
}
}
if (curr.back().length() > MAX_MASK2_WIDTH &&
any_of(begin(curr), end(curr), mixed_sensitivity)) {
DEBUG_PRINTF("mixed-sensitivity lit is too long, stopping\n");
return false;
}
assert(curr.size() <= MAX_MASK_LITS);
cand->swap(curr);
return true;
}
static
u32 scoreFmlCandidates(const vector<ue2_literal> &cand) {
if (cand.empty()) {
DEBUG_PRINTF("no candidates\n");
return 0;
}
const u32 len = cand.back().length();
DEBUG_PRINTF("length = %u count %zu\n", len, cand.size());
u32 min_period = len;
for (const auto &lit : cand) {
u32 period = lit.length() - maxStringSelfOverlap(lit);
min_period = min(min_period, period);
}
DEBUG_PRINTF("min_period %u\n", min_period);
u32 length_score =
(5 * min_period + len) * (cand.back().any_nocase() ? 90 : 100);
u32 count_penalty;
if (len > 4) {
count_penalty = 9 * len * cand.size();
} else {
count_penalty = 5 * cand.size();
}
if (length_score <= count_penalty) {
return 1;
}
return length_score - count_penalty;
}
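/* Example (illustrative): a single candidate "abcd" has no self-overlap, so
 * min_period == 4, length_score == (5 * 4 + 4) * 100 == 2400 and
 * count_penalty == 5 * 1 == 5, for a score of 2395. Repetitive or numerous
 * candidates score lower, steering selection towards distinctive triggers. */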
/* favours later literals */
static
bool findMaskLiterals(const vector<CharReach> &mask, vector<ue2_literal> *lit,
u32 *minBound, u32 *length) {
*minBound = 0;
*length = 0;
vector<ue2_literal> candidates, best_candidates;
u32 best_score = 0;
u32 best_minOffset = 0;
vector<CharReach>::const_iterator it, itb, ite;
for (it = itb = mask.begin(), ite = mask.end(); it != ite; ++it) {
candidates.clear();
if (!initFmlCandidates(*it, &candidates)) {
DEBUG_PRINTF("failed to init\n");
continue;
}
DEBUG_PRINTF("++\n");
vector<CharReach>::const_iterator jt = it;
while (jt != itb) {
--jt;
DEBUG_PRINTF("--\n");
if (!expandFmlCandidates(*jt, &candidates)) {
DEBUG_PRINTF("expansion stopped\n");
break;
}
}
u32 score = scoreFmlCandidates(candidates);
DEBUG_PRINTF("scored %u for literal set of size %zu\n", score,
candidates.size());
if (!candidates.empty() && score >= best_score) {
best_minOffset = it - itb - candidates.back().length() + 1;
best_candidates.swap(candidates);
best_score = score;
}
}
if (!best_score) {
DEBUG_PRINTF("no lits\n");
return false;
}
*minBound = best_minOffset;
*length = best_candidates.back().length();
DEBUG_PRINTF("best minbound %u length %u\n", *minBound, *length);
for (const auto &cand : best_candidates) {
assert(cand.length() == *length);
lit->push_back(cand);
}
return true;
}
static
unique_ptr<NGHolder> buildMaskLhs(bool anchored, u32 prefix_len,
const vector<CharReach> &mask) {
DEBUG_PRINTF("build %slhs len %u/%zu\n", anchored ? "anc " : "", prefix_len,
mask.size());
unique_ptr<NGHolder> lhs = ue2::make_unique<NGHolder>(NFA_PREFIX);
assert(prefix_len);
assert(mask.size() >= prefix_len);
NFAVertex pred = anchored ? lhs->start : lhs->startDs;
u32 m_idx = 0;
while (prefix_len--) {
NFAVertex v = add_vertex(*lhs);
(*lhs)[v].char_reach = mask[m_idx++];
add_edge(pred, v, *lhs);
pred = v;
}
add_edge(pred, lhs->accept, *lhs);
(*lhs)[pred].reports.insert(0);
return lhs;
}
static
void buildLiteralMask(const vector<CharReach> &mask, vector<u8> &msk,
vector<u8> &cmp, u32 delay) {
msk.clear();
cmp.clear();
if (mask.size() <= delay) {
return;
}
// Construct an and/cmp mask from our mask ending at delay positions before
// the end of the literal, with max length HWLM_MASKLEN.
auto ite = mask.end() - delay;
auto it = ite - min(size_t{HWLM_MASKLEN}, mask.size() - delay);
for (; it != ite; ++it) {
msk.push_back(0);
cmp.push_back(0);
make_and_cmp_mask(*it, &msk.back(), &cmp.back());
}
assert(msk.size() == cmp.size());
assert(msk.size() <= HWLM_MASKLEN);
}
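/* Worked example (illustrative, assuming make_and_cmp_mask() emits
 * msk == 0xff, cmp == c for a fixed char and msk == 0xdf (case clear),
 * cmp == c & 0xdf for a caseless alpha): a mask ending {'f'}{[oO]}{'o'} with
 * delay == 0 yields msk = {0xff, 0xdf, 0xff}, cmp = {'f', 'o' & 0xdf, 'o'},
 * and a byte b matches position i iff (b & msk[i]) == cmp[i]. */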
static
bool validateTransientMask(const vector<CharReach> &mask, bool eod, const Grey &grey) {
assert(!mask.empty());
// An EOD anchored mask requires that everything fit into history, while an
// ordinary floating case can handle one byte more (i.e., max history size
// and one byte in the buffer).
const size_t max_width = grey.maxHistoryAvailable + (eod ? 0 : 1);
if (mask.size() > max_width) {
DEBUG_PRINTF("mask too long for max available history\n");
return false;
}
vector<ue2_literal> lits;
u32 lit_minBound; /* minBound of each literal in lit */
u32 lit_length; /* length of each literal in lit */
if (!findMaskLiterals(mask, &lits, &lit_minBound, &lit_length)) {
DEBUG_PRINTF("failed to find any lits\n");
return false;
}
if (lits.empty()) {
return false;
}
const u32 delay = mask.size() - lit_length - lit_minBound;
if (delay > MAX_DELAY) {
DEBUG_PRINTF("delay %u is too much\n", delay);
return false;
}
if (lit_length == 1 && lits.size() > 3) {
DEBUG_PRINTF("no decent trigger\n");
return false;
}
// Mixed-sensitivity literals require benefits masks to implement, and thus
// have a maximum length. This has been taken into account in
// findMaskLiterals.
assert(lit_length <= MAX_MASK2_WIDTH ||
none_of(begin(lits), end(lits), mixed_sensitivity));
// Build the HWLM literal mask.
vector<u8> msk, cmp;
if (grey.roseHamsterMasks) {
buildLiteralMask(mask, msk, cmp, delay);
}
// We consider the HWLM mask length to run from the first non-zero byte to
// the end, and let max(mask length, literal length) be the effective
// literal length.
//
// A one-byte literal with no mask is too short, but a one-byte literal
// with a few bytes of mask information is OK.
u32 msk_length = distance(find_if(begin(msk), end(msk),
[](u8 v) { return v != 0; }), end(msk));
u32 eff_lit_length = max(lit_length, msk_length);
DEBUG_PRINTF("msk_length=%u, eff_lit_length = %u\n", msk_length,
eff_lit_length);
if (eff_lit_length < MIN_MASK_LIT_LEN) {
DEBUG_PRINTF("literals too short\n");
return false;
}
DEBUG_PRINTF("mask is ok\n");
return true;
}
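/* Example (illustrative): a one-byte literal backed by three bytes of mask
 * information has eff_lit_length == 3 >= MIN_MASK_LIT_LEN and passes this
 * check, while the same literal with an all-zero mask (msk_length == 0) is
 * rejected as too short. */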
static
bool maskIsNeeded(const ue2_literal &lit, const NGHolder &g) {
ue2::flat_set<NFAVertex> curr = {g.accept};
ue2::flat_set<NFAVertex> next;
for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
const CharReach &cr = *it;
DEBUG_PRINTF("check %s\n", describeClass(*it).c_str());
next.clear();
for (auto v : curr) {
for (auto u : inv_adjacent_vertices_range(v, g)) {
if (isSubsetOf(cr, g[u].char_reach)) {
next.insert(u);
}
}
}
if (next.empty()) {
DEBUG_PRINTF("no path to start\n");
return true;
}
curr.swap(next);
}
for (auto v : curr) {
for (auto u : inv_adjacent_vertices_range(v, g)) {
if (u == g.start || u == g.startDs) {
DEBUG_PRINTF("literal spans graph from start to accept\n");
return false;
}
}
}
DEBUG_PRINTF("literal doesn't reach start\n");
return true;
}
static
void addTransientMask(RoseBuildImpl &build, const vector<CharReach> &mask,
const ue2::flat_set<ReportID> &reports, bool anchored,
bool eod) {
vector<ue2_literal> lits;
u32 lit_minBound; /* minBound of each literal in lit */
u32 lit_length; /* length of each literal in lit */
if (!findMaskLiterals(mask, &lits, &lit_minBound, &lit_length)) {
DEBUG_PRINTF("failed to find any lits\n");
assert(0);
return;
}
DEBUG_PRINTF("%zu literals, minBound=%u, length=%u\n", lits.size(),
lit_minBound, lit_length);
if (lits.empty()) {
assert(0);
return;
}
u32 delay = mask.size() - lit_length - lit_minBound;
assert(delay <= MAX_DELAY);
DEBUG_PRINTF("delay=%u\n", delay);
shared_ptr<NGHolder> mask_graph = buildMaskLhs(anchored, mask.size(), mask);
u32 mask_lag = 0; /* TODO */
// Everyone gets the same report ID.
ReportID mask_report = build.getNewNfaReport();
setReportId(*mask_graph, mask_report);
// Build the HWLM literal mask.
vector<u8> msk, cmp;
if (build.cc.grey.roseHamsterMasks) {
buildLiteralMask(mask, msk, cmp, delay);
}
/* adjust bounds to be relative to trigger rather than mask */
const u32 v_min_offset = add_rose_depth(0, mask.size());
const u32 v_max_offset =
add_rose_depth(anchored ? 0 : ROSE_BOUND_INF, mask.size());
RoseGraph &g = build.g;
// By default, masked literals go into the floating table (except for eod
// cases).
enum rose_literal_table table = ROSE_FLOATING;
RoseVertex eod_v = RoseGraph::null_vertex();
if (eod) {
eod_v = add_vertex(g);
g[eod_v].eod_accept = true;
insert(&g[eod_v].reports, reports);
g[eod_v].min_offset = v_min_offset;
g[eod_v].max_offset = v_max_offset;
// Note: because this is a transient mask, we know that we can match it
// completely inside the history buffer. So, using the EOD literal
// table is always safe.
table = ROSE_EOD_ANCHORED;
// Widen the EOD table window to cover the mask.
ENSURE_AT_LEAST(&build.ematcher_region_size, mask.size());
}
const ue2::flat_set<ReportID> no_reports;
for (const auto &lit : lits) {
u32 lit_id = build.getLiteralId(lit, msk, cmp, delay, table);
const RoseVertex parent = anchored ? build.anchored_root : build.root;
bool use_mask = delay || maskIsNeeded(lit, *mask_graph);
auto v = createVertex(&build, parent, 0, ROSE_BOUND_INF, lit_id,
lit.length(), eod ? no_reports : reports);
if (use_mask) {
g[v].left.graph = mask_graph;
g[v].left.lag = mask_lag;
g[v].left.leftfix_report = mask_report;
} else {
// Make sure our edge bounds are correct.
auto e = edge_by_target(parent, v, g).first;
g[e].minBound = 0;
g[e].maxBound = anchored ? 0 : ROSE_BOUND_INF;
g[e].history = anchored ? ROSE_ROLE_HISTORY_ANCH
: ROSE_ROLE_HISTORY_NONE;
}
// Set offsets correctly.
g[v].min_offset = v_min_offset;
g[v].max_offset = v_max_offset;
if (eod) {
auto e = add_edge(v, eod_v, g).first;
g[e].minBound = 0;
g[e].maxBound = 0;
g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE;
}
}
}
static
unique_ptr<NGHolder> buildMaskRhs(const ue2::flat_set<ReportID> &reports,
const vector<CharReach> &mask,
u32 suffix_len) {
assert(suffix_len);
assert(mask.size() > suffix_len);
unique_ptr<NGHolder> rhs = ue2::make_unique<NGHolder>(NFA_SUFFIX);
NGHolder &h = *rhs;
NFAVertex succ = h.accept;
u32 m_idx = mask.size() - 1;
while (suffix_len--) {
NFAVertex u = add_vertex(h);
if (succ == h.accept) {
h[u].reports.insert(reports.begin(), reports.end());
}
h[u].char_reach = mask[m_idx--];
add_edge(u, succ, h);
succ = u;
}
add_edge(h.start, succ, h);
return rhs;
}
static
void doAddMask(RoseBuildImpl &tbi, bool anchored,
const vector<CharReach> &mask, const ue2_literal &lit,
u32 prefix_len, u32 suffix_len,
const ue2::flat_set<ReportID> &reports) {
/* Note: bounds are relative to literal start */
RoseInGraph ig;
RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(anchored), ig);
RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
DEBUG_PRINTF("pref + lit = %u\n", prefix_len);
assert(prefix_len >= lit.length());
// prefix len is relative to end of literal.
u32 minBound = prefix_len - lit.length();
if (minBound) {
if (anchored && prefix_len > tbi.cc.grey.maxAnchoredRegion) {
DEBUG_PRINTF("too deep\n");
/* see if there is an anchored literal we can also hang off */
ue2_literal lit2;
u32 lit2_offset;
vector<CharReach> mask2 = mask;
assert(mask2.size() > tbi.cc.grey.maxAnchoredRegion);
mask2.resize(MIN(tbi.cc.grey.maxAnchoredRegion, minBound));
findMaskLiteral(mask2, tbi.cc.streaming, &lit2, &lit2_offset,
tbi.cc.grey);
if (lit2.length() >= MIN_MASK_LIT_LEN) {
u32 prefix2_len = lit2_offset + lit2.length();
assert(prefix2_len < minBound);
RoseInVertex u
= add_vertex(RoseInVertexProps::makeLiteral(lit2), ig);
if (lit2_offset) {
DEBUG_PRINTF("building lhs (off %u)\n", lit2_offset);
shared_ptr<NGHolder> lhs2
= buildMaskLhs(true, lit2_offset, mask);
add_edge(s, u, RoseInEdgeProps(lhs2, lit2.length()), ig);
} else {
add_edge(s, u, RoseInEdgeProps(0, 0), ig);
}
/* midfix */
DEBUG_PRINTF("building mhs\n");
vector<CharReach> mask3(mask.begin() + prefix2_len, mask.end());
u32 overlap = maxOverlap(lit2, lit, 0);
u32 delay = lit.length() - overlap;
shared_ptr<NGHolder> mhs
= buildMaskLhs(true, minBound - prefix2_len + overlap,
mask3);
mhs->kind = NFA_INFIX;
add_edge(u, v, RoseInEdgeProps(mhs, delay), ig);
DEBUG_PRINTF("add anch literal too!\n");
goto do_rhs;
}
}
shared_ptr<NGHolder> lhs = buildMaskLhs(anchored, minBound, mask);
add_edge(s, v, RoseInEdgeProps(lhs, lit.length()), ig);
} else {
u32 maxBound = anchored ? minBound : ROSE_BOUND_INF;
add_edge(s, v, RoseInEdgeProps(minBound, maxBound), ig);
}
do_rhs:
if (suffix_len) {
shared_ptr<NGHolder> rhs = buildMaskRhs(reports, mask, suffix_len);
RoseInVertex a =
add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
add_edge(v, a, RoseInEdgeProps(rhs, 0), ig);
} else {
/* Note: masks have no eod connections */
RoseInVertex a
= add_vertex(RoseInVertexProps::makeAccept(reports), ig);
add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
}
calcVertexOffsets(ig);
bool rv = tbi.addRose(ig, false);
assert(rv); /* checkAllowMask should have prevented this */
if (!rv) {
throw std::exception();
}
}
static
bool checkAllowMask(const vector<CharReach> &mask, ue2_literal *lit,
u32 *prefix_len, u32 *suffix_len,
const CompileContext &cc) {
assert(!mask.empty());
u32 lit_offset;
findMaskLiteral(mask, cc.streaming, lit, &lit_offset, cc.grey);
if (lit->length() < MIN_MASK_LIT_LEN && lit->length() != mask.size()) {
DEBUG_PRINTF("need more literal - bad mask\n");
return false;
}
DEBUG_PRINTF("mask lit '%s', len=%zu at offset=%u\n",
dumpString(*lit).c_str(), lit->length(), lit_offset);
assert(!cc.streaming || lit->length() <= cc.grey.maxHistoryAvailable + 1);
/* literal is included in the prefix nfa so that matches from the prefix
 * can't occur in the history buffer; we should probably tweak the NFA API
 * so that such matches don't need to be suppressed this way */
*prefix_len = lit_offset + lit->length();
*suffix_len = mask.size() - *prefix_len;
DEBUG_PRINTF("prefix_len=%u, suffix_len=%u\n", *prefix_len, *suffix_len);
/* check if we can backtrack sufficiently */
if (cc.streaming && *prefix_len > cc.grey.maxHistoryAvailable + 1) {
DEBUG_PRINTF("too much lag\n");
return false;
}
if (*suffix_len > MAX_MASK_SIZE || *prefix_len > MAX_MASK_SIZE) {
DEBUG_PRINTF("too big\n");
return false;
}
return true;
}
bool RoseBuildImpl::add(bool anchored, const vector<CharReach> &mask,
const ue2::flat_set<ReportID> &reports) {
if (validateTransientMask(mask, false, cc.grey)) {
bool eod = false;
addTransientMask(*this, mask, reports, anchored, eod);
return true;
}
ue2_literal lit;
u32 prefix_len = 0;
u32 suffix_len = 0;
if (!checkAllowMask(mask, &lit, &prefix_len, &suffix_len, cc)) {
return false;
}
/* we know that the mask can be handled now, start playing with the rose
* graph */
doAddMask(*this, anchored, mask, lit, prefix_len, suffix_len, reports);
return true;
}
bool RoseBuildImpl::validateMask(const vector<CharReach> &mask,
UNUSED const ue2::flat_set<ReportID> &reports,
UNUSED bool anchored, bool eod) const {
return validateTransientMask(mask, eod, cc.grey);
}
static
unique_ptr<NGHolder> makeAnchoredGraph(const vector<CharReach> &mask,
const ue2::flat_set<ReportID> &reports,
bool eod) {
auto gp = ue2::make_unique<NGHolder>();
NGHolder &g = *gp;
NFAVertex u = g.start;
for (const auto &cr : mask) {
NFAVertex v = add_vertex(g);
g[v].char_reach = cr;
add_edge(u, v, g);
u = v;
}
g[u].reports = reports;
add_edge(u, eod ? g.acceptEod : g.accept, g);
return gp;
}
static
bool addAnchoredMask(RoseBuildImpl &build, const vector<CharReach> &mask,
const ue2::flat_set<ReportID> &reports, bool eod) {
if (!build.cc.grey.allowAnchoredAcyclic) {
return false;
}
auto g = makeAnchoredGraph(mask, reports, eod);
assert(g);
return build.addAnchoredAcyclic(*g);
}
void RoseBuildImpl::addMask(const vector<CharReach> &mask,
const ue2::flat_set<ReportID> &reports,
bool anchored, bool eod) {
if (anchored && addAnchoredMask(*this, mask, reports, eod)) {
DEBUG_PRINTF("added mask as anchored acyclic graph\n");
return;
}
addTransientMask(*this, mask, reports, anchored, eod);
}
} // namespace ue2

882
src/rose/rose_build_anchored.cpp Normal file

@@ -0,0 +1,882 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "rose_build_anchored.h"
#include "grey.h"
#include "rose_build_impl.h"
#include "rose_internal.h"
#include "ue2common.h"
#include "nfa/dfa_min.h"
#include "nfa/mcclellancompile.h"
#include "nfa/mcclellancompile_util.h"
#include "nfa/nfa_build_util.h"
#include "nfa/rdfa_merge.h"
#include "nfagraph/ng_holder.h"
#include "nfagraph/ng_repeat.h"
#include "nfagraph/ng_util.h"
#include "nfagraph/ng_mcclellan_internal.h"
#include "util/alloc.h"
#include "util/bitfield.h"
#include "util/charreach.h"
#include "util/compile_context.h"
#include "util/compile_error.h"
#include "util/container.h"
#include "util/determinise.h"
#include "util/graph_range.h"
#include "util/make_unique.h"
#include "util/order_check.h"
#include "util/ue2_containers.h"
#include "util/ue2string.h"
#include "util/verify_types.h"
#include <map>
#include <queue>
#include <set>
#include <vector>
using namespace std;
namespace ue2 {
#define ANCHORED_NFA_STATE_LIMIT 512
#define MAX_DFA_STATES 16000
#define DFA_PAIR_MERGE_THRESHOLD 5000
#define MAX_SMALL_START_REACH 4
#define INIT_STATE (DEAD_STATE + 1)
// Adds a vertex with the given reach.
static
NFAVertex add_vertex(NGHolder &h, const CharReach &cr) {
NFAVertex v = add_vertex(h);
h[v].char_reach = cr;
return v;
}
static
void add_edges(const set<NFAVertex> &parents, NFAVertex v, NGHolder &h) {
for (auto p : parents) {
add_edge(p, v, h);
}
}
static
set<NFAVertex> addDotsToGraph(NGHolder &h, NFAVertex start, u32 min, u32 max,
const CharReach &cr) {
DEBUG_PRINTF("adding [%u, %u] to graph\n", min, max);
u32 i = 0;
set<NFAVertex> curr;
curr.insert(start);
for (; i < min; i++) {
NFAVertex next = add_vertex(h, cr);
add_edges(curr, next, h);
curr.clear();
curr.insert(next);
}
assert(max != ROSE_BOUND_INF);
set<NFAVertex> orig = curr;
for (; i < max; i++) {
NFAVertex next = add_vertex(h, cr);
add_edges(curr, next, h);
curr.clear();
curr.insert(next);
curr.insert(orig.begin(), orig.end());
}
return curr;
}
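/* Example (illustrative): min == 1, max == 3 with dot reach builds the chain
 * d1 -> d2 -> d3 plus the skip edge d1 -> d3 (from re-inserting orig), and
 * returns {d1, d3}: d1 is reached after one dot, d3 after two (skip edge) or
 * three dots, so attaching a literal to every returned vertex implements
 * ^.{1,3}lit. */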
static
NFAVertex addToGraph(NGHolder &h, const set<NFAVertex> &curr,
const ue2_literal &s) {
DEBUG_PRINTF("adding %s to graph\n", dumpString(s).c_str());
assert(!s.empty());
ue2_literal::const_iterator it = s.begin();
NFAVertex u = add_vertex(h, *it);
add_edges(curr, u, h);
for (++it; it != s.end(); ++it) {
NFAVertex next = add_vertex(h, *it);
add_edge(u, next, h);
u = next;
}
return u;
}
static
void mergeAnchoredDfas(vector<unique_ptr<raw_dfa>> &dfas,
const RoseBuildImpl &build) {
// First, group our DFAs into "small start" and "big start" sets.
vector<unique_ptr<raw_dfa>> small_starts, big_starts;
for (auto &rdfa : dfas) {
u32 start_size = mcclellanStartReachSize(rdfa.get());
if (start_size <= MAX_SMALL_START_REACH) {
small_starts.push_back(move(rdfa));
} else {
big_starts.push_back(move(rdfa));
}
}
dfas.clear();
DEBUG_PRINTF("%zu dfas with small starts, %zu dfas with big starts\n",
small_starts.size(), big_starts.size());
mergeDfas(small_starts, MAX_DFA_STATES, nullptr, build.cc.grey);
mergeDfas(big_starts, MAX_DFA_STATES, nullptr, build.cc.grey);
// Rehome our groups into one vector.
for (auto &rdfa : small_starts) {
dfas.push_back(move(rdfa));
}
for (auto &rdfa : big_starts) {
dfas.push_back(move(rdfa));
}
// Final test: if we've built two DFAs here that are small enough, we can
// try to merge them.
if (dfas.size() == 2) {
size_t total_states = dfas[0]->states.size() + dfas[1]->states.size();
if (total_states < DFA_PAIR_MERGE_THRESHOLD) {
DEBUG_PRINTF("doing small pair merge\n");
mergeDfas(dfas, MAX_DFA_STATES, nullptr, build.cc.grey);
}
}
}
static
void translateReportSet(flat_set<ReportID> *rset, const RoseBuildImpl &tbi) {
flat_set<ReportID> old;
old.swap(*rset);
for (auto report_id : old) {
DEBUG_PRINTF("updating %u -> %u\n", report_id,
tbi.literal_info[report_id].final_id);
rset->insert(tbi.literal_info[report_id].final_id);
}
}
static
void remapAnchoredReports(raw_dfa &dfa, const RoseBuildImpl &tbi) {
for (dstate &ds : dfa.states) {
translateReportSet(&ds.reports, tbi);
translateReportSet(&ds.reports_eod, tbi);
}
}
/* Replaces the report ids currently in the dfas (rose graph literal ids) with
* the final id used by the runtime. */
static
void remapAnchoredReports(RoseBuildImpl &tbi) {
for (auto it = tbi.anchored_nfas.begin(); it != tbi.anchored_nfas.end();
++it) {
for (auto &rdfa : it->second) {
assert(rdfa);
remapAnchoredReports(*rdfa, tbi);
}
}
}
static
void populate_holder(const simple_anchored_info &sai, const set<u32> &exit_ids,
NGHolder *h_in) {
DEBUG_PRINTF("populating holder for ^.{%u,%u}%s\n", sai.min_bound,
sai.max_bound, dumpString(sai.literal).c_str());
NGHolder &h = *h_in;
set<NFAVertex> ends = addDotsToGraph(h, h.start, sai.min_bound,
sai.max_bound, CharReach::dot());
NFAVertex v = addToGraph(h, ends, sai.literal);
add_edge(v, h.accept, h);
h[v].reports.insert(exit_ids.begin(), exit_ids.end());
}
u32 anchoredStateSize(const void *atable) {
if (!atable) {
return 0;
}
const struct anchored_matcher_info *curr
= (const anchored_matcher_info *)atable;
// Walk the list until we find the last element; total state size will be
// that engine's state offset plus its state requirement.
while (curr->next_offset) {
curr = (const anchored_matcher_info *)
((const char *)curr + curr->next_offset);
}
const NFA *nfa = (const NFA *)((const char *)curr + sizeof(*curr));
return curr->state_offset + nfa->scratchStateSize;
}
bool anchoredIsMulti(const RoseEngine &engine) {
const struct anchored_matcher_info *curr
= (const anchored_matcher_info *)getALiteralMatcher(&engine);
return curr && curr->next_offset;
}
namespace {
typedef bitfield<ANCHORED_NFA_STATE_LIMIT> nfa_state_set;
struct Holder_StateSet {
Holder_StateSet() : wdelay(0) {}
nfa_state_set wrap_state;
u32 wdelay;
bool operator==(const Holder_StateSet &b) const {
return wdelay == b.wdelay && wrap_state == b.wrap_state;
}
};
size_t hash_value(const Holder_StateSet &s) {
size_t val = 0;
boost::hash_combine(val, s.wrap_state);
boost::hash_combine(val, s.wdelay);
return val;
}
class Automaton_Holder {
public:
typedef Holder_StateSet StateSet;
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
explicit Automaton_Holder(const NGHolder &g_in) : g(g_in), bad(false) {
for (auto v : vertices_range(g)) {
vertexToIndex[v] = indexToVertex.size();
indexToVertex.push_back(v);
}
if (indexToVertex.size() > ANCHORED_NFA_STATE_LIMIT) {
bad = true;
return;
}
DEBUG_PRINTF("%zu states\n", indexToVertex.size());
init.wdelay = 0;
init.wrap_state.set(vertexToIndex[g.start]);
DEBUG_PRINTF("init wdelay %u\n", init.wdelay);
calculateAlphabet();
cr_by_index = populateCR(g, indexToVertex, alpha);
}
private:
void calculateAlphabet() {
vector<CharReach> esets(1, CharReach::dot());
for (auto v : indexToVertex) {
const CharReach &cr = g[v].char_reach;
for (size_t i = 0; i < esets.size(); i++) {
if (esets[i].count() == 1) {
continue;
}
CharReach t = cr & esets[i];
if (t.any() && t != esets[i]) {
esets[i] &= ~t;
esets.push_back(t);
}
}
}
alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
}
public:
void transition(const StateSet &in, StateSet *next) {
/* track the dfa state, reset nfa states */
u32 wdelay = in.wdelay ? in.wdelay - 1 : 0;
for (symbol_t s = 0; s < alphasize; s++) {
next[s].wrap_state.reset();
next[s].wdelay = wdelay;
}
nfa_state_set succ;
if (wdelay != in.wdelay) {
DEBUG_PRINTF("enabling start\n");
succ.set(vertexToIndex[g.startDs]);
}
for (size_t i = in.wrap_state.find_first(); i != nfa_state_set::npos;
i = in.wrap_state.find_next(i)) {
NFAVertex v = indexToVertex[i];
for (auto w : adjacent_vertices_range(v, g)) {
if (!contains(vertexToIndex, w)
|| w == g.accept || w == g.acceptEod) {
continue;
}
if (w == g.startDs) {
continue;
}
succ.set(vertexToIndex[w]);
}
}
for (size_t j = succ.find_first(); j != nfa_state_set::npos;
j = succ.find_next(j)) {
const CharReach &cr = cr_by_index[j];
for (size_t s = cr.find_first(); s != CharReach::npos;
s = cr.find_next(s)) {
next[s].wrap_state.set(j); /* pre alpha'ed */
}
}
next[alpha[TOP]] = in;
}
const vector<StateSet> initial() {
return {init};
}
void reports(const StateSet &in, flat_set<ReportID> &rv) {
rv.clear();
for (size_t i = in.wrap_state.find_first(); i != nfa_state_set::npos;
i = in.wrap_state.find_next(i)) {
NFAVertex v = indexToVertex[i];
if (edge(v, g.accept, g).second) {
assert(!g[v].reports.empty());
insert(&rv, g[v].reports);
} else {
assert(g[v].reports.empty());
}
}
}
void reportsEod(const StateSet &, flat_set<ReportID> &r) {
r.clear();
}
static bool canPrune(const flat_set<ReportID> &) {
/* used by ng_ to prune states after highlander accepts */
return false;
}
private:
const NGHolder &g;
ue2::unordered_map<NFAVertex, u32> vertexToIndex;
vector<NFAVertex> indexToVertex;
vector<CharReach> cr_by_index;
StateSet init;
public:
StateSet dead;
array<u16, ALPHABET_SIZE> alpha;
array<u16, ALPHABET_SIZE> unalpha;
u16 alphasize;
bool bad;
};
} // namespace
static
bool check_dupe(const raw_dfa &rdfa,
const vector<unique_ptr<raw_dfa>> &existing, ReportID *remap) {
if (!remap) {
DEBUG_PRINTF("no remap\n");
return false;
}
set<ReportID> rdfa_reports;
for (const auto &ds : rdfa.states) {
rdfa_reports.insert(ds.reports.begin(), ds.reports.end());
}
if (rdfa_reports.size() != 1) {
return false; /* too complicated for now; would need report mapping TODO */
}
for (const auto &e_rdfa : existing) {
assert(e_rdfa);
const raw_dfa &b = *e_rdfa;
if (rdfa.start_anchored != b.start_anchored ||
rdfa.alpha_size != b.alpha_size ||
rdfa.states.size() != b.states.size() ||
rdfa.alpha_remap != b.alpha_remap) {
continue;
}
set<ReportID> b_reports;
for (u32 i = 0; i < b.states.size(); i++) {
assert(b.states[i].reports_eod.empty());
assert(rdfa.states[i].reports_eod.empty());
if (rdfa.states[i].reports.size() != b.states[i].reports.size()) {
goto next_dfa;
}
b_reports.insert(b.states[i].reports.begin(),
b.states[i].reports.end());
assert(rdfa.states[i].next.size() == b.states[i].next.size());
if (!equal(rdfa.states[i].next.begin(), rdfa.states[i].next.end(),
b.states[i].next.begin())) {
goto next_dfa;
}
}
if (b_reports.size() != 1) {
continue;
}
*remap = *b_reports.begin();
DEBUG_PRINTF("dupe found remapping to %u\n", *remap);
return true;
next_dfa:;
}
return false;
}
static
bool check_dupe_simple(const RoseBuildImpl &tbi, u32 min_bound, u32 max_bound,
const ue2_literal &lit, ReportID *remap) {
if (!remap) {
DEBUG_PRINTF("no remap\n");
return false;
}
simple_anchored_info sai(min_bound, max_bound, lit);
if (contains(tbi.anchored_simple, sai)) {
*remap = *tbi.anchored_simple.at(sai).begin();
return true;
}
return false;
}
static
NFAVertex extractLiteral(const NGHolder &h, ue2_literal *lit) {
vector<NFAVertex> lit_verts;
NFAVertex v = h.accept;
while ((v = getSoleSourceVertex(h, v))) {
const CharReach &cr = h[v].char_reach;
if (cr.count() > 1 && !cr.isCaselessChar()) {
break;
}
lit_verts.push_back(v);
}
if (lit_verts.empty()) {
return NFAGraph::null_vertex();
}
bool nocase = false;
bool case_set = false;
for (auto it = lit_verts.rbegin(), ite = lit_verts.rend(); it != ite;
++it) {
const CharReach &cr = h[*it].char_reach;
if (cr.isAlpha()) {
bool cr_nocase = cr.count() != 1;
if (case_set && cr_nocase != nocase) {
return NFAGraph::null_vertex();
}
case_set = true;
nocase = cr_nocase;
lit->push_back(cr.find_first(), nocase);
} else {
lit->push_back(cr.find_first(), false);
}
}
return lit_verts.back();
}
static
bool isSimple(const NGHolder &h, u32 *min_bound, u32 *max_bound,
ue2_literal *lit, u32 *report) {
assert(!proper_out_degree(h.startDs, h));
assert(in_degree(h.acceptEod, h) == 1);
DEBUG_PRINTF("looking for simple case\n");
NFAVertex lit_head = extractLiteral(h, lit);
if (lit_head == NFAGraph::null_vertex()) {
DEBUG_PRINTF("no literal found\n");
return false;
}
const auto &reps = h[*inv_adjacent_vertices(h.accept, h).first].reports;
if (reps.size() != 1) {
return false;
}
*report = *reps.begin();
assert(!lit->empty());
set<NFAVertex> rep_exits;
/* lit should only be connected to dot vertices */
for (auto u : inv_adjacent_vertices_range(lit_head, h)) {
DEBUG_PRINTF("checking %u\n", h[u].index);
if (!h[u].char_reach.all()) {
return false;
}
if (u != h.start) {
rep_exits.insert(u);
}
}
if (rep_exits.empty()) {
DEBUG_PRINTF("direct anchored\n");
assert(edge(h.start, lit_head, h).second);
*min_bound = 0;
*max_bound = 0;
return true;
}
NFAVertex key = *rep_exits.begin();
// Special-case the check for '^.foo' or '^.?foo'.
if (rep_exits.size() == 1 && edge(h.start, key, h).second &&
out_degree(key, h) == 1) {
DEBUG_PRINTF("one exit\n");
assert(edge(h.start, h.startDs, h).second);
size_t num_enters = out_degree(h.start, h);
if (num_enters == 2) {
DEBUG_PRINTF("^.{1,1} prefix\n");
*min_bound = 1;
*max_bound = 1;
return true;
}
if (num_enters == 3 && edge(h.start, lit_head, h).second) {
DEBUG_PRINTF("^.{0,1} prefix\n");
*min_bound = 0;
*max_bound = 1;
return true;
}
}
vector<GraphRepeatInfo> repeats;
findRepeats(h, 2, &repeats);
vector<GraphRepeatInfo>::const_iterator it;
for (it = repeats.begin(); it != repeats.end(); ++it) {
DEBUG_PRINTF("checking.. %zu verts\n", it->vertices.size());
if (find(it->vertices.begin(), it->vertices.end(), key)
!= it->vertices.end()) {
break;
}
}
if (it == repeats.end()) {
DEBUG_PRINTF("no repeat found\n");
return false;
}
set<NFAVertex> rep_verts;
insert(&rep_verts, it->vertices);
if (!is_subset_of(rep_exits, rep_verts)) {
DEBUG_PRINTF("bad exit check\n");
return false;
}
set<NFAVertex> rep_enters;
insert(&rep_enters, adjacent_vertices(h.start, h));
rep_enters.erase(lit_head);
rep_enters.erase(h.startDs);
if (!is_subset_of(rep_enters, rep_verts)) {
DEBUG_PRINTF("bad entry check\n");
return false;
}
u32 min_b = it->repeatMin;
if (edge(h.start, lit_head, h).second) { /* jump edge */
if (min_b != 1) {
DEBUG_PRINTF("jump edge around repeat with min bound\n");
return false;
}
min_b = 0;
}
*min_bound = min_b;
*max_bound = it->repeatMax;
DEBUG_PRINTF("repeat %u %u before %s\n", *min_bound, *max_bound,
dumpString(*lit).c_str());
return true;
}
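/* Example (illustrative): a holder representing /^.{2,5}abc/ yields
 * *min_bound == 2, *max_bound == 5, *lit == "abc" and *report set to the
 * pattern's report, so it can be recorded as simple_anchored_info instead of
 * being determinised immediately. */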
static
int finalise_out(RoseBuildImpl &tbi, const NGHolder &h,
const Automaton_Holder &autom, unique_ptr<raw_dfa> out_dfa,
ReportID *remap) {
u32 min_bound = ~0U;
u32 max_bound = ~0U;
ue2_literal lit;
u32 simple_report = MO_INVALID_IDX;
if (isSimple(h, &min_bound, &max_bound, &lit, &simple_report)) {
assert(simple_report != MO_INVALID_IDX);
if (check_dupe_simple(tbi, min_bound, max_bound, lit, remap)) {
DEBUG_PRINTF("found duplicate remapping to %u\n", *remap);
return ANCHORED_REMAP;
}
DEBUG_PRINTF("add with report %u\n", simple_report);
tbi.anchored_simple[simple_anchored_info(min_bound, max_bound, lit)]
.insert(simple_report);
return ANCHORED_SUCCESS;
}
out_dfa->start_anchored = INIT_STATE;
out_dfa->start_floating = DEAD_STATE;
out_dfa->alpha_size = autom.alphasize;
out_dfa->alpha_remap = autom.alpha;
auto hash = hash_dfa_no_reports(*out_dfa);
if (check_dupe(*out_dfa, tbi.anchored_nfas[hash], remap)) {
return ANCHORED_REMAP;
}
tbi.anchored_nfas[hash].push_back(move(out_dfa));
return ANCHORED_SUCCESS;
}
static
int addAutomaton(RoseBuildImpl &tbi, const NGHolder &h, ReportID *remap) {
Automaton_Holder autom(h);
if (autom.bad) {
DEBUG_PRINTF("autom bad!\n");
return ANCHORED_FAIL;
}
unique_ptr<raw_dfa> out_dfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX);
if (!determinise(autom, out_dfa->states, MAX_DFA_STATES)) {
return finalise_out(tbi, h, autom, move(out_dfa), remap);
}
DEBUG_PRINTF("determinise failed\n");
return ANCHORED_FAIL;
}
static
void setReports(NGHolder &h, const map<NFAVertex, set<u32>> &reportMap,
const ue2::unordered_map<NFAVertex, NFAVertex> &orig_to_copy) {
for (const auto &m : reportMap) {
NFAVertex t = orig_to_copy.at(m.first);
assert(!m.second.empty());
add_edge(t, h.accept, h);
insert(&h[t].reports, m.second);
}
}
int addAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &wrapper,
const map<NFAVertex, set<u32>> &reportMap) {
NGHolder h;
ue2::unordered_map<NFAVertex, NFAVertex> orig_to_copy;
cloneHolder(h, wrapper, &orig_to_copy);
clear_in_edges(h.accept, h);
clear_in_edges(h.acceptEod, h);
add_edge(h.accept, h.acceptEod, h);
clearReports(h);
setReports(h, reportMap, orig_to_copy);
return addAutomaton(tbi, h, nullptr);
}
int addToAnchoredMatcher(RoseBuildImpl &tbi, const NGHolder &anchored,
u32 exit_id, ReportID *remap) {
NGHolder h;
cloneHolder(h, anchored);
clearReports(h);
assert(in_degree(h.acceptEod, h) == 1);
for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
h[v].reports.clear();
h[v].reports.insert(exit_id);
}
return addAutomaton(tbi, h, remap);
}
static
void buildSimpleDfas(const RoseBuildImpl &tbi,
vector<unique_ptr<raw_dfa>> *anchored_dfas) {
/* we should have determinised all of these before so there should be no
* chance of failure. */
for (const auto &simple : tbi.anchored_simple) {
set<u32> exit_ids;
for (auto lit_id : simple.second) {
exit_ids.insert(tbi.literal_info[lit_id].final_id);
}
NGHolder h;
populate_holder(simple.first, exit_ids, &h);
Automaton_Holder autom(h);
assert(!autom.bad);
unique_ptr<raw_dfa> rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX);
UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES);
assert(!rv);
rdfa->start_anchored = INIT_STATE;
rdfa->start_floating = DEAD_STATE;
rdfa->alpha_size = autom.alphasize;
rdfa->alpha_remap = autom.alpha;
anchored_dfas->push_back(move(rdfa));
}
}
/**
* Fill the given vector with all of the raw_dfas we need to compile into the
* anchored matcher. Takes ownership of the input structures, clearing them
* from RoseBuildImpl.
*/
static
void getAnchoredDfas(RoseBuildImpl &tbi,
vector<unique_ptr<raw_dfa>> *anchored_dfas) {
// DFAs that already exist as raw_dfas.
for (auto &anch_dfas : tbi.anchored_nfas) {
for (auto &rdfa : anch_dfas.second) {
anchored_dfas->push_back(move(rdfa));
}
}
tbi.anchored_nfas.clear();
// DFAs we currently have as simple literals.
if (!tbi.anchored_simple.empty()) {
buildSimpleDfas(tbi, anchored_dfas);
tbi.anchored_simple.clear();
}
}
/**
* \brief Builds our anchored DFAs into runtime NFAs.
*
* Constructs a vector of NFA structures and a vector of their start offsets
* (number of dots removed from the prefix) from the raw_dfa structures given.
*
* Note: frees the raw_dfa structures on completion.
*
* \return Total bytes required for the complete anchored matcher.
*/
static
size_t buildNfas(vector<unique_ptr<raw_dfa>> &anchored_dfas,
vector<aligned_unique_ptr<NFA>> *nfas, vector<u32> *start_offset,
const CompileContext &cc) {
const size_t num_dfas = anchored_dfas.size();
nfas->reserve(num_dfas);
start_offset->reserve(num_dfas);
size_t total_size = 0;
for (auto &rdfa : anchored_dfas) {
u32 removed_dots = remove_leading_dots(*rdfa);
start_offset->push_back(removed_dots);
minimize_hopcroft(*rdfa, cc.grey);
aligned_unique_ptr<NFA> nfa = mcclellanCompile(*rdfa, cc);
if (!nfa) {
assert(0);
throw std::bad_alloc();
}
assert(nfa->length);
total_size += ROUNDUP_CL(sizeof(anchored_matcher_info) + nfa->length);
nfas->push_back(move(nfa));
}
// We no longer need to keep the raw_dfa structures around.
anchored_dfas.clear();
return total_size;
}
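/*
 * Memory layout produced below (illustrative), one engine per entry:
 *
 *   atable -> [anchored_matcher_info][NFA][pad to 64 bytes]
 *             [anchored_matcher_info][NFA][pad to 64 bytes] ...
 *
 * Each next_offset gives the distance to the following anchored_matcher_info
 * (0 for the last entry), matching the walk in anchoredStateSize() above.
 */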
aligned_unique_ptr<void> buildAnchoredAutomataMatcher(RoseBuildImpl &tbi,
size_t *asize) {
const CompileContext &cc = tbi.cc;
remapAnchoredReports(tbi);
if (tbi.anchored_nfas.empty() && tbi.anchored_simple.empty()) {
DEBUG_PRINTF("empty\n");
*asize = 0;
return nullptr;
}
vector<unique_ptr<raw_dfa>> anchored_dfas;
getAnchoredDfas(tbi, &anchored_dfas);
mergeAnchoredDfas(anchored_dfas, tbi);
vector<aligned_unique_ptr<NFA>> nfas;
vector<u32> start_offset; // start offset for each dfa (dots removed)
size_t total_size = buildNfas(anchored_dfas, &nfas, &start_offset, cc);
if (total_size > cc.grey.limitRoseAnchoredSize) {
throw ResourceLimitError();
}
*asize = total_size;
aligned_unique_ptr<void> atable = aligned_zmalloc_unique<void>(total_size);
char *curr = (char *)atable.get();
u32 state_offset = 0;
for (size_t i = 0; i < nfas.size(); i++) {
const NFA *nfa = nfas[i].get();
anchored_matcher_info *ami = (anchored_matcher_info *)curr;
char *prev_curr = curr;
curr += sizeof(anchored_matcher_info);
memcpy(curr, nfa, nfa->length);
curr += nfa->length;
curr = ROUNDUP_PTR(curr, 64);
if (i + 1 == nfas.size()) {
ami->next_offset = 0U;
} else {
ami->next_offset = verify_u32(curr - prev_curr);
}
// State must be aligned.
u32 align_req = state_alignment(*nfa);
assert(align_req <= 2); // only DFAs.
while (state_offset % align_req) {
state_offset++;
}
ami->state_offset = state_offset;
state_offset += nfa->scratchStateSize;
ami->anchoredMinDistance = start_offset[i];
}
DEBUG_PRINTF("success %zu\n", *asize);
return atable;
}
} // namespace ue2
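/*
 * A minimal standalone sketch of the layout arithmetic used above: each
 * record is an anchored_matcher_info header followed by its NFA, rounded up
 * to a 64-byte cache line, with next_offset chaining to the next record (0
 * for the last one). The sizes below are hypothetical, and roundup_cl is an
 * assumption standing in for ROUNDUP_CL.
 */
#include <cassert>
#include <cstddef>
#include <vector>

static size_t roundup_cl(size_t x) { return (x + 63) & ~(size_t)63; }

int main() {
    const size_t header = 16; // hypothetical sizeof(anchored_matcher_info)
    const std::vector<size_t> nfa_len = {100, 40}; // hypothetical NFA lengths
    size_t total = 0;
    std::vector<size_t> next_offset(nfa_len.size());
    for (size_t i = 0; i < nfa_len.size(); i++) {
        size_t rec = roundup_cl(header + nfa_len[i]);
        next_offset[i] = (i + 1 == nfa_len.size()) ? 0 : rec;
        total += rec;
    }
    assert(next_offset[0] == 128 && next_offset[1] == 0 && total == 192);
    return 0;
}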


@@ -0,0 +1,66 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_BUILD_ANCHORED
#define ROSE_BUILD_ANCHORED
#include "ue2common.h"
#include "rose_build.h"
#include "nfagraph/ng_holder.h"
#include "util/alloc.h"
#include <map>
#include <vector>
#include <set>
struct RoseEngine;
namespace ue2 {
class NGHolder;
class RoseBuildImpl;
struct Grey;
aligned_unique_ptr<void> buildAnchoredAutomataMatcher(RoseBuildImpl &tbi,
size_t *asize);
u32 anchoredStateSize(const void *atable);
bool anchoredIsMulti(const RoseEngine &engine);
#define ANCHORED_FAIL 0
#define ANCHORED_SUCCESS 1
#define ANCHORED_REMAP 2
int addAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &wrapper,
const std::map<NFAVertex, std::set<u32>> &reportMap);
int addToAnchoredMatcher(RoseBuildImpl &tbi, const NGHolder &anchored,
u32 exit_id, ReportID *remap);
} // namespace ue2
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,43 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_BUILD_CONVERT_H
#define ROSE_BUILD_CONVERT_H
namespace ue2 {
class RoseBuildImpl;
void convertFloodProneSuffixes(RoseBuildImpl &tbi);
void convertBadLeaves(RoseBuildImpl &tbi);
void convertPrefixToBounds(RoseBuildImpl &tbi);
void convertAnchPrefixToBounds(RoseBuildImpl &tbi);
} // namespace ue2
#endif


@@ -0,0 +1,633 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "rose_build_dump.h"
#include "hwlm/hwlm_build.h"
#include "rose_build_impl.h"
#include "rose/rose_dump.h"
#include "rose_internal.h"
#include "ue2common.h"
#include "nfa/nfa_internal.h"
#include "nfagraph/ng_dump.h"
#include "som/slot_manager_dump.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/graph_range.h"
#include "util/ue2string.h"
#include <iomanip>
#include <ostream>
#include <set>
#include <sstream>
#include <string>
#include <vector>
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
using namespace std;
namespace ue2 {
static
string to_string(nfa_kind k) {
switch (k) {
case NFA_PREFIX:
return "p";
case NFA_INFIX:
return "i";
case NFA_SUFFIX:
return "s";
case NFA_OUTFIX:
return "o";
case NFA_REV_PREFIX:
return "r";
}
assert(0);
return "?";
}
// Get the RoseRole associated with a given vertex in the build graph from the
// RoseEngine.
static
const RoseRole *getRoseRole(const RoseBuildImpl &build,
const RoseEngine *engine, RoseVertex v) {
if (!engine) {
return nullptr;
}
u32 role_idx = build.g[v].role;
if (role_idx == MO_INVALID_IDX) {
return nullptr;
}
const RoseRole *roles = getRoleTable(engine);
return &roles[role_idx];
}
namespace {
class RoseGraphWriter {
public:
RoseGraphWriter(const RoseBuildImpl &b_in, const RoseEngine *t_in) :
build(b_in), t(t_in) {
for (const auto &m : build.ghost) {
ghost.insert(m.second);
}
}
void operator() (ostream &os, const RoseVertex &v) const {
const RoseGraph &g = build.g;
if (v == build.root) {
os << "[label=\"<root>\"]";
return;
}
if (v == build.anchored_root) {
os << "[label=\"<^>\"]";
return;
}
os << "[label=\"";
os << "role=" << g[v].role << "[i" << g[v].idx <<"]\\n";
for (u32 lit_id : g[v].literals) {
writeLiteral(os, lit_id);
os << "\\n";
}
os << "min_offset=" << g[v].min_offset;
if (g[v].max_offset >= ROSE_BOUND_INF) {
os << ", max_offset=inf";
} else {
os << ", max_offset=" << g[v].max_offset;
}
os << "\\n";
if (!g[v].reports.empty()) {
if (g[v].eod_accept) {
os << "\\nACCEPT_EOD";
} else {
os << "\\nACCEPT";
}
os << " (rep=" << as_string_list(g[v].reports) << ")";
}
const RoseRole *r = getRoseRole(v);
if (g[v].suffix) {
os << "\\nSUFFIX (TOP " << g[v].suffix.top;
if (r) {
assert(t);
const NFA *n = (const NFA *)((const char *)t + r->suffixOffset);
os << ", Q" << n->queueIndex;
} else {
// Can't dump the queue number, but we can identify the suffix.
if (g[v].suffix.graph) {
os << ", graph=" << g[v].suffix.graph.get()
<< " " << to_string(g[v].suffix.graph->kind);
}
if (g[v].suffix.castle) {
os << ", castle=" << g[v].suffix.castle.get();
}
if (g[v].suffix.rdfa) {
os << ", dfa=" << g[v].suffix.rdfa.get();
}
if (g[v].suffix.haig) {
os << ", haig=" << g[v].suffix.haig.get();
}
}
os << ")";
}
if (!g[v].literals.empty()) {
u32 id = *g[v].literals.begin();
if (id < build.literal_info.size()
&& build.literal_info[id].final_id != MO_INVALID_IDX
&& (build.literal_info[id].final_id & LITERAL_DR_FLAG)) {
os << "\\nDIRECT REPORT";
}
}
if (g[v].escapes.any()) {
os << "\\nescapes=";
describeClass(os, g[v].escapes, 5, CC_OUT_DOT);
}
if (ghost.find(v) != ghost.end()) {
os << "\\nGHOST";
}
if (g[v].left) {
const char *roseKind =
build.isRootSuccessor(v) ? "PREFIX" : "INFIX";
os << "\\nROSE " << roseKind;
os << " (";
if (r) {
os << "Q" << r->leftfixQueue << ", ";
}
os << "report " << g[v].left.leftfix_report << ")";
if (g[v].left.graph) {
os << " " << to_string(g[v].left.graph->kind);
}
}
os << "\"";
// Roles with a rose prefix get a colour.
if (g[v].left) {
os << " color=violetred ";
}
// Our accepts get different colours.
if (!g[v].reports.empty()) {
os << " color=blue ";
}
if (g[v].suffix) {
os << " color=forestgreen ";
}
os << "]";
}
void operator() (ostream &os, const RoseEdge &e) const {
const RoseGraph &g = build.g;
// Render the bounds on this edge.
u32 minBound = g[e].minBound;
u32 maxBound = g[e].maxBound;
os << "[label=\"";
if (minBound == 0 && maxBound == ROSE_BOUND_INF) {
os << ".*";
} else if (minBound == 1 && maxBound == ROSE_BOUND_INF) {
os << ".+";
} else {
os << ".{" << minBound << ",";
if (maxBound != ROSE_BOUND_INF) {
os << maxBound;
}
os << "}";
}
// If we lead to an infix, display which top we're using.
RoseVertex v = target(e, g);
if (g[v].left) {
os << "\\nROSE TOP " << g[e].rose_top;
}
switch (g[e].history) {
case ROSE_ROLE_HISTORY_NONE:
break;
case ROSE_ROLE_HISTORY_ANCH:
os << "\\nANCH history";
break;
case ROSE_ROLE_HISTORY_LAST_BYTE:
os << "\\nLAST_BYTE history";
break;
case ROSE_ROLE_HISTORY_INVALID:
os << "\\nINVALID history";
break;
}
os << "\"]";
}
private:
// Render the literal associated with a vertex.
void writeLiteral(ostream &os, u32 id) const {
os << "lit=" << id;
if (id < build.literal_info.size()) {
os << "/" << build.literal_info[id].final_id << " ";
} else {
os << "/nofinal ";
}
if (contains(build.literals.right, id)) {
const auto &lit = build.literals.right.at(id);
os << '\'' << dotEscapeString(lit.s.get_string()) << '\'';
if (lit.s.any_nocase()) {
os << " (nocase)";
}
if (lit.delay) {
os << " +" << lit.delay;
}
} else {
os << "<unknown>";
}
}
const RoseRole *getRoseRole(RoseVertex v) const {
return ue2::getRoseRole(build, t, v);
}
set<RoseVertex> ghost;
const RoseBuildImpl &build;
const RoseEngine *t;
};
} // namespace
void dumpRoseGraph(const RoseBuild &build_base, const RoseEngine *t,
const char *filename) {
const RoseBuildImpl &build = dynamic_cast<const RoseBuildImpl &>(build_base);
const Grey &grey = build.cc.grey;
if (!grey.dumpFlags) {
return;
}
stringstream ss;
ss << grey.dumpPath << filename;
DEBUG_PRINTF("dumping graph to %s\n", ss.str().c_str());
ofstream os(ss.str());
RoseGraphWriter writer(build, t);
writeGraphviz(os, build.g, writer, get(&RoseVertexProps::idx, build.g));
}
namespace {
struct CompareVertexRole {
explicit CompareVertexRole(const RoseGraph &g_in) : g(g_in) {}
inline bool operator()(const RoseVertex &a, const RoseVertex &b) const {
return g[a].role < g[b].role;
}
private:
const RoseGraph &g;
};
} // namespace
static
void lit_graph_info(const RoseBuildImpl &build, const rose_literal_info &li,
u32 *min_offset, bool *in_root_role) {
*min_offset = ~0U;
*in_root_role = false;
for (auto v : li.vertices) {
*in_root_role |= build.isRootSuccessor(v);
LIMIT_TO_AT_MOST(min_offset, build.g[v].min_offset);
}
}
static
void dumpRoseLiterals(const RoseBuildImpl &build, const char *filename) {
const RoseGraph &g = build.g;
DEBUG_PRINTF("dumping literals\n");
ofstream os(filename);
os << "ROSE LITERALS: a total of " << build.literals.right.size()
<< " literals and " << num_vertices(g) << " roles." << endl << endl;
const auto depths = findDepths(build);
for (const auto &e : build.literals.right) {
u32 id = e.first;
const ue2_literal &s = e.second.s;
const rose_literal_info &lit_info = build.literal_info[id];
switch (e.second.table) {
case ROSE_ANCHORED:
os << "ANCHORED";
break;
case ROSE_FLOATING:
os << "FLOATING";
break;
case ROSE_EOD_ANCHORED:
os << "EOD-ANCHORED";
break;
case ROSE_ANCHORED_SMALL_BLOCK:
os << "SMALL-BLOCK";
break;
case ROSE_EVENT:
os << "EVENT";
break;
}
os << " ID " << id << "/" << lit_info.final_id << ": \""
<< escapeString(s.get_string()) << "\""
<< " (len " << s.length() << ",";
if (s.any_nocase()) {
os << " nocase,";
}
if (lit_info.requires_benefits) {
os << " benefits,";
}
if (e.second.delay) {
os << " delayed "<< e.second.delay << ",";
}
os << " groups 0x" << hex << setw(16) << setfill('0')
<< lit_info.group_mask << dec << ",";
if (lit_info.squash_group) {
os << " squashes group,";
}
u32 min_offset;
bool in_root_role;
lit_graph_info(build, lit_info, &min_offset, &in_root_role);
os << " min offset " << min_offset;
if (in_root_role) {
os << " root literal";
}
os << ") roles=" << lit_info.vertices.size() << endl;
if (!lit_info.delayed_ids.empty()) {
os << " Children:";
for (u32 d_id : lit_info.delayed_ids) {
os << " " << d_id;
}
os << endl;
}
// Temporary vector, so that we can sort the output by role.
vector<RoseVertex> verts(lit_info.vertices.begin(),
lit_info.vertices.end());
sort(verts.begin(), verts.end(), CompareVertexRole(g));
for (RoseVertex v : verts) {
// role info
os << " Role " << g[v].role << ": depth=" << depths.at(v)
<< ", groups=0x" << hex << setw(16) << setfill('0')
<< g[v].groups << dec;
if (g[v].reports.empty()) {
os << ", report=NONE";
} else {
os << ", report={" << as_string_list(g[v].reports) << "}";
}
os << ", min_offset=" << g[v].min_offset;
os << ", max_offset=" << g[v].max_offset << endl;
// pred info
for (const auto &ie : in_edges_range(v, g)) {
os << " Predecessor role=";
u32 predRole = g[source(ie, g)].role;
if (predRole == MO_INVALID_IDX) {
os << "ROOT";
} else if (predRole == g[build.anchored_root].role) {
os << "ANCHORED_ROOT";
} else {
os << predRole;
}
os << ": bounds [" << g[ie].minBound << ", ";
if (g[ie].maxBound == ROSE_BOUND_INF) {
os << "inf";
} else {
os << g[ie].maxBound;
}
os << "]" << endl;
}
}
}
os.close();
}
template<class Iter>
static
string toHex(Iter i, const Iter &end) {
ostringstream oss;
for (; i != end; ++i) {
u8 c = *i;
oss << hex << setw(2) << setfill('0') << ((unsigned)c & 0xff);
}
return oss.str();
}
static
void dumpTestLiterals(const string &filename, const vector<hwlmLiteral> &lits) {
ofstream of(filename.c_str());
for (const hwlmLiteral &lit : lits) {
of << lit.id << "=";
if (lit.nocase) {
of << "!";
}
of << toHex(lit.s.begin(), lit.s.end());
if (!lit.msk.empty()) {
of << " " << toHex(lit.msk.begin(), lit.msk.end());
of << " " << toHex(lit.cmp.begin(), lit.cmp.end());
}
of << endl;
}
of.close();
}
namespace {
struct LongerThanLimit {
explicit LongerThanLimit(size_t len) : max_len(len) {}
bool operator()(const hwlmLiteral &lit) const {
return lit.s.length() > max_len;
}
private:
size_t max_len;
};
} // namespace
static
void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) {
vector<hwlmLiteral> lits;
fillHamsterLiteralList(build, ROSE_ANCHORED, &lits);
dumpTestLiterals(base + "rose_anchored_test_literals.txt", lits);
lits.clear();
fillHamsterLiteralList(build, ROSE_FLOATING, &lits);
dumpTestLiterals(base + "rose_float_test_literals.txt", lits);
lits.clear();
fillHamsterLiteralList(build, ROSE_EOD_ANCHORED, &lits);
dumpTestLiterals(base + "rose_eod_test_literals.txt", lits);
lits.clear();
fillHamsterLiteralList(build, ROSE_FLOATING, &lits);
fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK, &lits);
lits.erase(remove_if(lits.begin(), lits.end(),
LongerThanLimit(ROSE_SMALL_BLOCK_LEN)),
lits.end());
dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits);
}
static
CharReach bitvectorToReach(const u8 *reach) {
CharReach cr;
for (size_t i = 0; i < 256; i++) {
if (reach[i / 8] & (1U << (i % 8))) {
cr.set(i);
}
}
return cr;
}
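/*
 * bitvectorToReach reads the table little-endian within each byte: character
 * i lives at bit (i % 8) of byte (i / 8). A standalone sanity sketch, with
 * std::bitset standing in for CharReach:
 */
#include <bitset>
#include <cassert>
#include <cstdint>

int main() {
    uint8_t reach[32] = {0};
    reach[0] = 0x05; // bits 0 and 2 -> characters 0x00 and 0x02
    reach[8] = 0x01; // bit 0 of byte 8 -> character 0x40 ('@')
    std::bitset<256> cr;
    for (size_t i = 0; i < 256; i++) {
        if (reach[i / 8] & (1U << (i % 8))) {
            cr.set(i);
        }
    }
    assert(cr.test(0x00) && cr.test(0x02) && cr.test(0x40) && cr.count() == 3);
    return 0;
}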
static
void dumpRoseLookaround(const RoseBuildImpl &build, const RoseEngine *t,
const Grey &grey, const string &filename) {
stringstream ss;
ss << grey.dumpPath << filename;
ofstream os(ss.str());
const RoseGraph &g = build.g;
const u8 *base = (const u8 *)t;
const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
const u8 *reach_base = base + t->lookaroundReachOffset;
for (RoseVertex v : vertices_range(g)) {
const RoseRole *role = getRoseRole(build, t, v);
if (!role || role->lookaroundIndex == MO_INVALID_IDX) {
continue;
}
os << "Role " << g[v].role << endl;
os << " literals: " << as_string_list(g[v].literals) << endl;
os << " lookaround: index=" << role->lookaroundIndex
<< ", count=" << role->lookaroundCount << endl;
const s8 *look = look_base + role->lookaroundIndex;
const s8 *look_end = look + role->lookaroundCount;
const u8 *reach =
reach_base + role->lookaroundIndex * REACH_BITVECTOR_LEN;
for (; look < look_end; look++, reach += REACH_BITVECTOR_LEN) {
os << " " << std::setw(4) << std::setfill(' ') << int{*look}
<< ": ";
describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
os << endl;
}
os << endl;
}
os.close();
}
void dumpRose(const RoseBuild &build_base, const RoseEngine *t,
const Grey &grey) {
if (!grey.dumpFlags) {
return;
}
const RoseBuildImpl &build = dynamic_cast<const RoseBuildImpl&>(build_base);
stringstream ss;
ss << grey.dumpPath << "rose.txt";
FILE *f = fopen(ss.str().c_str(), "w");
if (!t) {
fprintf(f, "<< no rose >>\n");
fclose(f);
return;
}
// Dump Rose table info
roseDumpText(t, f);
fclose(f);
roseDumpComponents(t, false, grey.dumpPath);
// Graph.
dumpRoseGraph(build, t, "rose.dot");
// Literals.
ss.str("");
ss.clear();
ss << grey.dumpPath << "rose_literals.txt";
dumpRoseLiterals(build, ss.str().c_str());
dumpRoseTestLiterals(build, grey.dumpPath);
f = fopen((grey.dumpPath + "/rose_struct.txt").c_str(), "w");
roseDumpStructRaw(t, f);
fclose(f);
// Lookaround tables.
dumpRoseLookaround(build, t, grey, "rose_lookaround.txt");
}
} // namespace ue2


@@ -0,0 +1,60 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_BUILD_DUMP_H
#define ROSE_BUILD_DUMP_H
struct RoseEngine;
namespace ue2 {
class RoseBuild;
struct Grey;
#ifdef DUMP_SUPPORT
// Dump the Rose graph in graphviz representation.
void dumpRoseGraph(const RoseBuild &build, const RoseEngine *t,
const char *filename);
void dumpRose(const RoseBuild &build_base, const RoseEngine *t,
const Grey &grey);
#else
static UNUSED
void dumpRoseGraph(const RoseBuild &, const RoseEngine *, const char *) {
}
static UNUSED
void dumpRose(const RoseBuild &, const RoseEngine *, const Grey &) {
}
#endif
} // namespace ue2
#endif

570
src/rose/rose_build_impl.h Normal file

@@ -0,0 +1,570 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_BUILD_IMPL_H_17E20A3C6935D6
#define ROSE_BUILD_IMPL_H_17E20A3C6935D6
#include "rose_build.h"
#include "rose_build_util.h"
#include "rose_graph.h"
#include "nfa/mpvcompile.h"
#include "nfa/goughcompile.h"
#include "nfa/nfa_internal.h"
#include "nfagraph/ng_holder.h"
#include "nfagraph/ng_revacc.h"
#include "util/alloc.h"
#include "util/order_check.h"
#include "util/queue_index_factory.h"
#include "util/ue2_containers.h"
#include <deque>
#include <map>
#include <string>
#include <vector>
#include <boost/bimap.hpp>
#include <boost/functional/hash/hash.hpp>
struct RoseEngine;
namespace ue2 {
#define ROSE_GROUPS_MAX 64
struct BoundaryReports;
struct CastleProto;
struct CompileContext;
struct hwlmLiteral;
class ReportManager;
class SomSlotManager;
struct suffix_id {
suffix_id(const RoseSuffixInfo &in)
: g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()),
h(in.haig.get()), dfa_min_width(in.dfa_min_width),
dfa_max_width(in.dfa_max_width) {
assert(!g || g->kind == NFA_SUFFIX);
}
bool operator==(const suffix_id &b) const {
bool rv = g == b.g && c == b.c && h == b.h && d == b.d;
assert(!rv || dfa_min_width == b.dfa_min_width);
assert(!rv || dfa_max_width == b.dfa_max_width);
return rv;
}
bool operator!=(const suffix_id &b) const { return !(*this == b); }
bool operator<(const suffix_id &b) const {
const suffix_id &a = *this;
ORDER_CHECK(g);
ORDER_CHECK(c);
ORDER_CHECK(d);
ORDER_CHECK(h);
return false;
}
NGHolder *graph() {
if (!d && !h) {
assert(dfa_min_width == depth(0));
assert(dfa_max_width == depth::infinity());
}
return g;
}
const NGHolder *graph() const {
if (!d && !h) {
assert(dfa_min_width == depth(0));
assert(dfa_max_width == depth::infinity());
}
return g;
}
CastleProto *castle() {
if (!d && !h) {
assert(dfa_min_width == depth(0));
assert(dfa_max_width == depth::infinity());
}
return c;
}
const CastleProto *castle() const {
if (!d && !h) {
assert(dfa_min_width == depth(0));
assert(dfa_max_width == depth::infinity());
}
return c;
}
raw_som_dfa *haig() { return h; }
const raw_som_dfa *haig() const { return h; }
raw_dfa *dfa() { return d; }
const raw_dfa *dfa() const { return d; }
size_t hash() const;
private:
NGHolder *g;
CastleProto *c;
raw_dfa *d;
raw_som_dfa *h;
depth dfa_min_width;
depth dfa_max_width;
friend depth findMinWidth(const suffix_id &s);
friend depth findMaxWidth(const suffix_id &s);
};
std::set<ReportID> all_reports(const suffix_id &s);
std::set<u32> all_tops(const suffix_id &s);
bool has_eod_accepts(const suffix_id &s);
bool has_non_eod_accepts(const suffix_id &s);
depth findMinWidth(const suffix_id &s);
depth findMaxWidth(const suffix_id &s);
size_t hash_value(const suffix_id &s);
/** \brief represents an engine to the left of a rose role */
struct left_id {
left_id(const LeftEngInfo &in)
: g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()),
h(in.haig.get()), dfa_min_width(in.dfa_min_width),
dfa_max_width(in.dfa_max_width) {
assert(!g || !generates_callbacks(*g));
}
bool operator==(const left_id &b) const {
bool rv = g == b.g && c == b.c && h == b.h && d == b.d;
assert(!rv || dfa_min_width == b.dfa_min_width);
assert(!rv || dfa_max_width == b.dfa_max_width);
return rv;
}
bool operator!=(const left_id &b) const { return !(*this == b); }
bool operator<(const left_id &b) const {
const left_id &a = *this;
ORDER_CHECK(g);
ORDER_CHECK(c);
ORDER_CHECK(d);
ORDER_CHECK(h);
return false;
}
NGHolder *graph() {
if (!d && !h) {
assert(dfa_min_width == depth(0));
assert(dfa_max_width == depth::infinity());
}
return g;
}
const NGHolder *graph() const {
if (!d && !h) {
assert(dfa_min_width == depth(0));
assert(dfa_max_width == depth::infinity());
}
return g;
}
CastleProto *castle() {
if (!d && !h) {
assert(dfa_min_width == depth(0));
assert(dfa_max_width == depth::infinity());
}
return c;
}
const CastleProto *castle() const {
if (!d && !h) {
assert(dfa_min_width == depth(0));
assert(dfa_max_width == depth::infinity());
}
return c;
}
raw_som_dfa *haig() { return h; }
const raw_som_dfa *haig() const { return h; }
raw_dfa *dfa() { return d; }
const raw_dfa *dfa() const { return d; }
size_t hash() const;
private:
NGHolder *g;
CastleProto *c;
raw_dfa *d;
raw_som_dfa *h;
depth dfa_min_width;
depth dfa_max_width;
friend bool isAnchored(const left_id &r);
friend depth findMinWidth(const left_id &r);
friend depth findMaxWidth(const left_id &r);
};
std::set<u32> all_tops(const left_id &r);
bool isAnchored(const left_id &r);
depth findMinWidth(const left_id &r);
depth findMaxWidth(const left_id &r);
u32 num_tops(const left_id &r);
size_t hash_value(const left_id &r);
struct rose_literal_info {
ue2::flat_set<u32> delayed_ids;
ue2::flat_set<RoseVertex> vertices;
rose_group group_mask = 0;
u32 undelayed_id = MO_INVALID_IDX;
u32 final_id = MO_INVALID_IDX; /* id reported by fdr */
bool squash_group = false;
bool requires_explode = false;
bool requires_benefits = false;
};
/**
* \brief Main literal struct used at Rose build time. Numeric literal IDs
* used at build time point at these (via the RoseBuildImpl::literals map).
*/
struct rose_literal_id {
rose_literal_id(const ue2_literal &s_in, rose_literal_table table_in,
u32 delay_in)
: s(s_in), table(table_in), delay(delay_in), distinctiveness(0) {}
rose_literal_id(const ue2_literal &s_in, const std::vector<u8> &msk_in,
const std::vector<u8> &cmp_in, rose_literal_table table_in,
u32 delay_in);
ue2_literal s;
std::vector<u8> msk;
std::vector<u8> cmp;
rose_literal_table table;
u32 delay;
u32 distinctiveness;
size_t elength(void) const { return s.length() + delay; }
};
static inline
bool operator<(const rose_literal_id &a, const rose_literal_id &b) {
ORDER_CHECK(distinctiveness);
ORDER_CHECK(table);
ORDER_CHECK(s);
ORDER_CHECK(delay);
ORDER_CHECK(msk);
ORDER_CHECK(cmp);
return false;
}
// Literals are stored in a map from (string, nocase) -> ID
typedef boost::bimap<rose_literal_id, u32> RoseLiteralMap;
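/*
 * Since RoseLiteralMap is a boost::bimap, the one container serves both
 * directions: descriptor -> ID when adding literals, and ID -> descriptor
 * when dumping (literals.right.at(id) in the dump code). A minimal sketch of
 * the pattern, with std::string standing in for rose_literal_id:
 */
#include <boost/bimap.hpp>
#include <cassert>
#include <string>

int main() {
    typedef boost::bimap<std::string, unsigned> LitMap;
    LitMap lits;
    lits.insert(LitMap::value_type("foobar", 0));
    lits.insert(LitMap::value_type("teakettle", 1));
    assert(lits.left.at("teakettle") == 1); // forward: literal -> ID
    assert(lits.right.at(0) == "foobar");   // reverse: ID -> literal
    return 0;
}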
struct simple_anchored_info {
simple_anchored_info(u32 min_b, u32 max_b, const ue2_literal &lit)
: min_bound(min_b), max_bound(max_b), literal(lit) {}
u32 min_bound; /**< min number of characters required before literal can
* start matching */
u32 max_bound; /**< max number of characters allowed before literal can
* start matching */
ue2_literal literal;
};
static really_inline
bool operator<(const simple_anchored_info &a, const simple_anchored_info &b) {
ORDER_CHECK(min_bound);
ORDER_CHECK(max_bound);
ORDER_CHECK(literal);
return false;
}
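/*
 * The comparators above build a lexicographic ordering out of ORDER_CHECK
 * (util/order_check.h). The exact macro isn't shown here; presumably it
 * compares one field of a and b and returns early on inequality, along these
 * lines:
 */
#include <cassert>

#define ORDER_CHECK_SKETCH(field)            \
    do {                                     \
        if (a.field < b.field) return true;  \
        if (b.field < a.field) return false; \
    } while (0)

struct P {
    int x;
    int y;
};

static bool operator<(const P &a, const P &b) {
    ORDER_CHECK_SKETCH(x);
    ORDER_CHECK_SKETCH(y);
    return false; // all fields equal
}

int main() {
    assert((P{1, 5} < P{2, 0}));   // decided by x
    assert((P{1, 2} < P{1, 3}));   // x ties; decided by y
    assert(!(P{1, 2} < P{1, 2}));  // equal keys are not less
    return 0;
}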
struct OutfixInfo { /* TODO: poly */
OutfixInfo() {}
explicit OutfixInfo(std::unique_ptr<raw_dfa> r) : rdfa(std::move(r)) {
assert(rdfa);
}
explicit OutfixInfo(std::unique_ptr<NGHolder> h) : holder(std::move(h)) {
assert(holder);
}
explicit OutfixInfo(std::unique_ptr<raw_som_dfa> r) : haig(std::move(r)) {
assert(haig);
}
u32 get_queue(QueueIndexFactory &qif);
bool is_nonempty_mpv() const {
return !puffettes.empty() || !triggered_puffettes.empty();
}
bool is_dead() const {
return !holder && !rdfa && !haig && puffettes.empty() &&
triggered_puffettes.empty();
}
void clear() {
holder.reset();
rdfa.reset();
haig.reset();
puffettes.clear();
triggered_puffettes.clear();
assert(is_dead());
}
std::unique_ptr<NGHolder> holder;
std::unique_ptr<raw_dfa> rdfa;
std::unique_ptr<raw_som_dfa> haig;
std::vector<raw_puff> puffettes;
std::vector<raw_puff> triggered_puffettes;
/** Once the outfix has been built into an engine, this will point to it. */
NFA *nfa = nullptr;
RevAccInfo rev_info;
u32 maxBAWidth = 0; //!< max bi-anchored width
depth minWidth = depth::infinity();
depth maxWidth = 0;
u64a maxOffset = 0;
bool chained = false;
bool in_sbmatcher = false; //!< handled by small-block matcher.
private:
u32 queue = ~0U;
};
std::set<ReportID> all_reports(const OutfixInfo &outfix);
// Concrete impl class
class RoseBuildImpl : public RoseBuild {
public:
RoseBuildImpl(ReportManager &rm, SomSlotManager &ssm,
const CompileContext &cc, const BoundaryReports &boundary);
~RoseBuildImpl() override;
// Adds a single literal.
void add(bool anchored, bool eod, const ue2_literal &lit,
const ue2::flat_set<ReportID> &ids) override;
bool addRose(const RoseInGraph &ig, bool prefilter,
bool finalChance = false) override;
bool addSombeRose(const RoseInGraph &ig) override;
bool addOutfix(const NGHolder &h) override;
bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) override;
bool addOutfix(const raw_puff &rp) override;
bool addChainTail(const raw_puff &rp, u32 *queue_out, u32 *event_out) override;
// Returns true if we were able to add it as a mask
bool add(bool anchored, const std::vector<CharReach> &mask,
const ue2::flat_set<ReportID> &reports) override;
bool addAnchoredAcyclic(const NGHolder &graph) override;
bool validateMask(const std::vector<CharReach> &mask,
const ue2::flat_set<ReportID> &reports, bool anchored,
bool eod) const override;
void addMask(const std::vector<CharReach> &mask,
const ue2::flat_set<ReportID> &reports, bool anchored,
bool eod) override;
// Construct a runtime implementation.
aligned_unique_ptr<RoseEngine> buildRose(u32 minWidth) override;
aligned_unique_ptr<RoseEngine> buildFinalEngine(u32 minWidth);
void setSom() override { hasSom = true; }
std::unique_ptr<RoseDedupeAux> generateDedupeAux() const override;
bool hasEodSideLink() const;
// Find the maximum bound on the edges to this vertex's successors.
u32 calcSuccMaxBound(RoseVertex u) const;
// Assign roles to groups, writing the groups bitset into each role in the
// graph.
void assignGroupsToRoles();
/* Returns the ID of the given literal in the literal map, adding it if
* necessary. */
u32 getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table);
// Variant with msk/cmp.
u32 getLiteralId(const ue2_literal &s, const std::vector<u8> &msk,
const std::vector<u8> &cmp, u32 delay,
rose_literal_table table);
bool hasLiteral(const ue2_literal &s, rose_literal_table table) const;
u32 getNewLiteralId(void);
void removeVertices(const std::vector<RoseVertex> &dead);
// Is the Rose anchored?
bool hasNoFloatingRoots() const;
bool hasDirectReports() const;
RoseVertex cloneVertex(RoseVertex v);
u32 calcHistoryRequired() const;
rose_group getInitialGroups() const;
rose_group getSuccGroups(RoseVertex start) const;
rose_group getGroups(RoseVertex v) const;
bool hasDelayedLiteral(RoseVertex v) const;
bool hasDelayPred(RoseVertex v) const;
bool hasLiteralInTable(RoseVertex v, enum rose_literal_table t) const;
bool hasAnchoredTablePred(RoseVertex v) const;
void assignGroupsToLiterals(void);
// Is the given vertex a successor of either root or anchored_root?
bool isRootSuccessor(const RoseVertex &v) const;
/* Is the given vertex a successor of something other than root or
* anchored_root? */
bool isNonRootSuccessor(const RoseVertex &v) const;
bool isDirectReport(u32 id) const;
bool isDelayed(u32 id) const;
bool hasDirectFinalId(u32 id) const;
bool hasDirectFinalId(RoseVertex v) const;
bool hasFinalId(u32 id) const;
bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored
* table */
bool isFloating(RoseVertex v) const; /* true iff has literal in floating
* table */
bool isInETable(RoseVertex v) const; /* true iff has literal in eod
* table */
size_t maxLiteralLen(RoseVertex v) const;
size_t minLiteralLen(RoseVertex v) const;
// max overlap considered for every pair (ulit, vlit).
size_t maxLiteralOverlap(RoseVertex u, RoseVertex v) const;
void renumberVertices(void);
bool isPseudoStar(const RoseEdge &e) const;
bool isPseudoStarOrFirstOnly(const RoseEdge &e) const;
bool hasOnlyPseudoStarInEdges(RoseVertex v) const;
bool isAnyStart(const RoseVertex &v) const {
return v == root || v == anchored_root;
}
bool isVirtualVertex(const RoseVertex &v) const {
return g[v].eod_accept || isAnyStart(v);
}
void handleMixedSensitivity(void);
void findTransientLeftfixes(void);
const CompileContext &cc;
RoseGraph g;
const RoseVertex root;
const RoseVertex anchored_root;
RoseLiteralMap literals;
std::map<RoseVertex, RoseVertex> ghost;
size_t vertexIndex;
ReportID getNewNfaReport() override {
return next_nfa_report++;
}
std::deque<rose_literal_info> literal_info;
u32 delay_base_id;
bool hasSom; //!< at least one pattern requires SOM.
std::map<size_t, std::vector<std::unique_ptr<raw_dfa>>> anchored_nfas;
std::map<simple_anchored_info, std::set<u32>> anchored_simple;
std::map<u32, std::set<u32> > group_to_literal;
u32 group_weak_end;
u32 group_end;
std::map<CharReach, std::set<RoseVertex> > side_squash_roles;
u32 anchored_base_id;
u32 nonbenefits_base_id;
u32 ematcher_region_size; /**< number of bytes the eod table runs over */
/** \brief Mapping from anchored literal ID to the original literal suffix
* present when the literal was added to the literal matcher. Used for
* overlap calculation in history assignment. */
std::map<u32, rose_literal_id> anchoredLitSuffix;
std::map<u32, std::set<u32> > final_id_to_literal; /* final literal id to
* literal id */
unordered_set<left_id> transient;
unordered_map<left_id, rose_group> rose_squash_masks;
std::vector<OutfixInfo> outfixes;
/** \brief MPV outfix entry. Null if not used, and moved into the outfixes
* list before we start building the bytecode (at which point it is set to
* null again). */
std::unique_ptr<OutfixInfo> mpv_outfix = nullptr;
bool floating_direct_report;
u32 eod_event_literal_id; // ID of EOD event literal, or MO_INVALID_IDX.
u32 max_rose_anchored_floating_overlap;
/** \brief Flattened list of report IDs for multi-direct reports, indexed
* by MDR final_id. */
std::vector<ReportID> mdr_reports;
QueueIndexFactory qif;
ReportManager &rm;
SomSlotManager &ssm;
const BoundaryReports &boundary;
private:
ReportID next_nfa_report;
};
// Free functions, in rose_build_misc.cpp
bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v);
bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v);
size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b);
void setReportId(NGHolder &g, ReportID id);
#ifndef NDEBUG
bool roseHasTops(const RoseGraph &g, RoseVertex v);
#endif
u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm);
// Function that operates on a msk/cmp pair and a literal, as used in
// hwlmLiteral, and zeroes msk elements that don't add any power to the
// literal.
void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk,
std::vector<u8> &cmp);
void fillHamsterLiteralList(const RoseBuildImpl &tbi, rose_literal_table table,
std::vector<hwlmLiteral> *hl);
// Find the minimum depth in hops of each role. Note that a role may be
// accessible from both the root and the anchored root.
std::map<RoseVertex, u32> findDepths(const RoseBuildImpl &build);
#ifndef NDEBUG
bool canImplementGraphs(const RoseBuildImpl &tbi);
#endif
} // namespace ue2
#endif /* ROSE_BUILD_IMPL_H_17E20A3C6935D6 */


@@ -0,0 +1,325 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "rose/rose_build_infix.h"
#include "ue2common.h"
#include "nfa/castlecompile.h"
#include "nfagraph/ng_dump.h"
#include "nfagraph/ng_width.h"
#include "nfagraph/ng_util.h"
#include "rose/rose_build_impl.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/graph_range.h"
#include "util/graph.h"
#include "util/ue2_containers.h"
#include "util/ue2string.h"
#include <algorithm>
#include <set>
using namespace std;
namespace ue2 {
static
bool couldEndLiteral(const ue2_literal &s, NFAVertex initial,
const NGHolder &h) {
ue2::flat_set<NFAVertex> curr, next;
curr.insert(initial);
for (auto it = s.rbegin(), ite = s.rend(); it != ite; ++it) {
const CharReach &cr_s = *it;
bool matched = false;
next.clear();
for (auto v : curr) {
if (v == h.start) {
// We can't see what we had before the start, so we must assume
// the literal could overlap with it.
return true;
}
const CharReach &cr_v = h[v].char_reach;
if (overlaps(cr_v, cr_s)) {
insert(&next, inv_adjacent_vertices(v, h));
matched = true;
}
}
if (!matched) {
return false;
}
curr.swap(next);
}
return true;
}
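/*
 * couldEndLiteral scans the literal backwards from the candidate end vertex,
 * keeping the frontier of vertices that could have produced each earlier
 * character; reaching start means the preceding input is unknown, so the
 * conservative answer is true. A standalone sketch of the same backward walk
 * over a toy graph (predecessor sets plus one matched character per vertex,
 * '\0' marking start):
 */
#include <cassert>
#include <map>
#include <set>
#include <string>

static bool couldEnd(const std::string &lit, int end_v,
                     const std::map<int, std::set<int>> &preds,
                     const std::map<int, char> &reach) {
    std::set<int> curr = {end_v};
    for (auto it = lit.rbegin(); it != lit.rend(); ++it) {
        std::set<int> next;
        bool matched = false;
        for (int v : curr) {
            if (reach.at(v) == '\0') {
                return true; // hit start: assume a possible overlap
            }
            if (reach.at(v) == *it) {
                next.insert(preds.at(v).begin(), preds.at(v).end());
                matched = true;
            }
        }
        if (!matched) {
            return false;
        }
        curr.swap(next);
    }
    return true;
}

int main() {
    // start(0) -> a(1) -> b(2) -> c(3)
    std::map<int, std::set<int>> preds = {{0, {}}, {1, {0}}, {2, {1}}, {3, {2}}};
    std::map<int, char> reach = {{0, '\0'}, {1, 'a'}, {2, 'b'}, {3, 'c'}};
    assert(couldEnd("abc", 3, preds, reach));  // literal ends exactly at c
    assert(!couldEnd("xbc", 3, preds, reach)); // 'x' cannot precede "bc"
    assert(couldEnd("zab", 2, preds, reach));  // walk hits start: conservative
    return 0;
}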
static
void contractVertex(NGHolder &g, NFAVertex v,
ue2::unordered_set<pair<NFAVertex, NFAVertex>> &all_edges) {
for (auto u : inv_adjacent_vertices_range(v, g)) {
if (u == v) {
continue; // self-edge
}
for (auto w : adjacent_vertices_range(v, g)) {
if (w == v) {
continue; // self-edge
}
// Construct edge (u, v) only if it doesn't already exist. We use
// the all_edges container here, as checking existence inside the
// graph is expensive when u or v have large degree.
if (all_edges.emplace(u, w).second) {
add_edge(u, w, g);
}
}
}
// Note that edges to/from v will remain in all_edges.
clear_vertex(v, g);
}
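/*
 * contractVertex splices a vertex out while preserving connectivity: every
 * predecessor gets a direct edge to every successor before the vertex is
 * cleared. A tiny standalone illustration over a plain edge set standing in
 * for the BGL graph:
 */
#include <cassert>
#include <set>
#include <utility>

typedef std::set<std::pair<int, int>> EdgeSet;

static void contract(EdgeSet &edges, int v) {
    std::set<int> preds, succs;
    for (const auto &e : edges) {
        if (e.second == v && e.first != v) preds.insert(e.first);
        if (e.first == v && e.second != v) succs.insert(e.second);
    }
    for (auto it = edges.begin(); it != edges.end();) {
        if (it->first == v || it->second == v) {
            it = edges.erase(it);
        } else {
            ++it;
        }
    }
    for (int u : preds) {
        for (int w : succs) {
            edges.emplace(u, w); // the set deduplicates, like all_edges above
        }
    }
}

int main() {
    EdgeSet g = {{1, 2}, {2, 3}, {2, 4}};
    contract(g, 2);
    assert(g == (EdgeSet{{1, 3}, {1, 4}})); // paths through 2 survive
    return 0;
}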
static
u32 findMaxInfixMatches(const NGHolder &h, const set<ue2_literal> &lits) {
DEBUG_PRINTF("h=%p, %zu literals\n", &h, lits.size());
//dumpGraph("infix.dot", h.g);
if (!onlyOneTop(h)) {
DEBUG_PRINTF("more than one top!n");
return NO_MATCH_LIMIT;
}
// Indices of vertices that could terminate any of the literals in 'lits'.
set<u32> terms;
for (const auto &s : lits) {
DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
if (s.empty()) {
// Likely an anchored case, be conservative here.
return NO_MATCH_LIMIT;
}
for (auto v : vertices_range(h)) {
if (is_special(v, h)) {
continue;
}
if (couldEndLiteral(s, v, h)) {
u32 idx = h[v].index;
DEBUG_PRINTF("vertex %u could terminate lit\n", idx);
terms.insert(idx);
}
}
}
if (terms.empty()) {
DEBUG_PRINTF("literals cannot match inside infix\n");
return 0;
}
NGHolder g;
cloneHolder(g, h);
vector<NFAVertex> dead;
// The set of all edges in the graph is used for existence checks in contractVertex.
ue2::unordered_set<pair<NFAVertex, NFAVertex>> all_edges;
for (const auto &e : edges_range(g)) {
all_edges.emplace(source(e, g), target(e, g));
}
for (auto v : vertices_range(g)) {
if (is_special(v, g)) {
continue;
}
if (contains(terms, g[v].index)) {
continue;
}
contractVertex(g, v, all_edges);
dead.push_back(v);
}
remove_vertices(dead, g);
//dumpGraph("relaxed.dot", g.g);
depth maxWidth = findMaxWidth(g);
DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str());
assert(maxWidth.is_reachable());
if (maxWidth.is_infinite()) {
// Cycle detected, so we can likely squeeze an unlimited number of
// matches into this graph.
return NO_MATCH_LIMIT;
}
assert(terms.size() >= maxWidth);
return maxWidth;
}
namespace {
struct ReachMismatch {
explicit ReachMismatch(const CharReach &cr_in) : cr(cr_in) {}
bool operator()(const CharReach &a) const { return !overlaps(cr, a); }
private:
CharReach cr;
};
} // namespace
static
u32 findMaxInfixMatches(const CastleProto &castle,
const set<ue2_literal> &lits) {
DEBUG_PRINTF("castle=%p, %zu literals\n", &castle, lits.size());
if (castle.repeats.size() > 1) {
DEBUG_PRINTF("more than one top!\n");
return NO_MATCH_LIMIT;
}
assert(!castle.repeats.empty());
const PureRepeat &pr = castle.repeats.begin()->second;
DEBUG_PRINTF("repeat=%s reach=%s\n", pr.bounds.str().c_str(),
describeClass(pr.reach).c_str());
size_t max_count = 0;
for (const auto &s : lits) {
DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
if (s.empty()) {
// Likely an anchored case, be conservative here.
return NO_MATCH_LIMIT;
}
size_t count = 0;
auto f = find_if(s.rbegin(), s.rend(), ReachMismatch(pr.reach));
if (f == s.rbegin()) {
DEBUG_PRINTF("lit can't terminate inside infix\n");
count = 0;
} else if (f != s.rend()) {
size_t suffix_len = distance(s.rbegin(), f);
DEBUG_PRINTF("suffix of len %zu matches at start\n", suffix_len);
if (pr.bounds.max.is_finite()) {
count = min(suffix_len, (size_t)pr.bounds.max);
} else {
count = suffix_len;
}
} else {
DEBUG_PRINTF("whole lit can match inside infix (repeatedly)\n");
if (pr.bounds.max.is_finite()) {
count = pr.bounds.max;
} else {
DEBUG_PRINTF("inf bound\n");
return NO_MATCH_LIMIT;
}
}
DEBUG_PRINTF("count=%zu\n", count);
max_count = max(max_count, count);
}
DEBUG_PRINTF("max_count %zu\n", max_count);
if (max_count > NO_MATCH_LIMIT) {
assert(0); // This would be a surprise.
return NO_MATCH_LIMIT;
}
return (u32)max_count;
}
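/*
 * The castle bound comes from reverse-iterator arithmetic: find_if over the
 * reversed literal locates the first character outside the repeat's reach,
 * and distance(s.rbegin(), f) is the length of the matching suffix. A
 * standalone check, modelling the reach as the single character 'a':
 */
#include <algorithm>
#include <cassert>
#include <iterator>
#include <string>

int main() {
    const std::string s = "xyzaaa";
    auto f = std::find_if(s.rbegin(), s.rend(),
                          [](char c) { return c != 'a'; });
    assert(std::distance(s.rbegin(), f) == 3); // three trailing 'a's fit

    const std::string t = "aaax"; // ends outside the reach
    assert(std::find_if(t.rbegin(), t.rend(),
                        [](char c) { return c != 'a'; }) == t.rbegin());
    return 0;
}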
u32 findMaxInfixMatches(const left_id &left, const set<ue2_literal> &lits) {
if (left.castle()) {
return findMaxInfixMatches(*left.castle(), lits);
}
if (left.graph()) {
return findMaxInfixMatches(*left.graph(), lits);
}
return NO_MATCH_LIMIT;
}
void findCountingMiracleInfo(const left_id &left, const vector<u8> &stopTable,
u8 *cm_count, CharReach *cm_cr) {
DEBUG_PRINTF("hello\n");
*cm_count = 0;
cm_cr->clear();
if (!left.graph()) {
return;
}
const NGHolder &g = *left.graph();
auto cyclics = findVerticesInCycles(g);
if (!proper_out_degree(g.startDs, g)) {
cyclics.erase(g.startDs);
}
CharReach cyclic_cr;
for (NFAVertex v : cyclics) {
DEBUG_PRINTF("considering %u ||=%zu\n", g[v].index,
g[v].char_reach.count());
cyclic_cr |= g[v].char_reach;
}
if (cyclic_cr.none() || cyclic_cr.all()) {
DEBUG_PRINTF("cyclic cr width %zu\n", cyclic_cr.count());
return; /* useless */
}
*cm_cr = ~cyclic_cr;
/* stop character will be part of normal miracles, no need to look for them
* here too */
assert(stopTable.size() == N_CHARS);
for (u32 i = 0; i < N_CHARS; i++) {
if (stopTable[i]) {
cm_cr->clear(i);
}
}
set<ue2_literal> lits;
for (size_t c = cm_cr->find_first(); c != CharReach::npos;
c = cm_cr->find_next(c)) {
DEBUG_PRINTF("considering %hhx as stop character\n", (u8)c);
lits.insert(ue2_literal(c, false));
}
u32 count = findMaxInfixMatches(*left.graph(), lits);
DEBUG_PRINTF("counting miracle %u\n", count + 1);
if (count && count < 50) {
*cm_count = count + 1;
}
}
} // namespace ue2


@@ -0,0 +1,52 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_BUILD_INFIX_H
#define ROSE_BUILD_INFIX_H
#include "ue2common.h"
#include <set>
#include <vector>
namespace ue2 {
class CharReach;
struct left_id;
struct ue2_literal;
static constexpr u32 NO_MATCH_LIMIT = 0xffffffff;
u32 findMaxInfixMatches(const left_id &left, const std::set<ue2_literal> &lits);
void findCountingMiracleInfo(const left_id &left, const std::vector<u8> &stop,
u8 *cm_count, CharReach *cm_cr);
} // namespace ue2
#endif // ROSE_BUILD_INFIX_H


@@ -0,0 +1,667 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Rose compile-time analysis for lookaround masks.
*/
#include "rose_build_lookaround.h"
#include "rose_build_impl.h"
#include "nfa/castlecompile.h"
#include "nfa/goughcompile.h"
#include "nfa/rdfa.h"
#include "nfagraph/ng_repeat.h"
#include "nfagraph/ng_util.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/graph_range.h"
#include "util/ue2_containers.h"
#include "util/verify_types.h"
#include <cstdlib>
#include <queue>
using namespace std;
namespace ue2 {
/** \brief Max search distance for reachability in front of a role. */
static const u32 MAX_FWD_LEN = 64;
/** \brief Max search distance for reachability behind a role. */
static const u32 MAX_BACK_LEN = 64;
/** \brief Max lookaround entries for a role. */
static const u32 MAX_LOOKAROUND_ENTRIES = 16;
/** \brief We would rather have lookarounds with smaller reach than this. */
static const u32 LOOKAROUND_WIDE_REACH = 200;
static
void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) {
ue2::flat_set<NFAVertex> curr, next;
// Consider only successors of start with the required top.
for (const auto &e : out_edges_range(g.start, g)) {
NFAVertex v = target(e, g);
if (v == g.startDs) {
continue;
}
if (g[e].top == top) {
curr.insert(v);
}
}
for (u32 i = 0; i < MAX_FWD_LEN; i++) {
if (curr.empty() || contains(curr, g.accept) ||
contains(curr, g.acceptEod)) {
break;
}
next.clear();
CharReach cr;
for (auto v : curr) {
assert(!is_special(v, g));
cr |= g[v].char_reach;
insert(&next, adjacent_vertices(v, g));
}
assert(cr.any());
look[i] |= cr;
curr.swap(next);
}
}
static
void getBackwardReach(const NGHolder &g, ReportID report, u32 lag,
map<s32, CharReach> &look) {
ue2::flat_set<NFAVertex> curr, next;
for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
if (contains(g[v].reports, report)) {
curr.insert(v);
}
}
for (u32 i = lag + 1; i <= MAX_BACK_LEN; i++) {
if (curr.empty() || contains(curr, g.start) ||
contains(curr, g.startDs)) {
break;
}
next.clear();
CharReach cr;
for (auto v : curr) {
assert(!is_special(v, g));
cr |= g[v].char_reach;
insert(&next, inv_adjacent_vertices(v, g));
}
assert(cr.any());
look[0 - i] |= cr;
curr.swap(next);
}
}
static
void getForwardReach(const CastleProto &castle, u32 top,
map<s32, CharReach> &look) {
depth len = castle.repeats.at(top).bounds.min;
len = min(len, depth(MAX_FWD_LEN));
assert(len.is_finite());
const CharReach &cr = castle.reach();
for (u32 i = 0; i < len; i++) {
look[i] |= cr;
}
}
static
void getBackwardReach(const CastleProto &castle, ReportID report, u32 lag,
map<s32, CharReach> &look) {
depth min_depth = depth::infinity();
for (const auto &m : castle.repeats) {
const PureRepeat &pr = m.second;
if (contains(pr.reports, report)) {
min_depth = min(min_depth, pr.bounds.min);
}
}
if (!min_depth.is_finite()) {
assert(0);
return;
}
const CharReach &cr = castle.reach();
for (u32 i = lag + 1; i <= min(lag + (u32)min_depth, MAX_BACK_LEN);
i++) {
look[0 - i] |= cr;
}
}
static
void getForwardReach(const raw_dfa &rdfa, map<s32, CharReach> &look) {
if (rdfa.states.size() < 2) {
return;
}
ue2::flat_set<dstate_id_t> curr, next;
curr.insert(rdfa.start_anchored);
for (u32 i = 0; i < MAX_FWD_LEN && !curr.empty(); i++) {
next.clear();
CharReach cr;
for (const auto state_id : curr) {
const dstate &ds = rdfa.states[state_id];
if (!ds.reports.empty() || !ds.reports_eod.empty()) {
return;
}
for (unsigned c = 0; c < N_CHARS; c++) {
dstate_id_t succ = ds.next[rdfa.alpha_remap[c]];
if (succ != DEAD_STATE) {
cr.set(c);
next.insert(succ);
}
}
}
assert(cr.any());
look[i] |= cr;
curr.swap(next);
}
}
static
void getSuffixForwardReach(const suffix_id &suff, u32 top,
map<s32, CharReach> &look) {
if (suff.graph()) {
getForwardReach(*suff.graph(), top, look);
} else if (suff.castle()) {
getForwardReach(*suff.castle(), top, look);
} else if (suff.dfa()) {
assert(top == 0); // DFA isn't multi-top capable.
getForwardReach(*suff.dfa(), look);
} else if (suff.haig()) {
assert(top == 0); // DFA isn't multi-top capable.
getForwardReach(*suff.haig(), look);
}
}
static
void getRoseForwardReach(const left_id &left, u32 top,
map<s32, CharReach> &look) {
if (left.graph()) {
getForwardReach(*left.graph(), top, look);
} else if (left.castle()) {
getForwardReach(*left.castle(), top, look);
} else if (left.dfa()) {
assert(top == 0); // DFA isn't multi-top capable.
getForwardReach(*left.dfa(), look);
} else if (left.haig()) {
assert(top == 0); // DFA isn't multi-top capable.
getForwardReach(*left.haig(), look);
}
}
static
void combineForwardMasks(const vector<map<s32, CharReach> > &rose_look,
map<s32, CharReach> &look) {
for (u32 i = 0; i < MAX_FWD_LEN; i++) {
for (const auto &rlook : rose_look) {
if (contains(rlook, i)) {
look[i] |= rlook.at(i);
} else {
look[i].setall();
}
}
}
}
static
void findForwardReach(const RoseGraph &g, const RoseVertex v,
map<s32, CharReach> &look) {
if (!g[v].reports.empty()) {
DEBUG_PRINTF("acceptor\n");
return;
}
// Non-leaf vertices can pick up a mask per successor prefix rose
// engine.
vector<map<s32, CharReach>> rose_look;
for (const auto &e : out_edges_range(v, g)) {
RoseVertex t = target(e, g);
if (!g[t].left) {
DEBUG_PRINTF("successor %zu has no leftfix\n", g[t].idx);
return;
}
rose_look.push_back(map<s32, CharReach>());
getRoseForwardReach(g[t].left, g[e].rose_top, rose_look.back());
}
if (g[v].suffix) {
DEBUG_PRINTF("suffix engine\n");
rose_look.push_back(map<s32, CharReach>());
getSuffixForwardReach(g[v].suffix, g[v].suffix.top, rose_look.back());
}
combineForwardMasks(rose_look, look);
}
static
void findBackwardReach(const RoseGraph &g, const RoseVertex v,
map<s32, CharReach> &look) {
if (!g[v].left) {
return;
}
DEBUG_PRINTF("leftfix, report=%u, lag=%u\n", g[v].left.leftfix_report,
g[v].left.lag);
if (g[v].left.graph) {
getBackwardReach(*g[v].left.graph, g[v].left.leftfix_report,
g[v].left.lag, look);
} else if (g[v].left.castle) {
getBackwardReach(*g[v].left.castle, g[v].left.leftfix_report,
g[v].left.lag, look);
}
// TODO: implement DFA variants if necessary.
}
#if defined(DEBUG) || defined(DUMP_SUPPORT)
#include <sstream>
static UNUSED
string dump(const map<s32, CharReach> &look) {
ostringstream oss;
for (auto it = look.begin(), ite = look.end(); it != ite; ++it) {
if (it != look.begin()) {
oss << ", ";
}
oss << "{" << it->first << ": " << describeClass(it->second) << "}";
}
return oss.str();
}
#endif
static
void normalise(map<s32, CharReach> &look) {
// We can erase entries where the reach is "all characters".
vector<s32> dead;
for (const auto &m : look) {
if (m.second.all()) {
dead.push_back(m.first);
}
}
erase_all(&look, dead);
}
namespace {
struct LookPriority {
explicit LookPriority(const map<s32, CharReach> &look_in) : look(look_in) {}
bool operator()(s32 a, s32 b) const {
const CharReach &a_reach = look.at(a);
const CharReach &b_reach = look.at(b);
if (a_reach.count() != b_reach.count()) {
return a_reach.count() < b_reach.count();
}
return abs(a) < abs(b);
}
private:
const map<s32, CharReach> &look;
};
} // namespace
static
bool isFloodProne(const map<s32, CharReach> &look, const CharReach &flood_cr) {
for (const auto &m : look) {
const CharReach &look_cr = m.second;
if (!overlaps(look_cr, flood_cr)) {
return false;
}
}
DEBUG_PRINTF("look can't escape flood on %s\n",
describeClass(flood_cr).c_str());
return true;
}
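/* Example (hypothetical): with flood_cr = {z} and lookaround entries
 * {-2: [a-z]}, {-1: [x-z]}, every entry admits 'z', so a long run of 'z'
 * bytes can never be rejected by the lookaround: we are flood-prone. A
 * single entry that excludes 'z' would be enough to escape. */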
static
bool isFloodProne(const map<s32, CharReach> &look,
const set<CharReach> &flood_reach) {
if (flood_reach.empty()) {
return false;
}
for (const CharReach &flood_cr : flood_reach) {
if (isFloodProne(look, flood_cr)) {
return true;
}
}
return false;
}
static
void reduce(map<s32, CharReach> &look, set<CharReach> &flood_reach) {
if (look.size() <= MAX_LOOKAROUND_ENTRIES) {
return;
}
DEBUG_PRINTF("before reduce: %s\n", dump(look).c_str());
// First, remove floods that we already can't escape; they shouldn't affect
// the analysis below.
for (auto it = flood_reach.begin(); it != flood_reach.end();) {
if (isFloodProne(look, *it)) {
DEBUG_PRINTF("removing inescapable flood on %s from analysis\n",
describeClass(*it).c_str());
flood_reach.erase(it++);
} else {
++it;
}
}
LookPriority cmp(look);
priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
for (const auto &m : look) {
pq.push(m.first);
}
while (!pq.empty() && look.size() > MAX_LOOKAROUND_ENTRIES) {
s32 d = pq.top();
assert(contains(look, d));
const CharReach cr(look[d]); // copy
pq.pop();
DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
look.erase(d);
// If removing this entry would result in us becoming flood_prone on a
// particular flood_reach case, reinstate it and move on.
if (isFloodProne(look, flood_reach)) {
DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
describeClass(cr).c_str());
look.insert(make_pair(d, cr));
}
}
while (!pq.empty()) {
s32 d = pq.top();
assert(contains(look, d));
const CharReach cr(look[d]); // copy
pq.pop();
if (cr.count() < LOOKAROUND_WIDE_REACH) {
continue;
}
DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
look.erase(d);
// If removing this entry would result in us becoming flood_prone on a
// particular flood_reach case, reinstate it and move on.
if (isFloodProne(look, flood_reach)) {
DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
describeClass(cr).c_str());
look.insert(make_pair(d, cr));
}
}
DEBUG_PRINTF("after reduce: %s\n", dump(look).c_str());
}
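/* Note on the reduction order: LookPriority makes the priority queue surface
 * the widest (least selective) entries first, so we discard the checks that
 * reject the fewest bytes and keep the most selective ones, subject to not
 * becoming flood-prone. */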
static
void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v,
set<CharReach> &flood_reach) {
for (u32 lit_id : tbi.g[v].literals) {
const ue2_literal &s = tbi.literals.right.at(lit_id).s;
if (s.empty()) {
continue;
}
if (is_flood(s)) {
CharReach cr(*s.begin());
DEBUG_PRINTF("flood-prone with reach: %s\n",
describeClass(cr).c_str());
flood_reach.insert(cr);
}
}
}
static
map<s32, CharReach> findLiteralReach(const RoseBuildImpl &build,
const RoseVertex v) {
map<s32, CharReach> look;
for (u32 lit_id : build.g[v].literals) {
const rose_literal_id &lit = build.literals.right.at(lit_id);
u32 i = lit.delay + 1;
for (auto it = lit.s.rbegin(), ite = lit.s.rend(); it != ite; ++it) {
look[0 - i] |= *it;
i++;
}
}
DEBUG_PRINTF("lit lookaround: %s\n", dump(look).c_str());
return look;
}
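/* Worked example (hypothetical): an undelayed literal "abc" implies
 * look[-1] = {c}, look[-2] = {b}, look[-3] = {a}; with delay 2 the same
 * literal shifts to look[-3], look[-4], look[-5]. Multiple literals on the
 * vertex union their reach at each offset. */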
/**
* Trim lookaround checks from the prefix that overlap with the literals
* themselves.
*/
static
void trimLiterals(const RoseBuildImpl &build, const RoseVertex v,
map<s32, CharReach> &look) {
DEBUG_PRINTF("pre-trim lookaround: %s\n", dump(look).c_str());
for (const auto &m : findLiteralReach(build, v)) {
auto it = look.find(m.first);
if (it == end(look)) {
continue;
}
if (m.second.isSubsetOf(it->second)) {
DEBUG_PRINTF("can trim entry at %d\n", it->first);
look.erase(it);
}
}
DEBUG_PRINTF("post-trim lookaround: %s\n", dump(look).c_str());
}
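/* Example (hypothetical): if the literal guarantees {c} at offset -1 and the
 * engine analysis produced {-1: [a-e]}, then the literal reach is a subset of
 * the lookaround reach and the check at -1 adds no information, so the entry
 * is erased. */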
void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
vector<LookEntry> &lookaround) {
lookaround.clear();
const RoseGraph &g = tbi.g;
map<s32, CharReach> look;
findBackwardReach(g, v, look);
findForwardReach(g, v, look);
trimLiterals(tbi, v, look);
if (look.empty()) {
return;
}
normalise(look);
if (look.empty()) {
return;
}
set<CharReach> flood_reach;
findFloodReach(tbi, v, flood_reach);
reduce(look, flood_reach);
if (look.empty()) {
return;
}
DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str());
lookaround.reserve(look.size());
for (const auto &m : look) {
s8 offset = verify_s8(m.first);
lookaround.emplace_back(offset, m.second);
}
}
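/* Pipeline summary: gather backward reach from the leftfix and forward reach
 * from successor/suffix engines, drop entries already implied by the
 * literals, normalise away all-character entries, then reduce the entry count
 * (flood-aware) before emitting LookEntry prototypes. */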
static
bool getTransientPrefixReach(const NGHolder &g, u32 lag,
map<s32, CharReach> &look) {
if (in_degree(g.accept, g) != 1) {
DEBUG_PRINTF("more than one accept\n");
return false;
}
// Currently we don't handle anchored prefixes, as we would need to be able
// to represent the bounds from the anchor as well.
if (out_degree(g.start, g) != 1) {
DEBUG_PRINTF("anchored\n");
return false;
}
if (out_degree(g.startDs, g) != 2) {
DEBUG_PRINTF("more than one start\n");
return false;
}
NFAVertex v = *(inv_adjacent_vertices(g.accept, g).first);
u32 i = lag + 1;
while (v != g.startDs) {
DEBUG_PRINTF("i=%u, v=%u\n", i, g[v].index);
if (is_special(v, g)) {
DEBUG_PRINTF("special\n");
return false;
}
look[0 - i] = g[v].char_reach;
if (in_degree(v, g) != 1) {
DEBUG_PRINTF("branch\n");
return false;
}
v = *(inv_adjacent_vertices(v, g).first);
i++;
}
DEBUG_PRINTF("done\n");
return true;
}
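/* Example (hypothetical): a prefix of the form x[0-9]y with lag 0 is the
 * chain startDs -> x -> [0-9] -> y -> accept, which yields look[-1] = {y},
 * look[-2] = [0-9], look[-3] = {x}. Any branching, anchored start or extra
 * accept edge disqualifies the graph. */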
bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
vector<LookEntry> &lookaround) {
lookaround.clear();
const RoseGraph &g = build.g;
const left_id leftfix(g[v].left);
if (!contains(build.transient, leftfix)) {
DEBUG_PRINTF("not transient\n");
return false;
}
if (!leftfix.graph()) {
DEBUG_PRINTF("only supported for graphs so far\n");
return false;
}
map<s32, CharReach> look;
if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.lag, look)) {
DEBUG_PRINTF("not a chain\n");
return false;
}
trimLiterals(build, v, look);
if (look.size() > MAX_LOOKAROUND_ENTRIES) {
DEBUG_PRINTF("lookaround too big (%zu entries)\n", look.size());
return false;
}
if (look.empty()) {
DEBUG_PRINTF("lookaround empty; this is weird\n");
return false;
}
lookaround.reserve(look.size());
for (const auto &m : look) {
s8 offset = verify_s8(m.first);
lookaround.emplace_back(offset, m.second);
}
return true;
}
void mergeLookaround(vector<LookEntry> &lookaround,
const vector<LookEntry> &more_lookaround) {
if (lookaround.size() >= MAX_LOOKAROUND_ENTRIES) {
DEBUG_PRINTF("big enough!\n");
return;
}
// Don't merge lookarounds at offsets we already have entries for.
ue2::flat_set<s8> offsets;
for (const auto &e : lookaround) {
offsets.insert(e.offset);
}
map<s32, CharReach> more;
LookPriority cmp(more);
priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
for (const auto &e : more_lookaround) {
if (!contains(offsets, e.offset)) {
more.emplace(e.offset, e.reach);
pq.push(e.offset);
}
}
while (!pq.empty() && lookaround.size() < MAX_LOOKAROUND_ENTRIES) {
const s32 offset = pq.top();
pq.pop();
const auto &cr = more.at(offset);
DEBUG_PRINTF("added {%d,%s}\n", offset, describeClass(cr).c_str());
lookaround.emplace_back(verify_s8(offset), cr);
}
// Order by offset.
sort(begin(lookaround), end(lookaround),
[](const LookEntry &a, const LookEntry &b) {
return a.offset < b.offset;
});
}
} // namespace ue2

82
src/rose/rose_build_lookaround.h Normal file
View File

@@ -0,0 +1,82 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Rose compile-time analysis for lookaround masks.
*/
#ifndef ROSE_ROSE_BUILD_LOOKAROUND_H
#define ROSE_ROSE_BUILD_LOOKAROUND_H
#include "rose_graph.h"
#include <vector>
namespace ue2 {
class CharReach;
class RoseBuildImpl;
/** \brief Lookaround entry prototype, describing the reachability at a given
* distance from the end of a role match. */
struct LookEntry {
LookEntry(s8 offset_in, const CharReach &reach_in)
: offset(offset_in), reach(reach_in) {}
s8 offset; //!< offset from role match location.
CharReach reach; //!< reachability at given offset.
bool operator==(const LookEntry &other) const {
return offset == other.offset && reach == other.reach;
}
};
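/* Example (hypothetical): a LookEntry with offset -2 and reach {'a'} asserts
 * that the byte two positions before the role match location must be an
 * 'a'. */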
static inline
size_t hash_value(const LookEntry &l) {
size_t val = 0;
boost::hash_combine(val, l.offset);
boost::hash_combine(val, l.reach);
return val;
}
void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
std::vector<LookEntry> &lookaround);
/**
* \brief If possible, render the prefix of the given vertex as a lookaround.
*
* Given a prefix, returns true (and fills the lookaround vector) if
* it can be satisfied with a lookaround alone.
*/
bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
std::vector<LookEntry> &lookaround);
void mergeLookaround(std::vector<LookEntry> &lookaround,
const std::vector<LookEntry> &more_lookaround);
} // namespace ue2
#endif // ROSE_ROSE_BUILD_LOOKAROUND_H

File diff suppressed because it is too large

73
src/rose/rose_build_merge.h Normal file
View File

@@ -0,0 +1,73 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Rose Build: functions for reducing the size of the Rose graph
* through merging.
*/
#ifndef ROSE_BUILD_MERGE_H
#define ROSE_BUILD_MERGE_H
#include "rose_graph.h"
#include <deque>
#include <set>
namespace ue2 {
class NGHolder;
class RoseBuildImpl;
void mergeDupeLeaves(RoseBuildImpl &tbi);
void uncalcLeaves(RoseBuildImpl &tbi);
bool dedupeLeftfixes(RoseBuildImpl &tbi);
void mergeLeftfixesVariableLag(RoseBuildImpl &tbi);
void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi);
void dedupeSuffixes(RoseBuildImpl &tbi);
void mergeAcyclicSuffixes(RoseBuildImpl &tbi);
void mergeSmallSuffixes(RoseBuildImpl &tbi);
void mergeSmallLeftfixes(RoseBuildImpl &tbi);
void mergeCastleLeftfixes(RoseBuildImpl &tbi);
void mergeOutfixes(RoseBuildImpl &tbi);
void mergePuffixes(RoseBuildImpl &tbi);
void mergeCastleSuffixes(RoseBuildImpl &tbi);
bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
RoseVertex v);
bool mergeableRoseVertices(const RoseBuildImpl &tbi,
const std::set<RoseVertex> &v1,
const std::set<RoseVertex> &v2);
bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
const std::deque<RoseVertex> &verts1);
} // namespace ue2
#endif // ROSE_BUILD_MERGE_H

1269
src/rose/rose_build_misc.cpp Normal file

File diff suppressed because it is too large

File diff suppressed because it is too large

40
src/rose/rose_build_role_aliasing.h Normal file
View File

@@ -0,0 +1,40 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_BUILD_ROLE_ALIASING
#define ROSE_BUILD_ROLE_ALIASING
namespace ue2 {
class RoseBuildImpl;
void aliasRoles(RoseBuildImpl &build, bool mergeRoses);
} // namespace ue2
#endif

129
src/rose/rose_build_scatter.cpp Normal file
View File

@@ -0,0 +1,129 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "rose_build_scatter.h"
#include "ue2common.h"
#include "util/container.h"
#include "util/multibit_build.h"
#include <cstring> // memset
#include <set>
using namespace std;
namespace ue2 {
template<typename T>
static
void rebase(vector<T> *p, u32 adj) {
for (typename vector<T>::iterator it = p->begin(); it != p->end(); ++it) {
DEBUG_PRINTF("=%u+%u\n", it->offset, adj);
it->offset += adj;
}
}
static
void rebase(scatter_plan_raw *raw, u32 adj) {
rebase(&raw->p_u64a, adj);
rebase(&raw->p_u32, adj);
rebase(&raw->p_u16, adj);
rebase(&raw->p_u8, adj);
}
static
void merge_in(scatter_plan_raw *out, const scatter_plan_raw &in) {
insert(&out->p_u64a, out->p_u64a.end(), in.p_u64a);
insert(&out->p_u32, out->p_u32.end(), in.p_u32);
insert(&out->p_u16, out->p_u16.end(), in.p_u16);
insert(&out->p_u8, out->p_u8.end(), in.p_u8);
}
void buildStateScatterPlan(u32 role_state_offset, u32 role_state_count,
u32 left_array_count, u32 left_prefix_count,
const RoseStateOffsets &stateOffsets,
bool streaming, u32 leaf_array_count,
u32 outfix_begin, u32 outfix_end,
scatter_plan_raw *out) {
/* init role array */
scatter_plan_raw spr_role;
mmbBuildClearPlan(role_state_count, &spr_role);
rebase(&spr_role, role_state_offset);
merge_in(out, spr_role);
/* init rose array: turn on prefixes */
u32 rose_array_offset = stateOffsets.activeLeftArray;
scatter_plan_raw spr_rose;
mmbBuildInitRangePlan(left_array_count, 0, left_prefix_count, &spr_rose);
rebase(&spr_rose, rose_array_offset);
merge_in(out, spr_rose);
/* suffix/outfix array */
scatter_plan_raw spr_leaf;
if (streaming) {
mmbBuildInitRangePlan(leaf_array_count, outfix_begin, outfix_end,
&spr_leaf);
} else {
mmbBuildClearPlan(leaf_array_count, &spr_leaf);
}
rebase(&spr_leaf, stateOffsets.activeLeafArray);
merge_in(out, spr_leaf);
}
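/* Sketch of the resulting plan: a list of (offset, value) writes grouped by
 * unit width. At state-init time the runtime simply replays the u64a, u32,
 * u16 and u8 units to set up the multibit state regions, rather than
 * re-interpreting the state layout on every init. */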
u32 aux_size(const scatter_plan_raw &raw) {
u32 rv = 0;
rv += byte_length(raw.p_u64a);
rv += byte_length(raw.p_u32);
rv += byte_length(raw.p_u16);
rv += byte_length(raw.p_u8);
return rv;
}
void write_out(scatter_full_plan *plan_out, void *aux_out,
const scatter_plan_raw &raw, u32 aux_base_offset) {
memset(plan_out, 0, sizeof(*plan_out));
#define DO_CASE(t) \
if (!raw.p_##t.empty()) { \
plan_out->s_##t##_offset = aux_base_offset; \
plan_out->s_##t##_count = raw.p_##t.size(); \
assert(ISALIGNED_N((char *)aux_out + aux_base_offset, \
alignof(scatter_unit_##t))); \
memcpy((char *)aux_out + aux_base_offset, raw.p_##t.data(), \
byte_length(raw.p_##t)); \
aux_base_offset += byte_length(raw.p_##t); \
}
DO_CASE(u64a);
DO_CASE(u32);
DO_CASE(u16);
DO_CASE(u8);
}
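/* DO_CASE packs each unit width contiguously into the aux region and records
 * its offset and count in the plan; aux_base_offset advances as a running
 * cursor, so the DO_CASE order above fixes the aux layout, with the widest
 * units first (as the alignment assert expects). */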
} // namespace ue2

62
src/rose/rose_build_scatter.h Normal file
View File

@@ -0,0 +1,62 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_BUILD_SCATTER_H
#define ROSE_BUILD_SCATTER_H
#include "rose_internal.h"
#include "util/scatter.h"
#include <vector>
namespace ue2 {
class RoseBuildImpl;
struct scatter_plan_raw {
std::vector<scatter_unit_u64a> p_u64a;
std::vector<scatter_unit_u32> p_u32;
std::vector<scatter_unit_u16> p_u16;
std::vector<scatter_unit_u8> p_u8;
};
void buildStateScatterPlan(u32 role_state_offset, u32 role_state_count,
u32 left_array_count, u32 left_prefix_count,
const RoseStateOffsets &stateOffsets,
bool streaming, u32 leaf_array_count,
u32 outfix_begin, u32 outfix_end,
scatter_plan_raw *out);
u32 aux_size(const scatter_plan_raw &raw);
void write_out(scatter_full_plan *plan_out, void *aux_out,
const scatter_plan_raw &raw, u32 aux_base_offset);
} // namespace ue2
#endif

99
src/rose/rose_build_util.h Normal file
View File

@@ -0,0 +1,99 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_BUILD_UTIL_H
#define ROSE_BUILD_UTIL_H
#include "rose_graph.h"
#include "util/graph.h"
#include <algorithm>
namespace ue2 {
// Calculate the minimum depth for the given set of vertices, ignoring those
// with depth 1 or less.
template<class Cont>
static
u8 calcMinDepth(const std::map<RoseVertex, u32> &depths, const Cont &verts) {
u8 d = 255;
for (RoseVertex v : verts) {
u8 vdepth = (u8)std::min((u32)255, depths.at(v));
if (vdepth > 1) {
d = std::min(d, vdepth);
}
}
return d;
}
// Comparator for vertices using their index property.
struct VertexIndexComp {
VertexIndexComp(const RoseGraph &gg) : g(gg) {}
bool operator()(const RoseVertex &a, const RoseVertex &b) const {
const RoseVertexProps &pa = g[a];
const RoseVertexProps &pb = g[b];
if (pa.idx < pb.idx) {
return true;
}
if (pa.idx > pb.idx) {
return false;
}
assert(a == b); // All vertex indices should be distinct.
return a < b;
}
const RoseGraph &g;
};
// Vertex set type, ordered by index. Construct with a graph reference.
typedef std::set<RoseVertex, VertexIndexComp> RoseVertexSet;
/**
* \brief Add two Rose depths together, coping correctly with infinity at
* ROSE_BOUND_INF.
*/
static inline
u32 add_rose_depth(u32 a, u32 b) {
assert(a <= ROSE_BOUND_INF);
assert(b <= ROSE_BOUND_INF);
if (a == ROSE_BOUND_INF || b == ROSE_BOUND_INF) {
return ROSE_BOUND_INF;
}
u32 rv = a + b;
assert(rv >= a && rv >= b);
return rv;
}
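/* e.g. add_rose_depth(5, 7) == 12, while
 * add_rose_depth(5, ROSE_BOUND_INF) == ROSE_BOUND_INF. */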
} // namespace ue2
#endif // ROSE_BUILD_UTIL_H

252
src/rose/rose_build_width.cpp Normal file
View File

@@ -0,0 +1,252 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "rose_build_width.h"
#include "nfagraph/ng_holder.h"
#include "nfagraph/ng_dump.h"
#include "nfagraph/ng_width.h"
#include "rose_build_impl.h"
#include "ue2common.h"
#include "util/graph.h"
#include "util/graph_range.h"
#include <algorithm>
using namespace std;
namespace ue2 {
static
bool is_end_anchored(const RoseGraph &g, RoseVertex v) {
for (auto w : adjacent_vertices_range(v, g)) {
if (g[w].eod_accept) {
return true;
}
}
return false;
}
u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) {
if (table != ROSE_FLOATING && table != ROSE_ANCHORED &&
table != ROSE_EOD_ANCHORED) {
/* handle other tables if ever required */
assert(0);
return 0;
}
const RoseGraph &g = tbi.g;
vector<RoseVertex> table_verts;
for (auto v : vertices_range(g)) {
if (tbi.hasLiteralInTable(v, table)) {
table_verts.push_back(v);
}
}
set<RoseVertex> reachable;
find_reachable(g, table_verts, &reachable);
u32 minWidth = ROSE_BOUND_INF;
for (auto v : reachable) {
if (g[v].eod_accept) {
DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].idx);
continue;
}
const u32 w = g[v].min_offset;
if (!g[v].reports.empty()) {
DEBUG_PRINTF("%zu can fire report at offset %u\n", g[v].idx, w);
minWidth = min(minWidth, w);
}
if (is_end_anchored(g, v)) {
DEBUG_PRINTF("%zu can fire eod report at offset %u\n", g[v].idx, w);
minWidth = min(minWidth, w);
}
if (g[v].suffix) {
depth suffix_width = findMinWidth(g[v].suffix);
assert(suffix_width.is_reachable());
DEBUG_PRINTF("%zu has suffix (width %s), can fire report at %u\n",
g[v].idx, suffix_width.str().c_str(),
w + suffix_width);
minWidth = min(minWidth, w + suffix_width);
}
}
/* TODO: take into account the chain relationship between the mpv and other
* engines */
DEBUG_PRINTF("min width %u\n", minWidth);
return minWidth;
}
u32 findMaxBAWidth(const RoseBuildImpl &tbi) {
const RoseGraph &g = tbi.g;
if (!isLeafNode(tbi.root, g)) {
DEBUG_PRINTF("floating literal -> no max width\n");
return ROSE_BOUND_INF;
}
u64a maxWidth = 0;
for (const auto &outfix : tbi.outfixes) {
maxWidth = max(maxWidth, (u64a)outfix.maxBAWidth);
if (maxWidth >= ROSE_BOUND_INF) {
DEBUG_PRINTF("outfix with no max ba width\n");
return ROSE_BOUND_INF;
}
}
// Everyone's anchored, so the max width can be taken from the max
// max_offset on our vertices (so long as all accepts are EOD).
for (auto v : vertices_range(g)) {
if (!g[v].reports.empty() && !g[v].eod_accept) {
DEBUG_PRINTF("accept not at eod\n");
return ROSE_BOUND_INF;
}
if (g[v].reports.empty() && !g[v].suffix) {
continue;
}
assert(g[v].eod_accept || g[v].suffix);
u64a w = g[v].max_offset;
if (g[v].suffix) {
if (has_non_eod_accepts(g[v].suffix)) {
return ROSE_BOUND_INF;
}
depth suffix_width = findMaxWidth(g[v].suffix);
DEBUG_PRINTF("suffix max width %s\n", suffix_width.str().c_str());
assert(suffix_width.is_reachable());
if (!suffix_width.is_finite()) {
DEBUG_PRINTF("suffix too wide\n");
return ROSE_BOUND_INF;
}
w += suffix_width;
}
maxWidth = max(maxWidth, w);
if (maxWidth >= ROSE_BOUND_INF) {
DEBUG_PRINTF("too wide\n");
return ROSE_BOUND_INF;
}
}
DEBUG_PRINTF("max ba width %llu\n", maxWidth);
assert(maxWidth < ROSE_BOUND_INF);
return maxWidth;
}
u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) {
const RoseGraph &g = tbi.g;
if (!isLeafNode(tbi.root, g) && table == ROSE_FLOATING) {
DEBUG_PRINTF("floating literal -> no max width\n");
return ROSE_BOUND_INF;
}
if (table != ROSE_FLOATING && table != ROSE_ANCHORED) {
/* handle other tables if ever required */
assert(0);
return ROSE_BOUND_INF;
}
DEBUG_PRINTF("looking for a max ba width for %s\n",
table == ROSE_FLOATING ? "floating" : "anchored");
vector<RoseVertex> table_verts;
for (auto v : vertices_range(g)) {
if ((table == ROSE_FLOATING && tbi.isFloating(v))
|| (table == ROSE_ANCHORED && tbi.isAnchored(v))) {
table_verts.push_back(v);
}
}
set<RoseVertex> reachable;
find_reachable(g, table_verts, &reachable);
u64a maxWidth = 0;
// Everyone's anchored, so the max width can be taken from the max
// max_offset on our vertices (so long as all accepts are ACCEPT_EOD).
for (auto v : reachable) {
DEBUG_PRINTF("inspecting vert %zu\n", g[v].idx);
if (g[v].eod_accept) {
DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].idx);
continue;
}
if (!g[v].reports.empty()) {
DEBUG_PRINTF("accept not at eod\n");
return ROSE_BOUND_INF;
}
u64a w = g[v].max_offset;
u64a follow_max = tbi.calcSuccMaxBound(v); /* may have a long bound to
accept_eod node */
if (g[v].suffix) {
if (has_non_eod_accepts(g[v].suffix)) {
DEBUG_PRINTF("has accept\n");
return ROSE_BOUND_INF;
}
depth suffix_width = findMaxWidth(g[v].suffix);
DEBUG_PRINTF("suffix max width %s\n", suffix_width.str().c_str());
assert(suffix_width.is_reachable());
if (!suffix_width.is_finite()) {
DEBUG_PRINTF("suffix too wide\n");
return ROSE_BOUND_INF;
}
follow_max = max(follow_max, (u64a)suffix_width);
}
w += follow_max;
DEBUG_PRINTF("w %llu\n", w);
maxWidth = max(maxWidth, w);
if (maxWidth >= ROSE_BOUND_INF) {
DEBUG_PRINTF("too wide\n");
return ROSE_BOUND_INF;
}
}
DEBUG_PRINTF("max ba width %llu\n", maxWidth);
assert(maxWidth < ROSE_BOUND_INF);
return maxWidth;
}
} // namespace ue2

66
src/rose/rose_build_width.h Normal file
View File

@@ -0,0 +1,66 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_BUILD_WIDTH_H
#define ROSE_BUILD_WIDTH_H
#include "rose_build_impl.h"
#include "ue2common.h"
namespace ue2 {
class RoseBuildImpl;
/* returns a lower bound on the minimum number of bytes required before a match
 * which requires the given literal table can be raised to the user
*
* returns ROSE_BOUND_INF if the table can never produce matches */
u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table);
/* returns an upper bound on the maximum length of a buffer that can result in
* matches. If there are any patterns which are not bianchored (start and end
* anchored), then there is no such limit and ROSE_BOUND_INF is returned.
*/
u32 findMaxBAWidth(const RoseBuildImpl &tbi);
/* returns an upper bound on the maximum length of a buffer that can result in
 * matches which require the given table to be used. If there are any
* patterns which are not bianchored (start and end anchored), then there is no
* such limit and ROSE_BOUND_INF is returned.
*/
u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table);
/**
 * Note: there is no function for determining the min width of the whole rose,
 * as this is more easily done by the NG layer, which has access to the full
 * nfagraphs before they are chopped into little pieces.
*/
} // namespace ue2
#endif

44
src/rose/rose_common.h Normal file
View File

@@ -0,0 +1,44 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_COMMON_H
#define ROSE_COMMON_H
// Common defs available to build-time clients as well as runtime.
#define ROSE_BOUND_INF (~0U)
#define MAX_MASK2_WIDTH 32
// Max block width to use the combined small-block matcher on, instead of
// running the floating and anchored tables.
#define ROSE_SMALL_BLOCK_LEN 32
/** \brief Length in bytes of a reach bitvector, used by the lookaround code. */
#define REACH_BITVECTOR_LEN 32
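/* (32 bytes x 8 bits = 256 bits: one bit for each possible byte value.) */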
#endif // ROSE_COMMON_H

1034
src/rose/rose_dump.cpp Normal file

File diff suppressed because it is too large

50
src/rose/rose_dump.h Normal file
View File

@@ -0,0 +1,50 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_DUMP_H
#define ROSE_DUMP_H
#ifdef DUMP_SUPPORT
#include <cstdio>
#include <string>
struct RoseEngine;
namespace ue2 {
void roseDumpText(const RoseEngine *t, FILE *f);
void roseDumpInternals(const RoseEngine *t, const std::string &base);
void roseDumpComponents(const RoseEngine *t, bool dump_raw,
const std::string &base);
void roseDumpStructRaw(const RoseEngine *t, FILE *f);
} // namespace ue2
#endif
#endif

228
src/rose/rose_graph.h Normal file
View File

@@ -0,0 +1,228 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief BGL graph structures used internally by the Rose build process.
*
* BGL graph structures used internally by the build-time portion of Rose. The
* graph used for input is in rose_in_graph.h since it's part of the RoseBuild
* external API.
*/
#ifndef ROSE_GRAPH_H
#define ROSE_GRAPH_H
#include "ue2common.h"
#include "rose_build.h"
#include "rose_internal.h" /* role history, etc */
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
#include "util/charreach.h"
#include "util/depth.h"
#include "util/ue2_containers.h"
#include <memory>
#include <set>
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/graph_traits.hpp>
namespace ue2 {
struct CastleProto;
struct raw_dfa;
struct raw_som_dfa;
/** \brief Table type for a literal. */
enum rose_literal_table {
ROSE_ANCHORED, //!< literals anchored to start
ROSE_FLOATING, //!< general floating literals
ROSE_EOD_ANCHORED, //!< literals that match near EOD
ROSE_ANCHORED_SMALL_BLOCK, //!< anchored literals for small block table
ROSE_EVENT //!< "literal-like" events, such as EOD
};
#include "util/order_check.h"
/** \brief Provides information about the (pre|in)fix engine to the left of a
* role. */
struct LeftEngInfo {
std::shared_ptr<NGHolder> graph;
std::shared_ptr<CastleProto> castle;
std::shared_ptr<raw_dfa> dfa;
std::shared_ptr<raw_som_dfa> haig;
u32 lag = 0U;
ReportID leftfix_report = MO_INVALID_IDX;
depth dfa_min_width = 0;
depth dfa_max_width = depth::infinity();
bool operator==(const LeftEngInfo &other) const {
return other.graph == graph
&& other.castle == castle
&& other.dfa == dfa
&& other.haig == haig
&& other.lag == lag
&& other.leftfix_report == leftfix_report;
}
bool operator!=(const LeftEngInfo &other) const {
return !(*this == other);
}
bool operator<(const LeftEngInfo &b) const {
const LeftEngInfo &a = *this;
ORDER_CHECK(graph);
ORDER_CHECK(castle);
ORDER_CHECK(dfa);
ORDER_CHECK(haig);
ORDER_CHECK(lag);
ORDER_CHECK(leftfix_report);
return false;
}
void reset(void);
operator bool() const;
bool tracksSom() const { return !!haig; }
};
/** \brief Provides information about the suffix engine to the right of a
* role. */
struct RoseSuffixInfo {
u32 top = 0;
std::shared_ptr<NGHolder> graph; /* if triggers a trailing nfa */
std::shared_ptr<CastleProto> castle;
std::shared_ptr<raw_som_dfa> haig;
std::shared_ptr<raw_dfa> rdfa;
depth dfa_min_width = 0;
depth dfa_max_width = depth::infinity();
bool operator==(const RoseSuffixInfo &b) const;
bool operator!=(const RoseSuffixInfo &b) const { return !(*this == b); }
bool operator<(const RoseSuffixInfo &b) const;
void reset(void);
operator bool() const { return graph || castle || haig || rdfa; }
};
/** \brief Properties attached to each Rose graph vertex. */
struct RoseVertexProps {
/** \brief Unique dense vertex index. Used for BGL algorithms. */
size_t idx = ~size_t{0};
/** \brief IDs of literals in the Rose literal map. */
flat_set<u32> literals;
/**
* \brief If true, this vertex is a virtual vertex for firing reports at
* EOD. These vertices must have reports and have no associated literals.
*/
bool eod_accept = false;
/** \brief Report IDs to fire. */
flat_set<ReportID> reports;
/** \brief Role ID for this vertex. These are what end up in the bytecode. */
u32 role = ~u32{0};
/** \brief Bitmask of groups that this role sets. */
rose_group groups = 0;
/** \brief Characters that escape and squash this role. */
CharReach escapes;
/** \brief Minimum role (end of literal) offset depth in bytes. */
u32 min_offset = ~u32{0};
/** \brief Maximum role (end of literal) offset depth in bytes */
u32 max_offset = 0;
/** \brief SOM for the role is offset from end match offset */
u32 som_adjust = 0;
/** \brief Prefix/infix engine to the left of this role. */
LeftEngInfo left;
/**
* \brief Suffix engine to the right of this role.
*
* Note: information about triggered infixes is associated with the left of
* the destination role.
*/
RoseSuffixInfo suffix;
bool isBoring(void) const;
bool fixedOffset(void) const;
};
/** \brief Properties attached to each Rose graph edge. */
/* bounds are distance from end of prev to start of the next */
struct RoseEdgeProps {
/**
* \brief Minimum distance from the end of the source role's match to the
* start of the target role's match.
*
* Not used when the target has a left engine (as the engine represents
* bounds).
*/
u32 minBound = 0;
/**
* \brief Maximum distance from the end of the source role's match to the
* start of the target role's match.
*
* Not used when the target has a left engine (as the engine represents
* bounds).
*/
u32 maxBound = 0;
/** \brief Which top to trigger on the target role's left engine. */
u32 rose_top = 0;
/** \brief True if the rose_top can clear all other previous tops. */
u8 rose_cancel_prev_top = false;
/** \brief History required by this edge. */
RoseRoleHistory history = ROSE_ROLE_HISTORY_INVALID;
};
bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b);
/**
* \brief Core Rose graph structure.
*
* Note that we use the list selector for the edge and vertex lists: we depend
* on insertion order for determinism, so we must use these containers.
*/
using RoseGraph = boost::adjacency_list<boost::listS, // out edge list per vertex
boost::listS, // vertex list
boost::bidirectionalS, // bidirectional
RoseVertexProps, // bundled vertex properties
RoseEdgeProps, // bundled edge properties
boost::listS // graph edge list
>;
using RoseVertex = RoseGraph::vertex_descriptor;
using RoseEdge = RoseGraph::edge_descriptor;
} // namespace ue2
#endif // ROSE_GRAPH_H

132
src/rose/rose_in_dump.cpp Normal file
View File

@@ -0,0 +1,132 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "rose_in_dump.h"
#include "grey.h"
#include "ue2common.h"
#include "nfagraph/ng_dump.h"
#include "nfagraph/ng_util.h"
#include "util/container.h"
#include "util/graph_range.h"
#include <cstdio>
#include <map>
#include <sstream>
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
using namespace std;
namespace ue2 {
void dumpPreRoseGraph(const RoseInGraph &ig, const Grey &grey,
const char *filename) {
if (!grey.dumpFlags) {
return;
}
if (!filename) {
filename = "pre_rose.dot";
}
DEBUG_PRINTF("dumping rose graphs\n");
FILE *f = fopen((grey.dumpPath + filename).c_str(), "w");
fprintf(f, "digraph NFA {\n");
fprintf(f, "rankdir=LR;\n");
fprintf(f, "size=\"11.5,8\"\n");
fprintf(f, "node [ shape = circle ];\n");
u32 next_id = 0;
map<RoseInVertex, u32> i_map;
for (auto v : vertices_range(ig)) {
u32 id = next_id++;
i_map[v] = id;
const RoseInVertexProps &vp = ig[v];
fprintf(f, "%u [ width = 1, fontsize = 12, label = \"%u:", id, id);
switch(vp.type) {
case RIV_LITERAL:
fprintf(f, "%s", dotEscapeString(dumpString(vp.s)).c_str());
break;
case RIV_START:
fprintf(f, "[START]");
break;
case RIV_ANCHORED_START:
fprintf(f, "[ANCHOR]");
break;
case RIV_ACCEPT:
if (!vp.reports.empty()) {
fprintf(f, "[ACCEPT %s]", as_string_list(vp.reports).c_str());
} else {
fprintf(f, "[ACCEPT]");
}
break;
case RIV_ACCEPT_EOD:
fprintf(f, "[EOD %s]", as_string_list(vp.reports).c_str());
break;
}
fprintf(f, "\" ]; \n");
}
map<NGHolder *, size_t> graph_ids;
for (const auto &e : edges_range(ig)) {
u32 u = i_map[source(e, ig)];
u32 v = i_map[target(e, ig)];
fprintf(f, "%u -> %u [label=\"", u, v);
if (ig[e].graph) {
if (!contains(graph_ids, &*ig[e].graph)) {
size_t id = graph_ids.size();
graph_ids[&*ig[e].graph] = id;
}
fprintf(f, "graph %zu", graph_ids[&*ig[e].graph]);
}
if (ig[e].haig) {
fprintf(f, "haig ");
}
fprintf(f, "\"]\n");
}
for (const auto &e : graph_ids) {
NGHolder *h = e.first;
size_t id = e.second;
ostringstream name;
name << grey.dumpPath << "pre_rose_" << id << ".dot";
dumpGraph(name.str().c_str(), h->g);
assert(allMatchStatesHaveReports(*h));
}
fprintf(f, "}\n");
fclose(f);
}
} // namespace ue2

49
src/rose/rose_in_dump.h Normal file
View File

@@ -0,0 +1,49 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_IN_DUMP_H
#define ROSE_IN_DUMP_H
#include "rose_in_graph.h"
namespace ue2 {
struct Grey;
#ifdef DUMP_SUPPORT
void dumpPreRoseGraph(const RoseInGraph &ig, const Grey &grey,
const char *filename = nullptr);
#else
static UNUSED
void dumpPreRoseGraph(const RoseInGraph &, const Grey &,
const char * = nullptr) { }
#endif
} // namespace ue2
#endif

187
src/rose/rose_in_graph.h Normal file
View File

@@ -0,0 +1,187 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Rose Input Graph: Used for ng_rose -> rose_build_add communication.
*
* The input graph MUST be a DAG.
* There MUST be exactly 1 START or ANCHORED_START vertex.
* The edges MUST be of the form START->LITERAL, LITERAL->LITERAL,
* LITERAL->ACCEPT or LITERAL->ACCEPT_EOD.
* Every non START/ANCHORED_START vertex MUST have an in-edge.
* Every non ACCEPT/ACCEPT_EOD vertex MUST have an out-edge.
*
* Edges are either a graph or have bounds associated with them.
* Graphs on edges to accepts use their internal report ids.
*/
#ifndef ROSE_IN_GRAPH_H
#define ROSE_IN_GRAPH_H
#include "ue2common.h"
#include "rose/rose_common.h"
#include "util/ue2_containers.h"
#include "util/ue2string.h"
#include <memory>
#include <boost/graph/graph_traits.hpp>
#include <boost/graph/adjacency_list.hpp>
namespace ue2 {
class NGHolder;
struct raw_som_dfa;
enum RoseInVertexType {
RIV_LITERAL,
RIV_START,
RIV_ANCHORED_START,
RIV_ACCEPT,
RIV_ACCEPT_EOD
};
struct RoseInVertexProps {
RoseInVertexProps()
: type(RIV_LITERAL), delay(0), min_offset(0),
max_offset(ROSE_BOUND_INF) {}
private:
template <class ReportContainer>
RoseInVertexProps(RoseInVertexType type_in, const ue2_literal &s_in,
const ReportContainer &reports_in, u32 min_offset_in,
u32 max_offset_in)
: type(type_in), s(s_in), delay(0),
reports(begin(reports_in), end(reports_in)),
min_offset(min_offset_in), max_offset(max_offset_in) {}
// Constructor for a vertex with no reports.
RoseInVertexProps(RoseInVertexType type_in, const ue2_literal &s_in,
u32 min_offset_in, u32 max_offset_in)
: type(type_in), s(s_in), delay(0), min_offset(min_offset_in),
max_offset(max_offset_in) {}
public:
static RoseInVertexProps makeLiteral(const ue2_literal &lit) {
DEBUG_PRINTF("making literal %s\n", dumpString(lit).c_str());
return RoseInVertexProps(RIV_LITERAL, lit, 0, ROSE_BOUND_INF);
}
template <class ReportContainer>
static RoseInVertexProps makeAccept(const ReportContainer &rep) {
DEBUG_PRINTF("making accept for %zu reports\n", rep.size());
return RoseInVertexProps(RIV_ACCEPT, ue2_literal(), rep, 0,
ROSE_BOUND_INF);
}
template <class ReportContainer>
static RoseInVertexProps makeAcceptEod(const ReportContainer &rep) {
DEBUG_PRINTF("making accept-eod for %zu reports\n", rep.size());
return RoseInVertexProps(RIV_ACCEPT_EOD, ue2_literal(), rep, 0,
ROSE_BOUND_INF);
}
static RoseInVertexProps makeStart(bool anchored) {
DEBUG_PRINTF("making %s\n", anchored ? "anchored start" : "start");
if (anchored) {
return RoseInVertexProps(RIV_ANCHORED_START, ue2_literal(), 0, 0);
} else {
return RoseInVertexProps(RIV_START, ue2_literal(), 0,
ROSE_BOUND_INF);
}
}
RoseInVertexType type; /* polymorphic vertices are probably a bad idea */
ue2_literal s; /**< for RIV_LITERAL */
u32 delay; /**< for RIV_LITERAL, delay applied to literal. */
flat_set<ReportID> reports; /**< for RIV_ACCEPT/RIV_ACCEPT_EOD */
u32 min_offset; /**< Minimum offset at which this vertex can match. */
u32 max_offset; /**< Maximum offset at which this vertex can match. */
};
struct RoseInEdgeProps {
RoseInEdgeProps()
: minBound(0), maxBound(0), graph(), haig(), graph_lag(0) {}
RoseInEdgeProps(u32 min_in, u32 max_in)
: minBound(min_in), maxBound(max_in), graph(), graph_lag(0) {
assert(minBound <= maxBound);
assert(minBound != ROSE_BOUND_INF);
}
/* haig rosefixes (prefix/infix) require their corresponding holders */
RoseInEdgeProps(std::shared_ptr<NGHolder> g, std::shared_ptr<raw_som_dfa> h,
u32 lag)
: minBound(0), maxBound(ROSE_BOUND_INF), graph(g), haig(h),
graph_lag(lag) {
assert(graph);
assert(haig);
}
/* haig suffixes do not require their corresponding holders */
explicit RoseInEdgeProps(std::shared_ptr<raw_som_dfa> h)
: minBound(0), maxBound(ROSE_BOUND_INF), haig(h), graph_lag(0) {
assert(haig);
}
RoseInEdgeProps(std::shared_ptr<NGHolder> g, u32 lag)
: minBound(0), maxBound(ROSE_BOUND_INF), graph(g), graph_lag(lag) {
assert(graph);
}
    /** \brief Minimum bound on 'dot' repeat between literals, i.e. from pred
     * end to succ begin. */
u32 minBound;
/** \brief Maximum bound on 'dot' repeat between literals. */
u32 maxBound;
/** \brief Prefix graph. Graph is end to (end - lag). */
std::shared_ptr<NGHolder> graph;
/** \brief Haig version of graph, if required. */
std::shared_ptr<raw_som_dfa> haig;
u32 graph_lag;
    /** \brief Escape characters; can be used instead of a graph.
     *
     * Currently these must not intersect with the succ literal and may only
     * be used on a literal -> literal edge. TODO: handle the general case. */
CharReach escapes;
};
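/* Sketch (hypothetical) of a minimal well-formed input graph for the
 * pattern /abc/:
 *
 *   RoseInGraph ig;
 *   RoseInVertex start
 *       = add_vertex(RoseInVertexProps::makeStart(false), ig);
 *   RoseInVertex lit = add_vertex(
 *       RoseInVertexProps::makeLiteral(ue2_literal("abc", false)), ig);
 *   std::vector<ReportID> reports = {0};
 *   RoseInVertex acc
 *       = add_vertex(RoseInVertexProps::makeAccept(reports), ig);
 *   add_edge(start, lit, RoseInEdgeProps(0, ROSE_BOUND_INF), ig);
 *   add_edge(lit, acc, RoseInEdgeProps(0, 0), ig);
 */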
typedef boost::adjacency_list<boost::listS, boost::listS, boost::bidirectionalS,
RoseInVertexProps,
RoseInEdgeProps> RoseInGraph;
typedef RoseInGraph::vertex_descriptor RoseInVertex;
typedef RoseInGraph::edge_descriptor RoseInEdge;
} // namespace ue2
#endif

267
src/rose/rose_in_util.cpp Normal file
View File

@@ -0,0 +1,267 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "rose_in_util.h"
#include "rose_build_util.h"
#include "nfa/goughcompile.h"
#include "nfagraph/ng_depth.h"
#include "nfagraph/ng_util.h"
#include "nfagraph/ng_width.h"
#include "util/container.h"
#include "util/graph_range.h"
#include "util/make_unique.h"
#include "util/ue2_containers.h"
#include <vector>
#include <boost/graph/copy.hpp>
#include <boost/graph/reverse_graph.hpp>
#include <boost/graph/topological_sort.hpp>
using namespace std;
namespace ue2 {
static
void populateIndexMap(const RoseInGraph &in,
map<RoseInVertex, size_t> *index_map) {
size_t i = 0;
for (auto v : vertices_range(in)) {
(*index_map)[v] = i++;
}
}
/* Returns a topological ordering of the vertices in g. That is, the starts
* are at the front and all the predecessors of a vertex occur earlier in the
* list than the vertex. */
vector<RoseInVertex> topo_order(const RoseInGraph &g) {
map<RoseInVertex, size_t> index_map;
populateIndexMap(g, &index_map);
vector<RoseInVertex> v_order;
v_order.reserve(index_map.size());
topological_sort(g, back_inserter(v_order),
vertex_index_map(boost::make_assoc_property_map(index_map)));
reverse(v_order.begin(), v_order.end()); /* put starts at the front */
return v_order;
}
namespace {
struct RoseEdgeCopier {
typedef unordered_map<const NGHolder *, shared_ptr<NGHolder>> GraphMap;
typedef unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> HaigMap;
RoseEdgeCopier(const RoseInGraph &g1, RoseInGraph &g2,
const GraphMap &graph_map_in, const HaigMap &haig_map_in)
: ig(g1), out(g2), graph_map(graph_map_in), haig_map(haig_map_in) {}
void operator()(const RoseInEdge &e1, RoseInEdge &e2) {
// Clone all properties.
put(boost::edge_all, out, e2, get(boost::edge_all, ig, e1));
// Substitute in cloned graphs.
if (ig[e1].graph) {
out[e2].graph = graph_map.at(ig[e1].graph.get());
}
if (ig[e1].haig) {
out[e2].haig = haig_map.at(ig[e1].haig.get());
}
}
private:
const RoseInGraph &ig;
RoseInGraph &out;
const GraphMap &graph_map;
const HaigMap &haig_map;
};
} // namespace
unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig) {
unique_ptr<RoseInGraph> out = make_unique<RoseInGraph>();
unordered_map<const NGHolder *, shared_ptr<NGHolder>> graph_map;
unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> haig_map;
for (const auto &e : edges_range(ig)) {
const RoseInEdgeProps &ep = ig[e];
if (ep.graph && !contains(graph_map, ep.graph.get())) {
graph_map[ep.graph.get()] = cloneHolder(*ep.graph);
}
if (ep.haig && !contains(haig_map, ep.haig.get())) {
haig_map[ep.haig.get()] = make_shared<raw_som_dfa>(*ep.haig);
}
}
map<RoseInVertex, size_t> index_map;
populateIndexMap(ig, &index_map);
copy_graph(ig, *out,
boost::edge_copy(RoseEdgeCopier(ig, *out, graph_map, haig_map))
.vertex_index_map(boost::make_assoc_property_map(index_map)));
return out;
}
void calcVertexOffsets(RoseInGraph &g) {
vector<RoseInVertex> v_order = topo_order(g);
for (RoseInVertex v : v_order) {
if (g[v].type == RIV_START) {
g[v].min_offset = 0;
g[v].max_offset = ROSE_BOUND_INF;
continue;
} else if (g[v].type == RIV_ANCHORED_START) {
g[v].min_offset = 0;
g[v].max_offset = 0;
continue;
}
DEBUG_PRINTF("vertex '%s'\n", dumpString(g[v].s).c_str());
// Min and max predecessor depths.
u32 min_d = ROSE_BOUND_INF;
u32 max_d = 0;
for (const auto &e : in_edges_range(v, g)) {
RoseInVertex u = source(e, g);
u32 e_min = g[u].min_offset;
u32 e_max = g[u].max_offset;
DEBUG_PRINTF("in-edge from u with offsets [%u,%u]\n", e_min, e_max);
if (g[e].graph) {
const NGHolder &h = *g[e].graph;
depth g_min_width = findMinWidth(h);
depth g_max_width =
isAnchored(h) ? findMaxWidth(h) : depth::infinity();
u32 graph_lag = g[e].graph_lag;
DEBUG_PRINTF("edge has graph, depths [%s,%s] and lag %u\n",
g_min_width.str().c_str(),
g_max_width.str().c_str(), graph_lag);
g_min_width += graph_lag;
g_max_width += graph_lag;
e_min = add_rose_depth(e_min, g_min_width);
if (g_max_width.is_finite()) {
e_max = add_rose_depth(e_max, g_max_width);
} else {
e_max = ROSE_BOUND_INF;
}
} else {
DEBUG_PRINTF("edge has bounds [%u,%u]\n", g[e].minBound,
g[e].maxBound);
e_min = add_rose_depth(e_min, g[e].minBound);
e_max = add_rose_depth(e_max, g[e].maxBound);
if (g[v].type == RIV_LITERAL) {
u32 len = g[v].s.length();
DEBUG_PRINTF("lit len %u\n", len);
e_min = add_rose_depth(e_min, len);
e_max = add_rose_depth(e_max, len);
}
}
min_d = min(min_d, e_min);
max_d = max(max_d, e_max);
}
DEBUG_PRINTF("vertex depths [%u,%u]\n", min_d, max_d);
assert(max_d >= min_d);
g[v].min_offset = min_d;
g[v].max_offset = max_d;
}
// It's possible that we may have literal delays assigned to vertices here
// as well. If so, these need to be added to the min/max offsets.
for (RoseInVertex v : v_order) {
const u32 delay = g[v].delay;
g[v].min_offset = add_rose_depth(g[v].min_offset, delay);
g[v].max_offset = add_rose_depth(g[v].max_offset, delay);
}
}
nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e) {
RoseInVertex u = source(e, in);
RoseInVertex v = target(e, in);
bool start = in[u].type == RIV_START || in[u].type == RIV_ANCHORED_START;
bool end = in[v].type == RIV_ACCEPT || in[v].type == RIV_ACCEPT_EOD;
if (start && !end) {
return NFA_PREFIX;
} else if (!start && end) {
return NFA_SUFFIX;
} else if (!start && !end) {
return NFA_INFIX;
} else {
assert(in[v].type == RIV_ACCEPT_EOD);
return NFA_OUTFIX;
}
}
void pruneUseless(RoseInGraph &g) {
DEBUG_PRINTF("pruning useless vertices\n");
set<RoseInVertex> dead;
RoseInVertex dummy_start
= add_vertex(RoseInVertexProps::makeStart(true), g);
RoseInVertex dummy_end
= add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), g);
dead.insert(dummy_start);
dead.insert(dummy_end);
for (auto v : vertices_range(g)) {
if (v == dummy_start || v == dummy_end) {
continue;
}
switch (g[v].type) {
case RIV_ANCHORED_START:
case RIV_START:
add_edge(dummy_start, v, g);
break;
case RIV_ACCEPT:
case RIV_ACCEPT_EOD:
add_edge(v, dummy_end, g);
break;
default:
break;
}
}
find_unreachable(g, vector<RoseInVertex>(1, dummy_start), &dead);
find_unreachable(boost::reverse_graph<RoseInGraph, RoseInGraph &>(g),
vector<RoseInVertex>(1, dummy_end), &dead);
for (auto v : dead) {
clear_vertex(v, g);
remove_vertex(v, g);
}
}
} // namespace ue2

51
src/rose/rose_in_util.h Normal file
View File

@@ -0,0 +1,51 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_IN_UTIL_H
#define ROSE_IN_UTIL_H
#include "rose_in_graph.h"
#include "nfa/nfa_kind.h"
#include <vector>
namespace ue2 {
/* Returns a topological ordering of the vertices in g. That is, the starts
* are at the front and all the predecessors of a vertex occur earlier in the
* list than the vertex. */
std::vector<RoseInVertex> topo_order(const RoseInGraph &g);
std::unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig);
void calcVertexOffsets(RoseInGraph &ig);
enum nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e);
void pruneUseless(RoseInGraph &g);
} // namespace ue2
#endif

831
src/rose/rose_internal.h Normal file
View File

@@ -0,0 +1,831 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Rose data structures.
*/
#ifndef ROSE_INTERNAL_H
#define ROSE_INTERNAL_H
#include "ue2common.h"
#include "rose_common.h"
#include "util/scatter.h"
#define ROSE_OFFSET_INVALID 0xffffffff
// Group constants
typedef u64a rose_group;
// Delayed literal stuff
#define DELAY_BITS 5
#define DELAY_SLOT_COUNT (1U << DELAY_BITS)
#define MAX_DELAY (DELAY_SLOT_COUNT - 1)
#define DELAY_MASK (DELAY_SLOT_COUNT - 1)
#define DELAY_FLOAT_DIRTY (1U << 7) /* delay literal matched in history */
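/* Illustrative sketch, not part of the original source: walking the delay
* slots encoded in RoseLiteral::delay_mask (defined below). Bit D being set
* means a delayed copy of the literal should be raised D bytes after the real
* match; D is bounded by MAX_DELAY. The helper name is hypothetical. */
static really_inline
void exampleForEachDelay(u32 delay_mask) {
for (u32 d = 0; d <= MAX_DELAY; d++) {
if (delay_mask & (1U << d)) {
/* schedule the delayed literal at match offset + d */
}
}
}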
// Direct report stuff
#define LITERAL_DR_FLAG (1U << 31)
#define LITERAL_MDR_FLAG ((1U << 30) | (1U << 31))
/** \brief True if literal is either a direct report or a multi-direct
* report. */
static really_inline
u32 isLiteralDR(u32 id) {
return id & LITERAL_DR_FLAG;
}
static really_inline
u32 isLiteralMDR(u32 id) {
return (id & LITERAL_MDR_FLAG) == LITERAL_MDR_FLAG;
}
static really_inline
ReportID literalToReport(u32 id) {
assert(id & LITERAL_DR_FLAG);
assert(!(id & (LITERAL_MDR_FLAG ^ LITERAL_DR_FLAG)));
return id & ~LITERAL_DR_FLAG;
}
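/* Illustrative sketch, not part of the original source: how the predicates
* above combine. A multi-direct-report id sets both flag bits, so
* isLiteralDR() accepts it too; literalToReport() is only meaningful for
* plain direct reports. The helper name is hypothetical. */
static really_inline
void exampleClassifyLiteralId(u32 id) {
if (isLiteralMDR(id)) {
/* multi-direct report: resolved via the multi-direct report list
* (see RoseEngine::multidirectOffset below); no RoseLiteral entry. */
} else if (isLiteralDR(id)) {
ReportID report = literalToReport(id);
(void)report; /* raise this internal report directly */
} else {
/* ordinary literal id: indexes the RoseLiteral array. */
}
}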
// Structure representing a literal. Each literal may have many roles.
struct RoseLiteral {
u32 rootRoleOffset; /**< If rootRoleCount == 1, this is an offset relative
* to the rose engine to the root role associated with
* the literal.
* If rootRoleCount > 1, this is the first index into
* the rootRoleTable indicating the root roles.
*/
u32 rootRoleCount; // number of root roles
u32 iterOffset; // offset of sparse iterator, relative to rose
u32 iterMapOffset; // offset of the iter mapping table, relative to rose
rose_group groups; // bitset of groups that cause this literal to fire.
u8 minDepth; // the minimum of this literal's roles' depths (for depths > 1)
u8 squashesGroup; /**< literal switches off its group behind it if it sets a
* role */
u8 requires_side; // need to catch up sidecar for this literal
u32 delay_mask; /**< bit set indicates that the literal inserts a delayed
* match at the given offset */
u32 delayIdsOffset; // offset to array of ids to poke in the delay structure
};
/* properties for sidecar entries, yay */
struct RoseSide {
u32 squashIterOffset; // offset of the squash sparse iterator, rose relative
rose_group squashGroupMask; // squash literal squash masks
};
/* Allocation of Rose literal ids
*
* The rose literal id space is segmented:
*
* ---- 0
* | | Normal undelayed literals in the e or f tables which require a
* | | manual benefits confirm on match [the a table never requires benefits]
* | |
* ---- nonbenefits_base_id
* | | 'Normal' undelayed literals in either e or f tables
* | |
* | |
* | |
* ---- anchored_base_id
* | | literals from the a table
* | |
* ---- delay_base_id
* | | Delayed version of normal literals
* | |
* ---- literalCount
* ...
* ...
* ...
* ---- LITERAL_DR_FLAG
* | | Direct Report literals: immediately raise an internal report with id
* | | given by (lit_id & ~LITERAL_DR_FLAG). Raised by a or f tables (or e??).
* | | No RoseLiteral structure
* | |
* | |
* ----
*
* Note: sidecar 'literals' are in a completely separate space
*/
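/* Illustrative sketch, not part of the original source: classifying a
* literal id into the segments above. The base ids are the RoseEngine fields
* nonbenefits_base_id, anchored_base_id and delay_base_id (defined further
* below); they are passed in directly here so the sketch stands alone. The
* helper name is hypothetical. */
static really_inline
const char *exampleLiteralSegment(u32 id, u32 nonbenefits_base_id,
u32 anchored_base_id, u32 delay_base_id,
u32 literalCount) {
if (isLiteralDR(id)) {
return "direct report"; /* no RoseLiteral structure */
}
assert(id < literalCount);
if (id >= delay_base_id) {
return "delayed";
}
if (id >= anchored_base_id) {
return "anchored (a table)";
}
if (id >= nonbenefits_base_id) {
return "normal";
}
return "normal, requires benefits confirm";
}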
/* Rose Literal Sources
*
* Rose currently gets events (mainly roseProcessMatch calls) from 8 sources:
* 1) The floating table
* 2) The anchored table
* 3) Delayed literals
* 4) Sidecar literal matcher
* 5) suffixes NFAs
* 6) masksv2 (literals with benefits)
* 7) End anchored table
* 8) prefix / infix nfas
*
* Care is required to ensure that events appear to come into Rose in order
* (or sufficiently ordered for Rose to cope). Generally the progress of the
* floating table is considered the canonical position in the buffer.
*
* Anchored table:
* The anchored table is run before the floating table as nothing in it can
* depend on a floating literal. Order is achieved by two measures:
* a) user matches^1 are logged and held until the floating matcher passes that
* point;
* b) any floating role with an anchored predecessor has a history relationship
* to enforce the ordering.
*
* Delayed literals:
* Delayed literal ordering is handled by delivering any pending delayed
* literals before processing any floating match.
*
* Sidecar:
* The sidecar matcher is unique in that it does not return match
* location information. Sidecar literals are escapes between two normal
* roles. The sidecar matcher is caught up to the floating matcher
* before any possible predecessor role, any possible successor role, and
* at stream boundaries^3.
*
* Suffix:
* Suffixes are always pure terminal roles. Prior to raising a match^2, pending
* NFA queues are run to the current point (floating or delayed literal) as
* appropriate.
*
* Masksv2:
* These are triggered from either floating literals or delayed literals and
* inspect the data behind them. Matches are raised at the same location as the
* trigger literal so there are no ordering issues. Masks are always pure
* terminal roles.
*
* Lookaround:
* These are tests run on receipt of a role that "look around" the match,
* checking characters at nearby offsets against reachability masks. Each role
* can have a list of these lookaround offset/reach pairs, ordered in offset
* order, and any failure will prevent the role from being switched on. Offsets
* are relative to the byte after a literal match, and can be negative.
*
* Prefix / Infix:
* TODO: remember / discuss
*
* End anchored table:
* All user matches occur at the last byte. We do this last, so no problems
* (yippee)
*
* ^1 User matches which occur before any possible match from the other tables
* are not delayed.
* ^2 Queues may also be run to the current location if a queue is full and
* needs to be emptied.
* ^3 There is no need to catch up at the end of a block scan as it contains no
* terminals.
*/
// We have different types of role history storage.
enum RoseRoleHistory {
ROSE_ROLE_HISTORY_NONE, // I'm sorry, I don't recall.
ROSE_ROLE_HISTORY_ANCH, // used when previous role is at a fixed offset
ROSE_ROLE_HISTORY_LAST_BYTE, /* used when previous role can only match at the
* last byte of a stream */
ROSE_ROLE_HISTORY_INVALID // history not yet assigned
};
struct RoseCountingMiracle {
char shufti; /**< 1: count shufti class; 0: count a single character */
u8 count; /**< minimum number of occurrences for the counting
* miracle char to kill the leftfix. */
u8 c; /**< character to look for if not shufti */
u8 poison; /**< character not in the shufti mask */
m128 lo; /**< shufti lo mask */
m128 hi; /**< shufti hi mask */
};
struct LeftNfaInfo {
u32 maxQueueLen;
u32 maxLag; // maximum of successor roles' lag
u32 lagIndex; // iff lag != 0, index into leftfixLagTable
u32 stopTable; // stop table index, or ROSE_OFFSET_INVALID
u8 transient; /**< 0 if not transient, else max width of transient prefix */
char infix; /* TODO: make flags */
char eod_check; /**< nfa is used by the event eod literal */
u32 countingMiracleOffset; /**< if not 0, offset to RoseCountingMiracle. */
rose_group squash_mask; /* & mask applied when rose nfa dies */
};
// A list of these is used to trigger prefix/infix roses.
struct RoseTrigger {
u32 queue; // queue index of leftfix
u32 event; // from enum mqe_event
u8 cancel_prev_top;
};
struct NfaInfo {
u32 nfaOffset;
u32 stateOffset;
u32 fullStateOffset; /* offset in scratch, relative to ??? */
u32 ekeyListOffset; /* suffix, relative to base of rose, 0 if no ekeys */
u8 no_retrigger; /* TODO */
u8 only_external; /**< does not raise any som internal events or chained
* rose events */
u8 in_sbmatcher; /**< this outfix should not be run in small-block
* execution, as it will be handled by the sbmatcher
* HWLM table. */
u8 eod; /* suffix is triggered by the etable --> can only produce eod
* matches */
};
#define ROSE_ROLE_FLAG_ANCHOR_TABLE (1U << 0) /**< role is triggered from
* anchored table */
#define ROSE_ROLE_FLAG_ACCEPT_EOD (1U << 2) /**< "fake" role, fires callback
* at EOD */
#define ROSE_ROLE_FLAG_ONLY_AT_END (1U << 3) /**< role can only be switched on
* at end of block */
#define ROSE_ROLE_FLAG_PRED_OF_EOD (1U << 4) /**< eod is a successor literal
* of the role */
#define ROSE_ROLE_FLAG_EOD_TABLE (1U << 5) /**< role is triggered from eod
* table */
#define ROSE_ROLE_FLAG_ROSE (1U << 6) /**< rose style prefix nfa for
* role */
#define ROSE_ROLE_FLAG_SOM_REPORT (1U << 7) /**< report id is only used to
* manipulate som */
#define ROSE_ROLE_FLAG_REPORT_START (1U << 8) /**< som som som som */
#define ROSE_ROLE_FLAG_CHAIN_REPORT (1U << 9) /**< report id is only used to
* start an outfix engine */
#define ROSE_ROLE_FLAG_SOM_ADJUST (1U << 10) /**< som value to use is offset
* from match end location */
#define ROSE_ROLE_FLAG_SOM_ROSEFIX (1U << 11) /**< som value to use is provided
* by prefix/infix */
/* We allow different types of role-predecessor relationships. These are stored
* in with the flags */
#define ROSE_ROLE_PRED_NONE (1U << 20) /**< the only pred is the root,
* [0, inf] bounds */
#define ROSE_ROLE_PRED_SIMPLE (1U << 21) /**< single [0,inf] pred, no
* offset tracking */
#define ROSE_ROLE_PRED_ROOT (1U << 22) /**< pred is root or anchored
* root, and we have bounds */
#define ROSE_ROLE_PRED_ANY (1U << 23) /**< any of our preds can match */
#define ROSE_ROLE_PRED_CLEAR_MASK (~(ROSE_ROLE_PRED_NONE \
| ROSE_ROLE_PRED_SIMPLE \
| ROSE_ROLE_PRED_ROOT \
| ROSE_ROLE_PRED_ANY))
#define MAX_STORED_LEFTFIX_LAG 127 /* max leftfix lag that we can store in one
* whole byte (OWB) (streaming only). Other
* values in OWB are reserved for zombie
* status */
#define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose
* prefix checks */
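/* Illustrative sketch, not part of the original source: the one-whole-byte
* (OWB) lag encoding described above. Values 0..MAX_STORED_LEFTFIX_LAG hold
* the leftfix lag directly; OWB_ZOMBIE_ALWAYS_YES marks a zombified engine.
* The helper name is hypothetical. */
static really_inline
void exampleStoreOwb(u8 *slot, u32 lag, char zombie) {
if (zombie) {
*slot = OWB_ZOMBIE_ALWAYS_YES;
} else {
assert(lag <= MAX_STORED_LEFTFIX_LAG);
*slot = (u8)lag;
}
}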
// Structure representing a literal role.
struct RoseRole {
u32 flags;
u32 predOffset; // either offset of pred sparse iterator, or
// (for ROSE_ROLE_PRED_ROOT) index of single RosePred.
rose_group groups; /**< groups to enable when role is set (groups of succ
* literals) */
ReportID reportId; // report ID, or MO_INVALID_IDX
u32 stateIndex; /**< index into state multibit, or MMB_INVALID. Roles do not
* require a state bit if they are terminal */
u32 suffixEvent; // from enum mqe_event
u8 depth; /**< depth of this vertex from root in the tree, or 255 if greater.
*/
u32 suffixOffset; /**< suffix nfa: 0 if no suffix associated with the role,
* relative to base of the rose. */
ReportID leftfixReport; // (pre|in)fix report to check, or MO_INVALID_IDX.
u32 leftfixLag; /**< distance behind match where we need to check the
* leftfix engine status */
u32 leftfixQueue; /**< queue index of the prefix/infix before role */
u32 infixTriggerOffset; /* offset to list of infix roses to trigger */
u32 sidecarEnableOffset; /**< offset to list of sidecar literals to enable
*/
u32 somAdjust; /**< som for the role is offset from end match offset */
u32 lookaroundIndex; /**< index of lookaround offset/reach in table, or
* MO_INVALID_IDX. */
u32 lookaroundCount; /**< number of lookaround entries. */
};
// Structure representing a predecessor relationship
struct RosePred {
u32 role; // index of predecessor role
u32 minBound; // min bound on distance from pred (_ANCH ->absolute offset)
u32 maxBound; /* max bound on distance from pred, or ROSE_BOUND_INF
* (_ANCH -> absolute offset ) */
u8 historyCheck; // from enum RoseRoleHistory
};
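/* Illustrative sketch, not part of the original source: evaluating a
* RosePred's bounds. `value` is the distance from the predecessor match, or
* the absolute offset for ROSE_ROLE_HISTORY_ANCH preds, as noted above. The
* helper name is hypothetical. */
static really_inline
char examplePredBoundsOk(const struct RosePred *tp, u64a value) {
if (value < tp->minBound) {
return 0;
}
return tp->maxBound == ROSE_BOUND_INF || value <= tp->maxBound;
}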
// Structure mapping between the dense index produced by the literal sparse
// iterator and a list of roles.
struct RoseIterMapping {
u32 offset; // offset into iter role table
u32 count; // number of roles
};
struct RoseIterRole {
u32 role;
u32 pred;
};
/**
* \brief Rose state offsets.
*
* Stores pre-calculated offsets (in bytes) to MOST of the state structures
* used by Rose, relative to the start of stream state.
*
* State not covered by this structure includes:
*
* -# the RoseRuntimeState structure
* -# the role state multibit
*/
struct RoseStateOffsets {
/** History buffer.
*
* First byte is an 8-bit count of the number of valid history bytes
* available, followed by the history itself. Max size of history is
* RoseEngine::historyRequired. */
u32 history;
/** Exhausted bitvector.
*
* 1 bit per exhaustible key (used by Highlander mode). If a bit is set,
* reports with that ekey should not be delivered to the user. */
u32 exhausted;
/** Sidecar state. */
u32 sidecar;
/** Size of sidecar state, in bytes. */
u32 sidecar_size;
/** Multibit for active suffix/outfix engines. */
u32 activeLeafArray;
/** Multibit for active Rose (prefix/infix) engines. */
u32 activeLeftArray;
/** Size of the active Rose array multibit, in bytes. */
u32 activeLeftArray_size;
/** Table of lag information (stored as one byte per engine) for active
* Rose leftfix engines. */
u32 leftfixLagTable;
/** State for anchored matchers (McClellan DFAs). */
u32 anchorState;
/** Packed Rose groups value. */
u32 groups;
/** Size of packed Rose groups value, in bytes. */
u32 groups_size;
/** State for floating literal matcher (managed by HWLM). */
u32 floatingMatcherState;
/** Packed SOM location slots. */
u32 somLocation;
/** Multibit guarding SOM location slots. */
u32 somValid;
/** Multibit guarding SOM location slots. */
u32 somWritable;
/** Total size of Rose state, in bytes. */
u32 end;
};
struct RoseBoundaryReports {
u32 reportEodOffset; /**< 0 if no reports lits, otherwise offset of
* MO_INVALID_IDX terminated list to report at EOD */
u32 reportZeroOffset; /**< 0 if no reports lits, otherwise offset of
* MO_INVALID_IDX terminated list to report at offset
* 0 */
u32 reportZeroEodOffset; /**< 0 if no reports lits, otherwise offset of
* MO_INVALID_IDX terminated list to report if eod
* is at offset 0. Superset of other lists. */
};
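/* Illustrative sketch, not part of the original source: each non-zero offset
* above points at a MO_INVALID_IDX-terminated array of ReportID within the
* bytecode. The helper name is hypothetical. */
static really_inline
void exampleWalkBoundaryReports(const void *rose_base, u32 listOffset) {
if (!listOffset) {
return; /* no reports for this boundary */
}
const ReportID *r
= (const ReportID *)((const char *)rose_base + listOffset);
for (; *r != MO_INVALID_IDX; ++r) {
/* deliver report *r at the boundary */
}
}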
/* NFA Queue Assignment
*
* --- 0
* (|) chained mpv (if present)
* #
* --- outfixBeginQueue -
* | outfixes. enabled at offset 0.
* |
* #
* --- outfixEndQueue -
* | suffixes. enabled by rose roles.
* |
* #
* --- leftfixBeginQueue -
* | prefixes
* |
* #
* --- ?
* | infixes
* |
* #
*/
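/* Illustrative sketch, not part of the original source: mapping a queue
* index qi to the engine class implied by the layout above, given the
* RoseEngine boundary fields outfixBeginQueue, outfixEndQueue and
* leftfixBeginQueue (defined below). The diagram places prefixes before
* infixes within the leftfix range (see rosePrefixCount). The helper name is
* hypothetical. */
static really_inline
const char *exampleQueueClass(u32 qi, u32 outfixBeginQueue,
u32 outfixEndQueue, u32 leftfixBeginQueue) {
if (qi < outfixBeginQueue) {
return "chained mpv";
}
if (qi < outfixEndQueue) {
return "outfix";
}
if (qi < leftfixBeginQueue) {
return "suffix";
}
return "prefix/infix (leftfix)";
}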
#define ROSE_RUNTIME_FULL_ROSE 0
#define ROSE_RUNTIME_PURE_LITERAL 1
#define ROSE_RUNTIME_SINGLE_OUTFIX 2
// Runtime structure header for Rose.
// In memory, we follow this with:
// 1a. anchored 'literal' matcher table
// 1b. floating literal matcher table
// 1c. sidecar 'literal' matcher table
// 1d. eod-anchored literal matcher table
// 1e. small block table
// 2. array of RoseLiteral (literalCount entries)
// 3. array of RoseRole (roleCount entries)
// 4. array of RosePred (predCount entries)
// 8. array of NFA offsets, one per queue
// 9. array of state offsets, one per queue (+)
// 10. array of role ids for the set of all root roles
// 12. multi-direct report array
/*
* (+) stateOffset array note: Offsets in the array are either into the stream
* state (normal case) or into the tstate region of scratch (for transient rose
* nfas). Rose nfa info table can distinguish the cases.
*/
struct RoseEngine {
u8 hasFloatingDirectReports; // has at least one floating direct report literal
u8 noFloatingRoots; /* only need to run the anchored table if something
* matched in the anchored table */
u8 requiresEodCheck; /* stuff happens at eod time */
u8 requiresEodSideCatchup; /* we need to do a sidecar catchup before eod
* checks */
u8 hasEodEventLiteral; // fires a ROSE_EVENT literal at eod time.
u8 hasOutfixesInSmallBlock; /**< has at least one outfix that must run even
in small block scans. */
u8 runtimeImpl; /**< can we just run the floating table or a single outfix?
* or do we need a full rose? */
u8 mpvTriggeredByLeaf; /**< need to check (suf|out)fixes for mpv trigger */
u8 canExhaust; /**< every pattern has an exhaustion key */
u8 hasSom; /**< has at least one pattern which tracks SOM. */
u8 somHorizon; /**< width in bytes of SOM offset storage (governed by
SOM precision) */
u8 simpleCallback; /**< has only external reports with no bounds checks,
plus no exhaustion keys */
u32 mode; /**< scanning mode, one of HS_MODE_{BLOCK,STREAM,VECTORED} */
u32 historyRequired; /**< max amount of history required for streaming */
u32 ekeyCount; /**< number of exhaustion keys */
u32 dkeyCount; /**< number of dedupe keys */
u32 invDkeyOffset; /**< offset to table mapping from dkeys to the external
* report ids */
u32 somLocationCount; /**< number of som locations required */
u32 rolesWithStateCount; // number of roles with entries in state bitset
u32 stateSize; /* size of the state bitset
* WARNING: not the size of the rose state */
u32 anchorStateSize; /* size of the state for the anchor dfas */
u32 nfaStateSize; /* total size of the state for the mask/rose nfas */
u32 tStateSize; /* total size of the state for transient rose nfas */
u32 scratchStateSize; /**< uncompressed state req'd for NFAs in scratch;
* used for sizing scratch only. */
u32 smallWriteOffset; /**< offset of small-write matcher */
u32 amatcherOffset; // offset of the anchored literal matcher (bytes)
u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes)
u32 fmatcherOffset; // offset of the floating literal matcher (bytes)
u32 smatcherOffset; // offset of the sidecar literal matcher (bytes)
u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes)
u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern
* involved with the anchored table to produce a full
* match. */
u32 fmatcherMinWidth; /**< minimum number of bytes required for a pattern
* involved with the floating table to produce a full
* match. */
u32 eodmatcherMinWidth; /**< minimum number of bytes required for a pattern
* involved with the eod table to produce a full
* match. */
u32 amatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still
* produce a match for a pattern involved
* with the anchored table. */
u32 fmatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still
* produce a match for a pattern involved
* with the floating table. */
u32 intReportOffset; /**< offset of array of internal_report structures */
u32 intReportCount; /**< number of internal_report structures */
u32 literalOffset; // offset of RoseLiteral array (bytes)
u32 literalCount; // number of RoseLiteral entries [NOT number of literals]
u32 sideOffset; /**< offset of RoseSide array (bytes), indexed by
* sidecar ids */
u32 sideCount; /**< number of RoseSide entries */
u32 multidirectOffset; /**< offset of multi-direct report list. */
u32 activeArrayCount; // number of nfas tracked in the active array
u32 activeLeftCount; // number of nfas tracked in the active rose array
u32 queueCount; /**< number of nfa queues */
u32 roleOffset; // offset of RoseRole array (bytes)
u32 roleCount; // number of RoseRole entries
u32 predOffset; // offset of RosePred array (bytes)
u32 predCount; // number of RosePred entries
u32 rootRoleOffset;
u32 rootRoleCount;
u32 leftOffset;
u32 roseCount;
u32 lookaroundTableOffset; //!< base of lookaround offset list (of s8 values)
u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32
* bytes each) */
u32 eodIterOffset; // or 0 if no eod iterator
u32 eodIterMapOffset;
u32 lastByteHistoryIterOffset; // if non-zero
/** \brief Minimum number of bytes required to match. */
u32 minWidth;
/** \brief Minimum number of bytes required to match, excluding boundary
* reports. */
u32 minWidthExcludingBoundaries;
u32 maxBiAnchoredWidth; /* ROSE_BOUND_INF if any non bianchored patterns
* present */
u32 anchoredDistance; // region to run the anchored table over
u32 anchoredMinDistance; /* start of region to run anchored table over */
u32 floatingDistance; /* end of region to run the floating table over
ROSE_BOUND_INF if not bounded */
u32 floatingMinDistance; /* start of region to run floating table over */
u32 smallBlockDistance; /* end of region to run the floating table over
ROSE_BOUND_INF if not bounded */
u32 maxSafeAnchoredDROffset; /* the maximum offset that we can safely raise
* a direct report from the anchored table
* without delaying it */
u32 floatingMinLiteralMatchOffset; /* the minimum offset that we can get a
* 'valid' match from the floating
* table */
u32 nfaInfoOffset; /* offset to the nfa info offset array */
u32 anchoredReportMapOffset; /* am_log index --> reportid */
u32 anchoredReportInverseMapOffset; /* reportid --> am_log index */
rose_group initialGroups;
u32 size; // (bytes)
u32 anchoredMatches; /* number of anchored roles generating matches */
u32 delay_count; /* number of delayed literal ids. */
u32 delay_slot_size; /* size of delay slot mmbit. */
u32 delay_base_id; /* literal id of the first delayed literal.
* delayed literal ids are contiguous */
u32 anchored_count; /* number of anchored literal ids */
u32 anchored_base_id; /* literal id of the first literal in the A table.
* anchored literal ids are contiguous */
u32 nonbenefits_base_id; /* first literal id without benefit conf.
* contiguous, blah, blah */
u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can
* usefully be reported */
u32 delayRebuildLength; /* length of the history region which needs to be
* rescanned when we are doing a delayed literal
* rebuild scan. */
struct RoseStateOffsets stateOffsets;
struct RoseBoundaryReports boundary;
u32 totalNumLiterals; /* total number of literals including dr */
u32 asize; /* size of the atable */
u32 initSideEnableOffset; /* sidecar literals enabled initially */
u32 outfixBeginQueue; /* first outfix queue */
u32 outfixEndQueue; /* one past the last outfix queue */
u32 leftfixBeginQueue; /* first prefix/infix queue */
u32 initMpvNfa; /* (allegedly chained) mpv to force on at init */
u32 rosePrefixCount; /* number of rose prefixes */
u32 activeLeftIterOffset; /* mmbit_sparse_iter over non-transient roses */
u32 ematcherRegionSize; /* max region size to pass to ematcher */
u32 literalBenefitsOffsets; /* offset to array of benefits indexed by lit
id */
u32 somRevCount; /**< number of som reverse nfas */
u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */
u32 nfaRegionBegin; /* start of the nfa region, debugging only */
u32 nfaRegionEnd; /* end of the nfa region, debugging only */
u32 group_weak_end; /* end of weak groups, debugging only */
u32 floatingStreamState; // size in bytes
u32 eodLiteralId; // literal ID for eod ROSE_EVENT if used, otherwise 0.
struct scatter_full_plan state_init;
};
struct lit_benefits {
union {
u64a a64[MAX_MASK2_WIDTH/sizeof(u64a)];
u8 a8[MAX_MASK2_WIDTH];
} and_mask;
union {
u64a e64[MAX_MASK2_WIDTH/sizeof(u64a)];
u8 e8[MAX_MASK2_WIDTH];
} expected;
};
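/* Illustrative sketch, not part of the original source: the benefits confirm
* implied by the structure above -- a masked comparison of the bytes behind a
* literal match against the expected values. The helper name is
* hypothetical. */
static really_inline
char exampleBenefitsConfirm(const struct lit_benefits *lb, const u8 *data,
size_t len) {
assert(len <= MAX_MASK2_WIDTH);
for (size_t i = 0; i < len; i++) {
if ((data[i] & lb->and_mask.a8[i]) != lb->expected.e8[i]) {
return 0;
}
}
return 1;
}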
#if defined(_WIN32)
#pragma pack(push, 1)
#endif
// Rose runtime state
struct RoseRuntimeState {
u8 stored_depth; /* depth at stream boundary */
u8 flags; /* high bit true if delay rebuild needed */
u8 broken; /* user has requested that we stop matching */
#if defined(_WIN32)
};
#pragma pack(pop)
#else
} __attribute__((packed));
#endif
struct ALIGN_CL_DIRECTIVE anchored_matcher_info {
u32 next_offset; /* relative to this, 0 for end */
u32 state_offset; /* relative to anchorState */
u32 anchoredMinDistance; /* start of region to run anchored table over */
};
static really_inline
const struct anchored_matcher_info *getALiteralMatcher(
const struct RoseEngine *t) {
if (!t->amatcherOffset) {
return NULL;
}
const char *lt = (const char *)t + t->amatcherOffset;
assert(ISALIGNED_CL(lt));
return (const struct anchored_matcher_info *)lt;
}
struct HWLM;
static really_inline
const struct HWLM *getFLiteralMatcher(const struct RoseEngine *t) {
if (!t->fmatcherOffset) {
return NULL;
}
const char *lt = (const char *)t + t->fmatcherOffset;
assert(ISALIGNED_CL(lt));
return (const struct HWLM *)lt;
}
static really_inline
const void *getSLiteralMatcher(const struct RoseEngine *t) {
if (!t->smatcherOffset) {
return NULL;
}
const char *st = (const char *)t + t->smatcherOffset;
assert(ISALIGNED_N(st, 8));
return st;
}
static really_inline
const void *getELiteralMatcher(const struct RoseEngine *t) {
if (!t->ematcherOffset) {
return NULL;
}
const char *et = (const char *)t + t->ematcherOffset;
assert(ISALIGNED_N(et, 8));
return et;
}
static really_inline
const void *getSBLiteralMatcher(const struct RoseEngine *t) {
if (!t->sbmatcherOffset) {
return NULL;
}
const char *matcher = (const char *)t + t->sbmatcherOffset;
assert(ISALIGNED_N(matcher, 8));
return matcher;
}
static really_inline
const struct RoseLiteral *getLiteralTable(const struct RoseEngine *t) {
const struct RoseLiteral *tl
= (const struct RoseLiteral *)((const char *)t + t->literalOffset);
assert(ISALIGNED_N(tl, 4));
return tl;
}
static really_inline
const struct RoseSide *getSideEntryTable(const struct RoseEngine *t) {
const struct RoseSide *rs
= (const struct RoseSide *)((const char *)t + t->sideOffset);
assert(ISALIGNED(rs));
return rs;
}
static really_inline
const struct RoseRole *getRoleTable(const struct RoseEngine *t) {
const struct RoseRole *r
= (const struct RoseRole *)((const char *)t + t->roleOffset);
assert(ISALIGNED_N(r, 4));
return r;
}
static really_inline
const struct RosePred *getPredTable(const struct RoseEngine *t) {
const struct RosePred *p
= (const struct RosePred *)((const char *)t + t->predOffset);
assert(ISALIGNED_N(p, 4));
return p;
}
static really_inline
const struct LeftNfaInfo *getLeftTable(const struct RoseEngine *t) {
const struct LeftNfaInfo *r
= (const struct LeftNfaInfo *)((const char *)t + t->leftOffset);
assert(ISALIGNED_N(r, 4));
return r;
}
struct mmbit_sparse_iter; // forward decl
static really_inline
const struct mmbit_sparse_iter *getActiveLeftIter(const struct RoseEngine *t) {
assert(t->activeLeftIterOffset);
const struct mmbit_sparse_iter *it = (const struct mmbit_sparse_iter *)
((const char *)t + t->activeLeftIterOffset);
assert(ISALIGNED_N(it, 4));
return it;
}
static really_inline
const u32 *getRootRoleTable(const struct RoseEngine *t) {
const u32 *r = (const u32 *)((const char *)t + t->rootRoleOffset);
assert(ISALIGNED_N(r, 4));
return r;
}
static really_inline
const struct lit_benefits *getLiteralBenefitsTable(
const struct RoseEngine *t) {
return (const struct lit_benefits *)
((const char *)t + t->literalBenefitsOffsets);
}
static really_inline
const struct NfaInfo *getNfaInfoByQueue(const struct RoseEngine *t, u32 qi) {
const struct NfaInfo *infos
= (const struct NfaInfo *)((const char *)t + t->nfaInfoOffset);
assert(ISALIGNED_N(infos, sizeof(u32)));
return &infos[qi];
}
static really_inline
const struct NFA *getNfaByInfo(const struct RoseEngine *t,
const struct NfaInfo *info) {
return (const struct NFA *)((const char *)t + info->nfaOffset);
}
static really_inline
const struct NFA *getNfaByQueue(const struct RoseEngine *t, u32 qi) {
const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
return getNfaByInfo(t, info);
}
static really_inline
u32 queueToLeftIndex(const struct RoseEngine *t, u32 qi) {
assert(qi >= t->leftfixBeginQueue);
return qi - t->leftfixBeginQueue;
}
static really_inline
const struct LeftNfaInfo *getLeftInfoByQueue(const struct RoseEngine *t,
u32 qi) {
const struct LeftNfaInfo *infos = getLeftTable(t);
return &infos[queueToLeftIndex(t, qi)];
}
struct SmallWriteEngine;
static really_inline
const struct SmallWriteEngine *getSmallWrite(const struct RoseEngine *t) {
if (!t->smallWriteOffset) {
return NULL;
}
const struct SmallWriteEngine *smwr =
(const struct SmallWriteEngine *)((const char *)t + t->smallWriteOffset);
return smwr;
}
#endif // ROSE_INTERNAL_H

101
src/rose/rose_sidecar_runtime.h Normal file
View File

@@ -0,0 +1,101 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_SIDECAR_RUNTIME_H_1F746F6F237176
#define ROSE_SIDECAR_RUNTIME_H_1F746F6F237176
#include "hwlm/hwlm.h"
#include "scratch.h"
#include "sidecar/sidecar.h"
#include "rose_common.h"
#include "ue2common.h"
// Callback defined in match.c
void roseSidecarCallback(u64a offset, u32 side_id, void *context);
static really_inline
void catchup_sidecar(struct RoseContext *tctxt, u64a end) {
DEBUG_PRINTF("catching up the sidecar from %llu to %llu\n",
tctxt->side_curr, end);
const struct sidecar *sidecar = getSLiteralMatcher(tctxt->t);
struct hs_scratch *scratch = tctxtToScratch(tctxt);
struct core_info *ci = &scratch->core_info;
if (!sidecar || tctxt->side_curr == end) {
return;
}
const u8 *start;
if (tctxt->side_curr >= ci->buf_offset) {
start = ci->buf + tctxt->side_curr - ci->buf_offset;
assert(end <= ci->buf_offset + ci->len);
} else {
/* at eod time we are called running over the history */
start = ci->hbuf + tctxt->side_curr - ci->buf_offset + ci->hlen;
assert(end <= ci->buf_offset);
}
size_t len = end - tctxt->side_curr;
DEBUG_PRINTF("enabled-->%02hhx\n", *(u8 *)&scratch->side_enabled.arb);
sidecarExec(sidecar, start, len, &scratch->side_enabled.arb,
scratch->side_scratch, tctxt->side_curr, roseSidecarCallback,
tctxt);
tctxt->side_curr = end;
DEBUG_PRINTF("finished catching up the sidecar to %llu\n", end);
}
static rose_inline
void enable_sidecar(struct RoseContext *tctxt, const struct RoseRole *tr) {
assert(tr->sidecarEnableOffset);
const struct sidecar *sidecar = getSLiteralMatcher(tctxt->t);
assert(sidecar);
struct hs_scratch *scratch = tctxtToScratch(tctxt);
DEBUG_PRINTF("welcome to the sidecar\n");
sidecarEnabledUnion(sidecar, &scratch->side_enabled.arb,
(const void *)((const char *)tctxt->t + tr->sidecarEnableOffset));
}
static really_inline
void sidecar_enabled_populate(const struct RoseEngine *t,
struct hs_scratch *scratch, const u8 *state) {
DEBUG_PRINTF("enabled-->%02hhx\n", *(state + t->stateOffsets.sidecar));
memcpy(&scratch->side_enabled, state + t->stateOffsets.sidecar,
t->stateOffsets.sidecar_size);
DEBUG_PRINTF("enabled-->%02hhx\n", *(u8 *)&scratch->side_enabled.arb);
}
static really_inline
void sidecar_enabled_preserve(const struct RoseEngine *t,
const struct hs_scratch *scratch, u8 *state) {
memcpy(state + t->stateOffsets.sidecar, &scratch->side_enabled,
t->stateOffsets.sidecar_size);
}
#endif /* ROSE_SIDECAR_RUNTIME_H_1F746F6F237176 */

41
src/rose/rose_types.h Normal file
View File

@@ -0,0 +1,41 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ROSE_TYPES_H
#define ROSE_TYPES_H
#include "ue2common.h"
struct RoseEngine;
// Note: identical signature to NfaCallback
typedef int (*RoseCallback)(u64a offset, ReportID id, void *context);
typedef int (*RoseCallbackSom)(u64a from_offset, u64a to_offset, ReportID id,
void *context);
#endif

217
src/rose/runtime.h Normal file
View File

@@ -0,0 +1,217 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Runtime functions shared between various Rose runtime code.
*/
#ifndef ROSE_RUNTIME_H
#define ROSE_RUNTIME_H
#include "scratch.h"
#include "rose_internal.h"
#include "util/exhaust.h" // for isExhausted
#include "util/internal_report.h"
#include "util/partial_store.h"
/*
* ROSE STATE LAYOUT:
* state multibit
* runtime state structure
* full history table
* last history table
* short history table
* short queues (two multibits)
* last queues (two multibits)
* active array
* delay rb dirty
* nfa state
*/
#define rose_inline really_inline
/** \brief Fetch runtime state ptr. */
static really_inline
struct RoseRuntimeState *getRuntimeState(u8 *state) {
struct RoseRuntimeState *rs = (struct RoseRuntimeState *)(state);
assert(ISALIGNED_N(rs, 8));
return rs;
}
static really_inline
const void *getByOffset(const struct RoseEngine *t, u32 offset) {
assert(offset < t->size);
return (const u8 *)t + offset;
}
static really_inline
void *getRoleState(u8 *state) {
return state + sizeof(struct RoseRuntimeState);
}
/** \brief Fetch the active array for suffix nfas. */
static really_inline
u8 *getActiveLeafArray(const struct RoseEngine *t, u8 *state) {
return state + t->stateOffsets.activeLeafArray;
}
/** \brief Fetch the active array for rose nfas. */
static really_inline
u8 *getActiveLeftArray(const struct RoseEngine *t, u8 *state) {
return state + t->stateOffsets.activeLeftArray;
}
static really_inline
const u32 *getAnchoredInverseMap(const struct RoseEngine *t) {
return (const u32 *)(((const u8 *)t) + t->anchoredReportInverseMapOffset);
}
static really_inline
const u32 *getAnchoredMap(const struct RoseEngine *t) {
return (const u32 *)(((const u8 *)t) + t->anchoredReportMapOffset);
}
static really_inline
rose_group loadGroups(const struct RoseEngine *t, const u8 *state) {
return partial_load_u64a(state + t->stateOffsets.groups,
t->stateOffsets.groups_size);
}
static really_inline
void storeGroups(const struct RoseEngine *t, u8 *state, rose_group groups) {
partial_store_u64a(state + t->stateOffsets.groups, groups,
t->stateOffsets.groups_size);
}
static really_inline
u8 *getFloatingMatcherState(const struct RoseEngine *t, u8 *state) {
return state + t->stateOffsets.floatingMatcherState;
}
static really_inline
u8 *getLeftfixLagTable(const struct RoseEngine *t, u8 *state) {
return state + t->stateOffsets.leftfixLagTable;
}
static really_inline
const u8 *getLeftfixLagTableConst(const struct RoseEngine *t, const u8 *state) {
return state + t->stateOffsets.leftfixLagTable;
}
static rose_inline
char roseSuffixInfoIsExhausted(const struct RoseEngine *t,
const struct NfaInfo *info,
const char *exhausted) {
if (!info->ekeyListOffset) {
return 0;
}
DEBUG_PRINTF("check exhaustion -> start at %u\n", info->ekeyListOffset);
/* END_EXHAUST terminated list */
const u32 *ekeys = (const u32 *)((const char *)t + info->ekeyListOffset);
while (*ekeys != END_EXHAUST) {
DEBUG_PRINTF("check %u\n", *ekeys);
if (!isExhausted(exhausted, *ekeys)) {
DEBUG_PRINTF("not exhausted -> alive\n");
return 0;
}
++ekeys;
}
DEBUG_PRINTF("all ekeys exhausted -> dead\n");
return 1;
}
static really_inline
char roseSuffixIsExhausted(const struct RoseEngine *t, u32 qi,
const char *exhausted) {
DEBUG_PRINTF("check queue %u\n", qi);
const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
return roseSuffixInfoIsExhausted(t, info, exhausted);
}
static really_inline
u32 has_chained_nfas(const struct RoseEngine *t) {
return t->outfixBeginQueue;
}
/** \brief Fetch \ref internal_report structure for this internal ID. */
static really_inline
const struct internal_report *getInternalReport(const struct RoseEngine *t,
ReportID intId) {
const struct internal_report *reports =
(const struct internal_report *)((const u8 *)t + t->intReportOffset);
assert(intId < t->intReportCount);
return reports + intId;
}
static really_inline
const struct RoseRole *getRoleByOffset(const struct RoseEngine *t, u32 offset) {
const struct RoseRole *tr = (const void *)((const char *)t + offset);
assert((size_t)(tr - getRoleTable(t)) < t->roleCount);
DEBUG_PRINTF("get root role %zu\n", tr - getRoleTable(t));
return tr;
}
#define ANCHORED_MATCH_SENTINEL (~0U)
static really_inline
void updateLastMatchOffset(struct RoseContext *tctxt, u64a offset) {
DEBUG_PRINTF("match @%llu, last match @%llu\n", offset,
tctxt->lastMatchOffset);
assert(offset >= tctxt->minMatchOffset);
assert(offset >= tctxt->lastMatchOffset);
tctxt->lastMatchOffset = offset;
}
static really_inline
void updateMinMatchOffset(struct RoseContext *tctxt, u64a offset) {
DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset,
tctxt->minMatchOffset);
assert(offset >= tctxt->minMatchOffset);
assert(offset >= tctxt->minNonMpvMatchOffset);
tctxt->minMatchOffset = offset;
tctxt->minNonMpvMatchOffset = offset;
}
static really_inline
void updateMinMatchOffsetFromMpv(struct RoseContext *tctxt, u64a offset) {
DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset,
tctxt->minMatchOffset);
assert(offset >= tctxt->minMatchOffset);
assert(tctxt->minNonMpvMatchOffset >= tctxt->minMatchOffset);
tctxt->minMatchOffset = offset;
tctxt->minNonMpvMatchOffset = MAX(tctxt->minNonMpvMatchOffset, offset);
}
#endif

582
src/rose/stream.c Normal file
View File

@@ -0,0 +1,582 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "catchup.h"
#include "counting_miracle.h"
#include "infix.h"
#include "match.h"
#include "miracle.h"
#include "hwlm/hwlm.h"
#include "nfa/mcclellan.h"
#include "nfa/nfa_api.h"
#include "nfa/nfa_api_queue.h"
#include "nfa/nfa_internal.h"
#include "util/fatbit.h"
#include "rose_sidecar_runtime.h"
#include "rose.h"
static rose_inline
void runAnchoredTableStream(const struct RoseEngine *t, const void *atable,
size_t alen, u64a offset,
struct hs_scratch *scratch) {
char *state_base
= (char *)scratch->tctxt.state + t->stateOffsets.anchorState;
const struct anchored_matcher_info *curr = atable;
do {
DEBUG_PRINTF("--anchored nfa (+%u) no %u so %u\n",
curr->anchoredMinDistance, curr->next_offset,
curr->state_offset);
const struct NFA *nfa
= (const struct NFA *)((const char *)curr + sizeof(*curr));
assert(ISALIGNED_CL(nfa));
assert(isMcClellanType(nfa->type));
char *state = state_base + curr->state_offset;
char start = 0;
size_t adj = 0;
if (offset <= curr->anchoredMinDistance) {
adj = curr->anchoredMinDistance - offset;
if (adj >= alen) {
goto next_nfa;
}
start = 1;
} else {
// (No state decompress necessary.)
if (nfa->type == MCCLELLAN_NFA_8) {
if (!*(u8 *)state) {
goto next_nfa;
}
} else {
if (!*(u16 *)state) {
goto next_nfa;
}
}
}
if (nfa->type == MCCLELLAN_NFA_8) {
nfaExecMcClellan8_SimpStream(nfa, state, scratch->core_info.buf,
start, adj, alen, roseAnchoredCallback,
&scratch->tctxt);
} else {
nfaExecMcClellan16_SimpStream(nfa, state, scratch->core_info.buf,
start, adj, alen, roseAnchoredCallback,
&scratch->tctxt);
}
next_nfa:
if (!curr->next_offset) {
break;
}
curr = (const void *)((const char *)curr + curr->next_offset);
} while (1);
}
static really_inline
void saveStreamState(const struct NFA *nfa, struct mq *q, s64a loc) {
DEBUG_PRINTF("offset=%llu, length=%zu, hlength=%zu, loc=%lld\n",
q->offset, q->length, q->hlength, loc);
nfaQueueCompressState(nfa, q, loc);
}
static really_inline
u8 getByteBefore(const struct core_info *ci, s64a sp) {
if (sp > 0) { // in main buffer
assert(sp <= (s64a)ci->len);
return ci->buf[sp - 1];
}
// in history buffer
assert(-sp < (s64a)ci->hlen);
return ci->hbuf[ci->hlen + sp - 1];
}
/** \brief Return value for \ref roseScanForMiracles. */
enum MiracleAction {
MIRACLE_DEAD, //!< kill off this engine
MIRACLE_SAVED, //!< engine has been caught up and state saved
MIRACLE_CONTINUE //!< continue running and catch up engine
};
static really_inline
enum MiracleAction roseScanForMiracles(const struct RoseEngine *t, u8 *state,
struct hs_scratch *scratch, u32 qi,
const struct LeftNfaInfo *left,
const struct NFA *nfa) {
struct core_info *ci = &scratch->core_info;
const u32 qCount = t->queueCount;
struct mq *q = scratch->queues + qi;
const char q_active = fatbit_isset(scratch->aqa, qCount, qi);
DEBUG_PRINTF("q_active=%d\n", q_active);
const s64a begin_loc = q_active ? q_cur_loc(q) : 0;
const s64a end_loc = ci->len;
s64a miracle_loc;
if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) {
goto found_miracle;
}
if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc,
&miracle_loc)) {
goto found_miracle;
}
DEBUG_PRINTF("no miracle\n");
return MIRACLE_CONTINUE;
found_miracle:
DEBUG_PRINTF("miracle at %lld\n", miracle_loc);
if (left->infix) {
if (!q_active) {
DEBUG_PRINTF("killing infix\n");
return MIRACLE_DEAD;
}
DEBUG_PRINTF("skip q forward, %lld to %lld\n", begin_loc, miracle_loc);
q_skip_forward_to(q, miracle_loc);
if (q->items[q->end - 1].type == MQE_START) {
DEBUG_PRINTF("miracle caused infix to die\n");
return MIRACLE_DEAD;
}
DEBUG_PRINTF("re-init infix state\n");
assert(q->items[q->cur].type == MQE_START);
q->items[q->cur].location = miracle_loc;
nfaQueueInitState(q->nfa, q);
} else {
if (miracle_loc > end_loc - t->historyRequired) {
u8 *streamState = state + getNfaInfoByQueue(t, qi)->stateOffset;
u64a offset = ci->buf_offset + miracle_loc;
u8 key = offset ? getByteBefore(ci, miracle_loc) : 0;
DEBUG_PRINTF("init state, key=0x%02x, offset=%llu\n", key, offset);
if (!nfaInitCompressedState(nfa, offset, streamState, key)) {
return MIRACLE_DEAD;
}
storeRoseDelay(t, state, left, (s64a)ci->len - miracle_loc);
return MIRACLE_SAVED;
}
DEBUG_PRINTF("re-init prefix (skip %lld->%lld)\n", begin_loc,
miracle_loc);
if (!q_active) {
fatbit_set(scratch->aqa, qCount, qi);
initRoseQueue(t, qi, left, &scratch->tctxt);
}
q->cur = q->end = 0;
pushQueueAt(q, 0, MQE_START, miracle_loc);
pushQueueAt(q, 1, MQE_TOP, miracle_loc);
nfaQueueInitState(q->nfa, q);
}
return MIRACLE_CONTINUE;
}
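
// Catch a single non-transient leftfix engine up towards the stream boundary,
// saving its stream state; returns 0 if the engine is dead and can be retired.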
static really_inline
char roseCatchUpLeftfix(const struct RoseEngine *t, u8 *state,
struct hs_scratch *scratch, u32 qi,
const struct LeftNfaInfo *left) {
assert(!left->transient); // active roses only
struct core_info *ci = &scratch->core_info;
const u32 qCount = t->queueCount;
struct mq *q = scratch->queues + qi;
const struct NFA *nfa = getNfaByQueue(t, qi);
if (nfaSupportsZombie(nfa)
&& ci->buf_offset /* prefix can be alive with no q */
&& !fatbit_isset(scratch->aqa, qCount, qi)
&& isZombie(t, state, left)) {
DEBUG_PRINTF("yawn - zombie\n");
return 1;
}
if (left->stopTable) {
enum MiracleAction mrv =
roseScanForMiracles(t, state, scratch, qi, left, nfa);
switch (mrv) {
case MIRACLE_DEAD:
return 0;
case MIRACLE_SAVED:
return 1;
default:
assert(mrv == MIRACLE_CONTINUE);
break;
}
}
if (!fatbit_set(scratch->aqa, qCount, qi)) {
initRoseQueue(t, qi, left, &scratch->tctxt);
s32 sp;
if (ci->buf_offset) {
sp = -(s32)loadRoseDelay(t, state, left);
} else {
sp = 0;
}
DEBUG_PRINTF("ci->len=%zu, sp=%d, historyRequired=%u\n", ci->len, sp,
t->historyRequired);
        if (ci->len - sp + 1 < t->historyRequired) {
// we'll end up safely in the history region.
DEBUG_PRINTF("safely in history, skipping\n");
storeRoseDelay(t, state, left, (s64a)ci->len - sp);
return 1;
}
pushQueueAt(q, 0, MQE_START, sp);
if (left->infix || ci->buf_offset + sp > 0) {
loadStreamState(nfa, q, sp);
} else {
pushQueueAt(q, 1, MQE_TOP, sp);
nfaQueueInitState(nfa, q);
}
} else {
DEBUG_PRINTF("queue already active\n");
if (q->end - q->cur == 1 && q_cur_type(q) == MQE_START) {
DEBUG_PRINTF("empty queue, start loc=%lld\n", q_cur_loc(q));
s64a last_loc = q_cur_loc(q);
if (ci->len - last_loc + 1 < t->historyRequired) {
// we'll end up safely in the history region.
DEBUG_PRINTF("safely in history, saving state and skipping\n");
saveStreamState(nfa, q, last_loc);
storeRoseDelay(t, state, left, (s64a)ci->len - last_loc);
return 1;
}
}
}
// Determine whether the byte before last_loc will be in the history
// buffer on the next stream write.
s64a last_loc = q_last_loc(q);
s64a leftovers = ci->len - last_loc;
if (leftovers + 1 >= t->historyRequired) {
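        // Too far back: the byte before last_loc would fall outside the
        // history buffer, so catch up to within maxLag of the boundary.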
u32 catchup_offset = left->maxLag ? left->maxLag - 1 : 0;
last_loc = (s64a)ci->len - catchup_offset;
}
if (left->infix) {
if (infixTooOld(q, last_loc)) {
DEBUG_PRINTF("infix died of old age\n");
return 0;
}
reduceQueue(q, last_loc, left->maxQueueLen, q->nfa->maxWidth);
}
DEBUG_PRINTF("end scan at %lld\n", last_loc);
pushQueueNoMerge(q, MQE_END, last_loc);
#ifdef DEBUG
debugQueue(q);
#endif
char rv = nfaQueueExecRose(nfa, q, MO_INVALID_IDX);
if (!rv) { /* nfa is dead */
DEBUG_PRINTF("died catching up to stream boundary\n");
return 0;
} else {
DEBUG_PRINTF("alive, saving stream state\n");
if (nfaSupportsZombie(nfa) &&
nfaGetZombieStatus(nfa, q, last_loc) == NFA_ZOMBIE_ALWAYS_YES) {
DEBUG_PRINTF("not so fast - zombie\n");
setAsZombie(t, state, left);
} else {
saveStreamState(nfa, q, last_loc);
storeRoseDelay(t, state, left, (s64a)ci->len - last_loc);
}
}
return 1;
}
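
// Walk the active leftfix array and catch up each non-transient leftfix;
// engines that die are removed from the array and their groups squashed.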
static rose_inline
void roseCatchUpLeftfixes(const struct RoseEngine *t, u8 *state,
struct hs_scratch *scratch) {
if (!t->activeLeftIterOffset) {
// No sparse iter, no non-transient roses.
return;
}
    // As per UE-1629, we catch up leftfix engines to:
    // * the current position (the last location in the queue, or the last
    //   location we executed to if the queue is empty) if that position (and
    //   the byte before it, so we can decompress the stream state) will be in
    //   the history buffer on the next stream write; OR
    // * (stream_boundary - max_delay) otherwise.
u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into
* left_table */
const u32 arCount = t->activeLeftCount;
const struct LeftNfaInfo *left_table = getLeftTable(t);
const struct mmbit_sparse_iter *it = getActiveLeftIter(t);
struct mmbit_sparse_state *s = scratch->sparse_iter_state;
u32 idx = 0;
u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, s);
for (; ri != MMB_INVALID;
ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, s)) {
const struct LeftNfaInfo *left = left_table + ri;
u32 qi = ri + t->leftfixBeginQueue;
DEBUG_PRINTF("leftfix %u of %u, maxLag=%u, infix=%d\n", ri, arCount,
left->maxLag, (int)left->infix);
if (!roseCatchUpLeftfix(t, state, scratch, qi, left)) {
DEBUG_PRINTF("removing rose %u from active list\n", ri);
DEBUG_PRINTF("groups old=%016llx mask=%016llx\n",
scratch->tctxt.groups, left->squash_mask);
scratch->tctxt.groups &= left->squash_mask;
mmbit_unset(ara, arCount, ri);
}
}
}

// Saves out stream state for all our active suffix NFAs.
static rose_inline
void roseSaveNfaStreamState(const struct RoseEngine *t, u8 *state,
struct hs_scratch *scratch) {
struct mq *queues = scratch->queues;
u8 *aa = getActiveLeafArray(t, state);
u32 aaCount = t->activeArrayCount;
if (scratch->tctxt.mpv_inactive) {
DEBUG_PRINTF("mpv is dead as a doornail\n");
/* mpv if it exists is queue 0 */
mmbit_unset(aa, aaCount, 0);
}
for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID;
qi = mmbit_iterate(aa, aaCount, qi)) {
DEBUG_PRINTF("saving stream state for qi=%u\n", qi);
struct mq *q = queues + qi;
// If it's active, it should have an active queue (as we should have
// done some work!)
assert(fatbit_isset(scratch->aqa, t->queueCount, qi));
const struct NFA *nfa = getNfaByQueue(t, qi);
saveStreamState(nfa, q, q_cur_loc(q));
}
}
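
// End-of-write housekeeping: catch up to the stream boundary, save NFA stream
// states, flush last-byte history and the sidecar, and store groups and
// runtime flags for the next write.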
static rose_inline
void ensureStreamNeatAndTidy(const struct RoseEngine *t, u8 *state,
struct hs_scratch *scratch, size_t length,
u64a offset, u8 delay_rb_status) {
struct RoseContext *tctxt = &scratch->tctxt;
    if (roseCatchUpTo(t, state, length + scratch->core_info.buf_offset,
                      scratch, 0) == HWLM_TERMINATE_MATCHING) {
return; /* dead; no need to clean up state. */
}
roseSaveNfaStreamState(t, state, scratch);
roseCatchUpLeftfixes(t, state, scratch);
roseFlushLastByteHistory(t, state, offset + length, tctxt);
tctxt->lastEndOffset = offset + length;
catchup_sidecar(tctxt, offset + length);
sidecar_enabled_preserve(t, scratch, state);
storeGroups(t, state, tctxt->groups);
struct RoseRuntimeState *rstate = getRuntimeState(state);
rstate->stored_depth = tctxt->depth;
rstate->flags = delay_rb_status;
}
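
// Re-scan the tail of the history buffer with the floating literal matcher so
// that delayed-match state can be rebuilt for this stream write.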
static really_inline
void do_rebuild(const struct RoseEngine *t, const struct HWLM *ftable,
struct hs_scratch *scratch) {
assert(!can_stop_matching(scratch));
size_t len = MIN(scratch->core_info.hlen, t->delayRebuildLength);
const u8 *buf = scratch->core_info.hbuf + scratch->core_info.hlen - len;
DEBUG_PRINTF("BEGIN FLOATING REBUILD over %zu bytes\n", len);
hwlmExec(ftable, buf, len, 0, roseDelayRebuildCallback, scratch,
scratch->tctxt.groups);
assert(!can_stop_matching(scratch));
}
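
// Entry point for Rose in streaming mode: run the anchored and floating
// literal matchers over this write, flush delayed literals, and leave the
// stream state tidy for the next write.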
void roseStreamExec(const struct RoseEngine *t, u8 *state,
struct hs_scratch *scratch, RoseCallback callback,
RoseCallbackSom som_callback, void *ctx) {
DEBUG_PRINTF("OH HAI\n");
assert(t);
assert(state);
assert(scratch->core_info.hbuf);
assert(scratch->core_info.buf);
assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount)
< MAX_SPARSE_ITER_STATES);
size_t length = scratch->core_info.len;
u64a offset = scratch->core_info.buf_offset;
// We may have a maximum width (for engines constructed entirely
// of bi-anchored patterns). If this write would result in us progressing
// beyond this point, we cannot possibly match.
if (t->maxBiAnchoredWidth != ROSE_BOUND_INF
&& offset + length > t->maxBiAnchoredWidth) {
DEBUG_PRINTF("bailing, write would progress beyond maxBAWidth\n");
return;
}
struct RoseRuntimeState *rstate = getRuntimeState(state);
struct RoseContext *tctxt = &scratch->tctxt;
tctxt->t = t;
tctxt->depth = rstate->stored_depth;
tctxt->mpv_inactive = 0;
tctxt->groups = loadGroups(t, state);
tctxt->lit_offset_adjust = offset + 1; // index after last byte
tctxt->delayLastEndOffset = offset;
tctxt->lastEndOffset = offset;
tctxt->filledDelayedSlots = 0;
tctxt->state = state;
tctxt->cb = callback;
tctxt->cb_som = som_callback;
tctxt->userCtx = ctx;
tctxt->lastMatchOffset = 0;
tctxt->minMatchOffset = offset;
tctxt->minNonMpvMatchOffset = offset;
tctxt->next_mpv_offset = 0;
tctxt->curr_anchored_loc = MMB_INVALID;
tctxt->curr_row_offset = 0;
tctxt->side_curr = offset;
DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu\n",
scratch->core_info.hlen, scratch->core_info.len);
fatbit_clear(scratch->aqa);
scratch->am_log_sum = 0; /* clear the anchored logs */
scratch->al_log_sum = 0;
scratch->catchup_pq.qm_size = 0;
if (t->outfixBeginQueue != t->outfixEndQueue) {
streamInitSufPQ(t, state, scratch);
}
sidecar_enabled_populate(t, scratch, state);
u8 delay_rb_status = rstate->flags;
u32 alen = t->anchoredDistance > offset ?
MIN(length + offset, t->anchoredDistance) - offset : 0;
const struct anchored_matcher_info *atable = getALiteralMatcher(t);
if (atable && alen) {
DEBUG_PRINTF("BEGIN ANCHORED %zu/%u\n", scratch->core_info.hlen, alen);
runAnchoredTableStream(t, atable, alen, offset, scratch);
if (can_stop_matching(scratch)) {
goto exit;
}
resetAnchoredLog(t, scratch);
}
const struct HWLM *ftable = getFLiteralMatcher(t);
if (ftable) {
if (t->noFloatingRoots && tctxt->depth == 1) {
DEBUG_PRINTF("skip FLOATING: no inflight matches\n");
goto flush_delay_and_exit;
}
size_t flen = length;
if (t->floatingDistance != ROSE_BOUND_INF) {
flen = t->floatingDistance > offset ?
MIN(t->floatingDistance, length + offset) - offset : 0;
}
size_t hlength = scratch->core_info.hlen;
char rebuild = hlength && (delay_rb_status & DELAY_FLOAT_DIRTY)
&& (t->maxFloatingDelayedMatch == ROSE_BOUND_INF
|| offset < t->maxFloatingDelayedMatch);
DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n",
rebuild, delay_rb_status, t->maxFloatingDelayedMatch,
offset);
        if (rebuild) { /* rebuild floating delayed match stuff */
            do_rebuild(t, ftable, scratch);
        }

        if (!flen) {
            goto flush_delay_and_exit;
        }
if (flen + offset <= t->floatingMinDistance) {
DEBUG_PRINTF("skip FLOATING: before floating min\n");
goto flush_delay_and_exit;
}
size_t start = 0;
if (offset < t->floatingMinDistance) {
// This scan crosses the floating min distance, so we can use that
// to set HWLM's "start" offset.
start = t->floatingMinDistance - offset;
}
DEBUG_PRINTF("start=%zu\n", start);
u8 *stream_state;
if (t->floatingStreamState) {
stream_state = getFloatingMatcherState(t, state);
} else {
stream_state = NULL;
}
DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length);
hwlmExecStreaming(ftable, scratch, flen, start, roseCallback, tctxt,
tctxt->groups, stream_state);
}
flush_delay_and_exit:
DEBUG_PRINTF("flushing floating\n");
if (cleanUpDelayed(length, offset, tctxt, &delay_rb_status)
== HWLM_TERMINATE_MATCHING) {
return;
}
exit:
DEBUG_PRINTF("CLEAN UP TIME\n");
if (!can_stop_matching(scratch)) {
ensureStreamNeatAndTidy(t, state, scratch, length, offset,
delay_rb_status);
}
DEBUG_PRINTF("DONE STREAMING SCAN, dirty = %hhu\n", delay_rb_status);
return;
}