mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
Rose: move more report handling work into program
Move report preconditions (bounds, exhaustion, etc) into program instructions and use a more direct path to the user match callback than the adaptor functions. Report handling has been moved to new file src/report.h. Reporting from EOD now uses the same instructions as normal report handling, rather than its own. Jump target tracking in rose_build_bytecode.cpp has been cleaned up.
This commit is contained in:
parent
94b33421ca
commit
060defe6c4
@ -384,6 +384,7 @@ set (hs_exec_SRCS
|
||||
src/ue2common.h
|
||||
src/alloc.c
|
||||
src/allocator.h
|
||||
src/report.h
|
||||
src/runtime.c
|
||||
src/fdr/fdr.c
|
||||
src/fdr/fdr.h
|
||||
|
531
src/report.h
Normal file
531
src/report.h
Normal file
@ -0,0 +1,531 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Runtime functions to do with reports, inlined into callers.
|
||||
*/
|
||||
|
||||
#ifndef REPORT_H
|
||||
#define REPORT_H
|
||||
|
||||
#include "hs_internal.h"
|
||||
#include "hs_runtime.h"
|
||||
#include "scratch.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/callback.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "rose/runtime.h"
|
||||
#include "som/som_runtime.h"
|
||||
#include "util/exhaust.h"
|
||||
#include "util/fatbit.h"
|
||||
#include "util/internal_report.h"
|
||||
|
||||
static really_inline
|
||||
int satisfiesMinLength(u64a min_len, u64a from_offset,
|
||||
u64a to_offset) {
|
||||
assert(min_len);
|
||||
|
||||
if (from_offset == HS_OFFSET_PAST_HORIZON) {
|
||||
DEBUG_PRINTF("SOM beyond horizon\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("match len=%llu, min len=%llu\n", to_offset - from_offset,
|
||||
min_len);
|
||||
return to_offset - from_offset >= min_len;
|
||||
}
|
||||
|
||||
/** \brief Outcome of a dedupe/catch-up check performed before reporting. */
enum DedupeResult {
    /** Continue with match, not a dupe. */
    DEDUPE_CONTINUE = 0,
    /** Don't report this match, dupe or delayed due to SOM. */
    DEDUPE_SKIP = 1,
    /** User instructed us to stop matching. */
    DEDUPE_HALT = 2
};
|
||||
|
||||
static really_inline
|
||||
enum DedupeResult dedupeCatchup(const struct RoseEngine *rose,
|
||||
const struct internal_report *ri,
|
||||
struct hs_scratch *scratch, u64a offset,
|
||||
u64a from_offset, u64a to_offset,
|
||||
const char do_som) {
|
||||
DEBUG_PRINTF("offset=%llu, match=[%llu,%llu], dkey=%u, do_som=%d\n", offset,
|
||||
from_offset, to_offset, ri->dkey, do_som);
|
||||
DEBUG_PRINTF("report type=%u, quashSom=%d\n", ri->type, ri->quashSom);
|
||||
const u32 dkey = ri->dkey;
|
||||
if (!do_som && dkey == MO_INVALID_IDX) {
|
||||
DEBUG_PRINTF("nothing to do\n");
|
||||
return DEDUPE_CONTINUE;
|
||||
}
|
||||
|
||||
struct match_deduper *deduper = &scratch->deduper;
|
||||
if (offset != deduper->current_report_offset) {
|
||||
assert(deduper->current_report_offset == ~0ULL ||
|
||||
deduper->current_report_offset < offset);
|
||||
if (offset == deduper->current_report_offset + 1) {
|
||||
fatbit_clear(deduper->log[offset % 2]);
|
||||
} else {
|
||||
fatbit_clear(deduper->log[0]);
|
||||
fatbit_clear(deduper->log[1]);
|
||||
}
|
||||
|
||||
if (do_som && flushStoredSomMatches(scratch, offset)) {
|
||||
return DEDUPE_HALT;
|
||||
}
|
||||
deduper->current_report_offset = offset;
|
||||
}
|
||||
|
||||
if (dkey != MO_INVALID_IDX) {
|
||||
const u32 dkeyCount = rose->dkeyCount;
|
||||
const s32 offset_adj = ri->offsetAdjust;
|
||||
if (ri->type == EXTERNAL_CALLBACK || ri->quashSom) {
|
||||
DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
|
||||
assert(offset_adj == 0 || offset_adj == -1);
|
||||
if (fatbit_set(deduper->log[to_offset % 2], dkeyCount, dkey)) {
|
||||
/* we have already raised this report at this offset, squash
|
||||
* dupe match. */
|
||||
DEBUG_PRINTF("dedupe\n");
|
||||
return DEDUPE_SKIP;
|
||||
}
|
||||
} else if (do_som) {
|
||||
/* SOM external event */
|
||||
DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
|
||||
assert(offset_adj == 0 || offset_adj == -1);
|
||||
u64a *starts = deduper->som_start_log[to_offset % 2];
|
||||
if (fatbit_set(deduper->som_log[to_offset % 2], dkeyCount, dkey)) {
|
||||
starts[dkey] = MIN(starts[dkey], from_offset);
|
||||
} else {
|
||||
starts[dkey] = from_offset;
|
||||
}
|
||||
DEBUG_PRINTF("starts[%u]=%llu\n", dkey, starts[dkey]);
|
||||
|
||||
if (offset_adj) {
|
||||
deduper->som_log_dirty |= 1;
|
||||
} else {
|
||||
deduper->som_log_dirty |= 2;
|
||||
}
|
||||
|
||||
return DEDUPE_SKIP;
|
||||
}
|
||||
}
|
||||
|
||||
return DEDUPE_CONTINUE;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
enum DedupeResult dedupeCatchupSom(const struct RoseEngine *rose,
|
||||
const struct internal_report *ri,
|
||||
struct hs_scratch *scratch, u64a offset,
|
||||
u64a from_offset, u64a to_offset) {
|
||||
DEBUG_PRINTF("offset=%llu, match=[%llu,%llu], dkey=%u\n", offset,
|
||||
from_offset, to_offset, ri->dkey);
|
||||
DEBUG_PRINTF("report type=%u, quashSom=%d\n", ri->type, ri->quashSom);
|
||||
|
||||
struct match_deduper *deduper = &scratch->deduper;
|
||||
if (offset != deduper->current_report_offset) {
|
||||
assert(deduper->current_report_offset == ~0ULL ||
|
||||
deduper->current_report_offset < offset);
|
||||
if (offset == deduper->current_report_offset + 1) {
|
||||
fatbit_clear(deduper->log[offset % 2]);
|
||||
} else {
|
||||
fatbit_clear(deduper->log[0]);
|
||||
fatbit_clear(deduper->log[1]);
|
||||
}
|
||||
|
||||
if (flushStoredSomMatches(scratch, offset)) {
|
||||
return DEDUPE_HALT;
|
||||
}
|
||||
deduper->current_report_offset = offset;
|
||||
}
|
||||
|
||||
const u32 dkey = ri->dkey;
|
||||
if (dkey != MO_INVALID_IDX) {
|
||||
const u32 dkeyCount = rose->dkeyCount;
|
||||
const s32 offset_adj = ri->offsetAdjust;
|
||||
if (ri->quashSom) {
|
||||
DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
|
||||
assert(offset_adj == 0 || offset_adj == -1);
|
||||
if (fatbit_set(deduper->log[to_offset % 2], dkeyCount, dkey)) {
|
||||
/* we have already raised this report at this offset, squash
|
||||
* dupe match. */
|
||||
DEBUG_PRINTF("dedupe\n");
|
||||
return DEDUPE_SKIP;
|
||||
}
|
||||
} else {
|
||||
/* SOM external event */
|
||||
DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
|
||||
assert(offset_adj == 0 || offset_adj == -1);
|
||||
u64a *starts = deduper->som_start_log[to_offset % 2];
|
||||
if (fatbit_set(deduper->som_log[to_offset % 2], dkeyCount, dkey)) {
|
||||
starts[dkey] = MIN(starts[dkey], from_offset);
|
||||
} else {
|
||||
starts[dkey] = from_offset;
|
||||
}
|
||||
DEBUG_PRINTF("starts[%u]=%llu\n", dkey, starts[dkey]);
|
||||
|
||||
if (offset_adj) {
|
||||
deduper->som_log_dirty |= 1;
|
||||
} else {
|
||||
deduper->som_log_dirty |= 2;
|
||||
}
|
||||
|
||||
return DEDUPE_SKIP;
|
||||
}
|
||||
}
|
||||
|
||||
return DEDUPE_CONTINUE;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch,
|
||||
char is_simple, char do_som) {
|
||||
assert(id != MO_INVALID_IDX); // Should never get an invalid ID.
|
||||
assert(scratch);
|
||||
assert(scratch->magic == SCRATCH_MAGIC);
|
||||
|
||||
struct core_info *ci = &scratch->core_info;
|
||||
const struct RoseEngine *rose = ci->rose;
|
||||
DEBUG_PRINTF("internal report %u\n", id);
|
||||
const struct internal_report *ri = getInternalReport(rose, id);
|
||||
|
||||
assert(isExternalReport(ri)); /* only external reports should reach here */
|
||||
|
||||
s32 offset_adj = ri->offsetAdjust;
|
||||
u64a to_offset = offset;
|
||||
u64a from_offset = 0;
|
||||
|
||||
u32 flags = 0;
|
||||
#ifndef RELEASE_BUILD
|
||||
if (offset_adj) {
|
||||
// alert testing tools that we've got adjusted matches
|
||||
flags |= HS_MATCH_FLAG_ADJUSTED;
|
||||
}
|
||||
#endif
|
||||
|
||||
DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u "
|
||||
"offsetAdj=%d\n", offset, id, ri->type, ri->onmatch,
|
||||
offset_adj);
|
||||
|
||||
if (unlikely(can_stop_matching(scratch))) { /* ok - we are from rose */
|
||||
DEBUG_PRINTF("pre broken - halting\n");
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
||||
if (!is_simple && ri->hasBounds) {
|
||||
assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET);
|
||||
assert(ri->minOffset <= ri->maxOffset);
|
||||
if (offset < ri->minOffset || offset > ri->maxOffset) {
|
||||
DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n",
|
||||
offset, ri->minOffset, ri->maxOffset);
|
||||
return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) {
|
||||
DEBUG_PRINTF("ate exhausted match\n");
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
if (ri->type == EXTERNAL_CALLBACK) {
|
||||
from_offset = 0;
|
||||
} else if (do_som) {
|
||||
from_offset = handleSomExternal(scratch, ri, to_offset);
|
||||
}
|
||||
|
||||
to_offset += offset_adj;
|
||||
assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset);
|
||||
|
||||
if (do_som && ri->minLength) {
|
||||
if (!satisfiesMinLength(ri->minLength, from_offset, to_offset)) {
|
||||
return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
|
||||
}
|
||||
if (ri->quashSom) {
|
||||
from_offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n",
|
||||
from_offset, to_offset, ri->onmatch, ci->userContext);
|
||||
|
||||
int halt = 0;
|
||||
|
||||
enum DedupeResult dedupe_rv = dedupeCatchup(rose, ri, scratch, offset,
|
||||
from_offset, to_offset, do_som);
|
||||
switch (dedupe_rv) {
|
||||
case DEDUPE_HALT:
|
||||
halt = 1;
|
||||
goto exit;
|
||||
case DEDUPE_SKIP:
|
||||
halt = 0;
|
||||
goto exit;
|
||||
case DEDUPE_CONTINUE:
|
||||
break;
|
||||
}
|
||||
|
||||
halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset,
|
||||
flags, ci->userContext);
|
||||
exit:
|
||||
if (halt) {
|
||||
DEBUG_PRINTF("callback requested to terminate matches\n");
|
||||
ci->broken = BROKEN_FROM_USER;
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
||||
if (!is_simple && ri->ekey != END_EXHAUST) {
|
||||
markAsMatched(ci->exhaustionVector, ri->ekey);
|
||||
return MO_CONTINUE_MATCHING;
|
||||
} else {
|
||||
return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Deliver the given report to the user callback.
|
||||
*
|
||||
* Assumes all preconditions (bounds, exhaustion etc) have been checked and
|
||||
* that dedupe catchup has been done.
|
||||
*/
|
||||
static really_inline
|
||||
int roseDeliverReport(u64a offset, ReportID id, struct hs_scratch *scratch,
|
||||
char is_exhaustible) {
|
||||
assert(id != MO_INVALID_IDX); // Should never get an invalid ID.
|
||||
assert(scratch);
|
||||
assert(scratch->magic == SCRATCH_MAGIC);
|
||||
|
||||
struct core_info *ci = &scratch->core_info;
|
||||
const struct RoseEngine *rose = ci->rose;
|
||||
DEBUG_PRINTF("internal report %u\n", id);
|
||||
const struct internal_report *ri = getInternalReport(rose, id);
|
||||
|
||||
assert(isExternalReport(ri)); /* only external reports should reach here */
|
||||
|
||||
const s32 offset_adj = ri->offsetAdjust;
|
||||
u32 flags = 0;
|
||||
#ifndef RELEASE_BUILD
|
||||
if (offset_adj) {
|
||||
// alert testing tools that we've got adjusted matches
|
||||
flags |= HS_MATCH_FLAG_ADJUSTED;
|
||||
}
|
||||
#endif
|
||||
|
||||
DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u "
|
||||
"offsetAdj=%d\n", offset, id, ri->type, ri->onmatch,
|
||||
offset_adj);
|
||||
|
||||
assert(!can_stop_matching(scratch));
|
||||
assert(!ri->hasBounds ||
|
||||
(offset >= ri->minOffset && offset <= ri->maxOffset));
|
||||
assert(ri->type == EXTERNAL_CALLBACK);
|
||||
assert(!ri->minLength);
|
||||
assert(!ri->quashSom);
|
||||
assert(ri->ekey == INVALID_EKEY ||
|
||||
!isExhausted(ci->exhaustionVector, ri->ekey));
|
||||
|
||||
u64a from_offset = 0;
|
||||
u64a to_offset = offset + offset_adj;
|
||||
|
||||
DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n",
|
||||
from_offset, to_offset, ri->onmatch, ci->userContext);
|
||||
|
||||
int halt = ci->userCallback((unsigned int)ri->onmatch, from_offset,
|
||||
to_offset, flags, ci->userContext);
|
||||
if (halt) {
|
||||
DEBUG_PRINTF("callback requested to terminate matches\n");
|
||||
ci->broken = BROKEN_FROM_USER;
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
||||
if (is_exhaustible) {
|
||||
assert(ri->ekey != INVALID_EKEY);
|
||||
markAsMatched(ci->exhaustionVector, ri->ekey);
|
||||
return MO_CONTINUE_MATCHING;
|
||||
} else {
|
||||
return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id,
|
||||
struct hs_scratch *scratch, char is_simple) {
|
||||
assert(id != MO_INVALID_IDX); // Should never get an invalid ID.
|
||||
assert(scratch);
|
||||
assert(scratch->magic == SCRATCH_MAGIC);
|
||||
|
||||
u32 flags = 0;
|
||||
|
||||
struct core_info *ci = &scratch->core_info;
|
||||
const struct RoseEngine *rose = ci->rose;
|
||||
const struct internal_report *ri = getInternalReport(rose, id);
|
||||
|
||||
/* internal events should be handled by rose directly */
|
||||
assert(ri->type == EXTERNAL_CALLBACK);
|
||||
|
||||
DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u "
|
||||
"offsetAdj=%d\n", to_offset, id, ri->type, ri->onmatch,
|
||||
ri->offsetAdjust);
|
||||
|
||||
if (unlikely(can_stop_matching(scratch))) {
|
||||
DEBUG_PRINTF("pre broken - halting\n");
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
||||
if (!is_simple && ri->hasBounds) {
|
||||
assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET);
|
||||
if (to_offset < ri->minOffset || to_offset > ri->maxOffset) {
|
||||
DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n",
|
||||
to_offset, ri->minOffset, ri->maxOffset);
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
}
|
||||
|
||||
int halt = 0;
|
||||
|
||||
if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) {
|
||||
DEBUG_PRINTF("ate exhausted match\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
u64a offset = to_offset;
|
||||
|
||||
to_offset += ri->offsetAdjust;
|
||||
assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset);
|
||||
|
||||
if (!is_simple && ri->minLength) {
|
||||
if (!satisfiesMinLength(ri->minLength, from_offset, to_offset)) {
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
if (ri->quashSom) {
|
||||
from_offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n",
|
||||
from_offset, to_offset, ri->onmatch, ci->userContext);
|
||||
|
||||
#ifndef RELEASE_BUILD
|
||||
if (ri->offsetAdjust != 0) {
|
||||
// alert testing tools that we've got adjusted matches
|
||||
flags |= HS_MATCH_FLAG_ADJUSTED;
|
||||
}
|
||||
#endif
|
||||
|
||||
enum DedupeResult dedupe_rv =
|
||||
dedupeCatchupSom(rose, ri, scratch, offset, from_offset, to_offset);
|
||||
switch (dedupe_rv) {
|
||||
case DEDUPE_HALT:
|
||||
halt = 1;
|
||||
goto exit;
|
||||
case DEDUPE_SKIP:
|
||||
halt = 0;
|
||||
goto exit;
|
||||
case DEDUPE_CONTINUE:
|
||||
break;
|
||||
}
|
||||
|
||||
halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset,
|
||||
flags, ci->userContext);
|
||||
|
||||
if (!is_simple) {
|
||||
markAsMatched(ci->exhaustionVector, ri->ekey);
|
||||
}
|
||||
|
||||
exit:
|
||||
if (halt) {
|
||||
DEBUG_PRINTF("callback requested to terminate matches\n");
|
||||
ci->broken = BROKEN_FROM_USER;
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Deliver the given SOM report to the user callback.
|
||||
*
|
||||
* Assumes all preconditions (bounds, exhaustion etc) have been checked and
|
||||
* that dedupe catchup has been done.
|
||||
*/
|
||||
static really_inline
|
||||
int roseDeliverSomReport(u64a from_offset, u64a to_offset, ReportID id,
|
||||
struct hs_scratch *scratch, char is_exhaustible) {
|
||||
assert(id != MO_INVALID_IDX); // Should never get an invalid ID.
|
||||
assert(scratch);
|
||||
assert(scratch->magic == SCRATCH_MAGIC);
|
||||
|
||||
u32 flags = 0;
|
||||
|
||||
struct core_info *ci = &scratch->core_info;
|
||||
const struct RoseEngine *rose = ci->rose;
|
||||
const struct internal_report *ri = getInternalReport(rose, id);
|
||||
|
||||
assert(isExternalReport(ri)); /* only external reports should reach here */
|
||||
|
||||
DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u "
|
||||
"offsetAdj=%d\n", to_offset, id, ri->type, ri->onmatch,
|
||||
ri->offsetAdjust);
|
||||
|
||||
assert(!can_stop_matching(scratch));
|
||||
assert(!ri->hasBounds ||
|
||||
(to_offset >= ri->minOffset && to_offset <= ri->maxOffset));
|
||||
assert(ri->ekey == INVALID_EKEY ||
|
||||
!isExhausted(ci->exhaustionVector, ri->ekey));
|
||||
|
||||
to_offset += ri->offsetAdjust;
|
||||
assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset);
|
||||
|
||||
assert(!ri->minLength ||
|
||||
satisfiesMinLength(ri->minLength, from_offset, to_offset));
|
||||
assert(!ri->quashSom || from_offset == 0);
|
||||
|
||||
DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n",
|
||||
from_offset, to_offset, ri->onmatch, ci->userContext);
|
||||
|
||||
#ifndef RELEASE_BUILD
|
||||
if (ri->offsetAdjust != 0) {
|
||||
// alert testing tools that we've got adjusted matches
|
||||
flags |= HS_MATCH_FLAG_ADJUSTED;
|
||||
}
|
||||
#endif
|
||||
|
||||
int halt = ci->userCallback((unsigned int)ri->onmatch, from_offset,
|
||||
to_offset, flags, ci->userContext);
|
||||
|
||||
if (halt) {
|
||||
DEBUG_PRINTF("callback requested to terminate matches\n");
|
||||
ci->broken = BROKEN_FROM_USER;
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
||||
if (is_exhaustible) {
|
||||
assert(ri->ekey != INVALID_EKEY);
|
||||
markAsMatched(ci->exhaustionVector, ri->ekey);
|
||||
return MO_CONTINUE_MATCHING;
|
||||
} else {
|
||||
return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
|
||||
}
|
||||
}
|
||||
|
||||
#endif // REPORT_H
|
@ -47,9 +47,9 @@ void initContext(const struct RoseEngine *t, char *state, u64a offset,
|
||||
tctxt->cb = callback;
|
||||
tctxt->cb_som = som_callback;
|
||||
tctxt->lastMatchOffset = 0;
|
||||
tctxt->minMatchOffset = 0;
|
||||
tctxt->minNonMpvMatchOffset = 0;
|
||||
tctxt->next_mpv_offset = 0;
|
||||
tctxt->minMatchOffset = offset;
|
||||
tctxt->minNonMpvMatchOffset = offset;
|
||||
tctxt->next_mpv_offset = offset;
|
||||
tctxt->curr_anchored_loc = MMB_INVALID;
|
||||
tctxt->curr_row_offset = 0;
|
||||
|
||||
@ -146,14 +146,16 @@ int eodNfaSomCallback(u64a from_offset, u64a to_offset, ReportID report,
|
||||
/**
|
||||
* \brief Check for (and deliver) reports from active output-exposed (suffix
|
||||
* or outfix) NFAs.
|
||||
*
|
||||
* \return MO_HALT_MATCHING if the user instructs us to stop.
|
||||
*/
|
||||
static rose_inline
|
||||
void roseCheckNfaEod(const struct RoseEngine *t, char *state,
|
||||
int roseCheckNfaEod(const struct RoseEngine *t, char *state,
|
||||
struct hs_scratch *scratch, u64a offset,
|
||||
const char is_streaming) {
|
||||
if (!t->eodNfaIterOffset) {
|
||||
DEBUG_PRINTF("no engines that report at EOD\n");
|
||||
return;
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
/* data, len is used for state decompress, should be full available data */
|
||||
@ -194,9 +196,11 @@ void roseCheckNfaEod(const struct RoseEngine *t, char *state,
|
||||
eodNfaSomCallback,
|
||||
scratch) == MO_HALT_MATCHING) {
|
||||
DEBUG_PRINTF("user instructed us to stop\n");
|
||||
return;
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
}
|
||||
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
@ -283,7 +287,10 @@ void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset,
|
||||
return;
|
||||
}
|
||||
|
||||
roseCheckNfaEod(t, state, scratch, offset, is_streaming);
|
||||
if (roseCheckNfaEod(t, state, scratch, offset, is_streaming) ==
|
||||
MO_HALT_MATCHING) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!t->eodIterProgramOffset && !t->ematcherOffset) {
|
||||
DEBUG_PRINTF("no eod accepts\n");
|
||||
@ -291,8 +298,7 @@ void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset,
|
||||
}
|
||||
|
||||
// Handle pending EOD reports.
|
||||
int itrv = roseEodRunIterator(t, offset, scratch);
|
||||
if (itrv == MO_HALT_MATCHING) {
|
||||
if (roseEodRunIterator(t, offset, scratch) == MO_HALT_MATCHING) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -303,15 +309,17 @@ void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset,
|
||||
mmbit_clear(getRoleState(state), t->rolesWithStateCount);
|
||||
mmbit_clear(getActiveLeafArray(t, state), t->activeArrayCount);
|
||||
|
||||
hwlmcb_rv_t rv = roseEodRunMatcher(t, offset, scratch, is_streaming);
|
||||
if (rv == HWLM_TERMINATE_MATCHING) {
|
||||
if (roseEodRunMatcher(t, offset, scratch, is_streaming) ==
|
||||
HWLM_TERMINATE_MATCHING) {
|
||||
return;
|
||||
}
|
||||
|
||||
cleanupAfterEodMatcher(t, state, offset, scratch);
|
||||
|
||||
// Fire any new EOD reports.
|
||||
roseEodRunIterator(t, offset, scratch);
|
||||
if (roseEodRunIterator(t, offset, scratch) == MO_HALT_MATCHING) {
|
||||
return;
|
||||
}
|
||||
|
||||
roseCheckEodSuffixes(t, state, offset, scratch);
|
||||
}
|
||||
|
@ -253,33 +253,53 @@ event_enqueued:
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
/* handles the firing of external matches */
|
||||
static rose_inline
|
||||
hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, ReportID id, u64a end,
|
||||
struct hs_scratch *scratch) {
|
||||
struct RoseContext *tctxt = &scratch->tctxt;
|
||||
|
||||
assert(end == tctxt->minMatchOffset);
|
||||
DEBUG_PRINTF("firing callback id=%u, end=%llu\n", id, end);
|
||||
updateLastMatchOffset(tctxt, end);
|
||||
|
||||
int cb_rv = tctxt->cb(end, id, scratch);
|
||||
if (cb_rv == MO_HALT_MATCHING) {
|
||||
DEBUG_PRINTF("termination requested\n");
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
|
||||
if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
return roseHaltIfExhausted(t, scratch);
|
||||
}
|
||||
|
||||
/* handles catchup, som, cb, etc */
|
||||
static really_inline
|
||||
hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, char *state,
|
||||
struct RoseContext *tctxt, ReportID id,
|
||||
u64a offset, char in_anchored) {
|
||||
struct hs_scratch *scratch = tctxtToScratch(tctxt);
|
||||
|
||||
if (roseCatchUpTo(t, state, offset, scratch, in_anchored) ==
|
||||
HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
|
||||
const struct internal_report *ri = getInternalReport(t, id);
|
||||
|
||||
if (ri) {
|
||||
// Mildly cheesy performance hack: if this report is already exhausted,
|
||||
// we can quash the match here.
|
||||
if (ri->ekey != INVALID_EKEY) {
|
||||
const struct hs_scratch *scratch = tctxtToScratch(tctxt);
|
||||
if (isExhausted(scratch->core_info.exhaustionVector, ri->ekey)) {
|
||||
DEBUG_PRINTF("eating exhausted match (report %u, ekey %u)\n",
|
||||
ri->onmatch, ri->ekey);
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
}
|
||||
|
||||
if (isInternalSomReport(ri)) {
|
||||
return roseHandleSom(t, state, id, offset, tctxt, in_anchored);
|
||||
roseHandleSom(t, scratch, id, offset);
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
} else if (ri->type == INTERNAL_ROSE_CHAIN) {
|
||||
return roseCatchUpAndHandleChainMatch(t, state, id, offset, tctxt,
|
||||
in_anchored);
|
||||
}
|
||||
}
|
||||
return roseHandleMatch(t, state, id, offset, tctxt, in_anchored);
|
||||
|
||||
return roseHandleMatch(t, id, offset, scratch);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include "infix.h"
|
||||
#include "match.h"
|
||||
#include "miracle.h"
|
||||
#include "report.h"
|
||||
#include "rose.h"
|
||||
#include "rose_internal.h"
|
||||
#include "rose_program.h"
|
||||
@ -566,29 +567,20 @@ void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi,
|
||||
pushQueueSom(q, topEvent, loc, start);
|
||||
}
|
||||
|
||||
/* handles the firing of external matches */
|
||||
static rose_inline
|
||||
hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, char *state,
|
||||
ReportID id, u64a end, struct RoseContext *tctxt,
|
||||
char in_anchored) {
|
||||
struct hs_scratch *scratch = tctxtToScratch(tctxt);
|
||||
hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch,
|
||||
ReportID id, u64a end, char is_exhaustible) {
|
||||
assert(end == scratch->tctxt.minMatchOffset);
|
||||
DEBUG_PRINTF("firing callback id=%u, end=%llu\n", id, end);
|
||||
updateLastMatchOffset(&scratch->tctxt, end);
|
||||
|
||||
if (roseCatchUpTo(t, state, end, scratch, in_anchored)
|
||||
== HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
|
||||
assert(end == tctxt->minMatchOffset);
|
||||
DEBUG_PRINTF("firing callback reportId=%u, end=%llu\n", id, end);
|
||||
updateLastMatchOffset(tctxt, end);
|
||||
|
||||
int cb_rv = tctxt->cb(end, id, scratch);
|
||||
int cb_rv = roseDeliverReport(end, id, scratch, is_exhaustible);
|
||||
if (cb_rv == MO_HALT_MATCHING) {
|
||||
DEBUG_PRINTF("termination requested\n");
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
|
||||
if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
|
||||
if (!is_exhaustible || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
@ -613,76 +605,38 @@ hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t,
|
||||
return roseHandleChainMatch(t, r, end, tctxt, in_anchored, 0);
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
hwlmcb_rv_t roseSomCatchup(const struct RoseEngine *t, char *state, u64a end,
|
||||
struct RoseContext *tctxt, char in_anchored) {
|
||||
struct hs_scratch *scratch = tctxtToScratch(tctxt);
|
||||
|
||||
// In SOM processing, we may be able to limit or entirely avoid catchup.
|
||||
|
||||
DEBUG_PRINTF("entry\n");
|
||||
|
||||
if (end == tctxt->minMatchOffset) {
|
||||
DEBUG_PRINTF("already caught up\n");
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("catching up all NFAs\n");
|
||||
if (roseCatchUpTo(t, state, end, scratch, in_anchored)
|
||||
== HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
updateMinMatchOffset(tctxt, end);
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlmcb_rv_t roseHandleSom(const struct RoseEngine *t, char *state, ReportID id,
|
||||
u64a end, struct RoseContext *tctxt,
|
||||
char in_anchored) {
|
||||
struct hs_scratch *scratch = tctxtToScratch(tctxt);
|
||||
|
||||
void roseHandleSom(const struct RoseEngine *t, struct hs_scratch *scratch,
|
||||
ReportID id, u64a end) {
|
||||
DEBUG_PRINTF("id=%u, end=%llu, minMatchOffset=%llu\n", id, end,
|
||||
tctxt->minMatchOffset);
|
||||
scratch->tctxt.minMatchOffset);
|
||||
|
||||
// Reach into reports and handle internal reports that just manipulate SOM
|
||||
// slots ourselves, rather than going through the callback.
|
||||
|
||||
if (roseSomCatchup(t, state, end, tctxt, in_anchored)
|
||||
== HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
assert(end == scratch->tctxt.minMatchOffset);
|
||||
DEBUG_PRINTF("firing som callback id=%u, end=%llu\n", id, end);
|
||||
updateLastMatchOffset(&scratch->tctxt, end);
|
||||
|
||||
const struct internal_report *ri = getInternalReport(t, id);
|
||||
handleSomInternal(scratch, ri, end);
|
||||
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, char *state,
|
||||
ReportID id, u64a start, u64a end,
|
||||
struct RoseContext *tctxt, char in_anchored) {
|
||||
struct hs_scratch *scratch = tctxtToScratch(tctxt);
|
||||
hwlmcb_rv_t roseReportSom(const struct RoseEngine *t,
|
||||
struct hs_scratch *scratch, ReportID id, u64a start,
|
||||
u64a end, char is_exhaustible) {
|
||||
assert(end == scratch->tctxt.minMatchOffset);
|
||||
DEBUG_PRINTF("firing som callback id=%u, end=%llu\n", id, end);
|
||||
updateLastMatchOffset(&scratch->tctxt, end);
|
||||
|
||||
if (roseCatchUpTo(t, state, end, scratch, in_anchored)
|
||||
== HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("firing som callback reportId=%u, start=%llu end=%llu\n", id,
|
||||
start, end);
|
||||
DEBUG_PRINTF(" last match %llu\n", tctxt->lastMatchOffset);
|
||||
assert(end == tctxt->minMatchOffset);
|
||||
|
||||
updateLastMatchOffset(tctxt, end);
|
||||
int cb_rv = tctxt->cb_som(start, end, id, scratch);
|
||||
int cb_rv = roseDeliverSomReport(start, end, id, scratch, is_exhaustible);
|
||||
if (cb_rv == MO_HALT_MATCHING) {
|
||||
DEBUG_PRINTF("termination requested\n");
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
|
||||
if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
|
||||
if (!is_exhaustible || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
@ -690,23 +644,19 @@ hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, char *state,
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
hwlmcb_rv_t roseHandleSomSom(const struct RoseEngine *t, char *state,
|
||||
ReportID id, u64a start, u64a end,
|
||||
struct RoseContext *tctxt, char in_anchored) {
|
||||
void roseHandleSomSom(const struct RoseEngine *t, ReportID id, u64a start,
|
||||
u64a end, struct hs_scratch *scratch) {
|
||||
DEBUG_PRINTF("id=%u, start=%llu, end=%llu, minMatchOffset=%llu\n",
|
||||
id, start, end, tctxt->minMatchOffset);
|
||||
id, start, end, scratch->tctxt.minMatchOffset);
|
||||
|
||||
// Reach into reports and handle internal reports that just manipulate SOM
|
||||
// slots ourselves, rather than going through the callback.
|
||||
|
||||
if (roseSomCatchup(t, state, end, tctxt, in_anchored)
|
||||
== HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
assert(end == scratch->tctxt.minMatchOffset);
|
||||
updateLastMatchOffset(&scratch->tctxt, end);
|
||||
|
||||
const struct internal_report *ri = getInternalReport(t, id);
|
||||
setSomFromSomAware(tctxtToScratch(tctxt), ri, start, end);
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
setSomFromSomAware(scratch, ri, start, end);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
@ -848,14 +798,11 @@ u64a roseGetHaigSom(const struct RoseEngine *t, const u32 qi,
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) {
|
||||
assert(max_bound <= ROSE_BOUND_INF);
|
||||
char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) {
|
||||
DEBUG_PRINTF("check offset=%llu against bounds [%llu,%llu]\n", end,
|
||||
min_bound, max_bound);
|
||||
assert(min_bound <= max_bound);
|
||||
|
||||
if (end < min_bound) {
|
||||
return 0;
|
||||
}
|
||||
return max_bound == ROSE_BOUND_INF || end <= max_bound;
|
||||
return end >= min_bound && end <= max_bound;
|
||||
}
|
||||
|
||||
|
||||
@ -956,9 +903,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_BOUNDS) {
|
||||
if (!in_anchored &&
|
||||
!roseCheckRootBounds(end, ri->min_bound, ri->max_bound)) {
|
||||
DEBUG_PRINTF("failed root bounds check\n");
|
||||
if (!roseCheckBounds(end, ri->min_bound, ri->max_bound)) {
|
||||
DEBUG_PRINTF("failed bounds check\n");
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
continue;
|
||||
@ -1003,6 +949,14 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CATCH_UP) {
|
||||
if (roseCatchUpTo(t, scratch->core_info.state, end, scratch,
|
||||
in_anchored) == HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SOM_ADJUST) {
|
||||
assert(ri->distance <= end);
|
||||
som = end - ri->distance;
|
||||
@ -1016,6 +970,20 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SOM_FROM_REPORT) {
|
||||
const struct internal_report *ir =
|
||||
getInternalReport(t, ri->report);
|
||||
som = handleSomExternal(scratch, ir, end);
|
||||
DEBUG_PRINTF("som from report %u is %llu\n", ri->report, som);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SOM_ZERO) {
|
||||
DEBUG_PRINTF("setting SOM to zero\n");
|
||||
som = 0;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(TRIGGER_INFIX) {
|
||||
roseTriggerInfix(t, som, end, ri->queue, ri->event, ri->cancel,
|
||||
tctxt);
|
||||
@ -1033,13 +1001,40 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(REPORT) {
|
||||
if (roseHandleMatch(t, scratch->core_info.state,
|
||||
ri->report, end, tctxt,
|
||||
in_anchored) == HWLM_TERMINATE_MATCHING) {
|
||||
PROGRAM_CASE(DEDUPE) {
|
||||
const struct internal_report *ir =
|
||||
getInternalReport(t, ri->report);
|
||||
const char do_som = t->hasSom; // FIXME: constant propagate
|
||||
enum DedupeResult rv = dedupeCatchup(
|
||||
t, ir, scratch, end, som, end + ir->offsetAdjust, do_som);
|
||||
switch (rv) {
|
||||
case DEDUPE_HALT:
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
case DEDUPE_SKIP:
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
continue;
|
||||
case DEDUPE_CONTINUE:
|
||||
break;
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(DEDUPE_SOM) {
|
||||
const struct internal_report *ir =
|
||||
getInternalReport(t, ri->report);
|
||||
enum DedupeResult rv = dedupeCatchupSom(
|
||||
t, ir, scratch, end, som, end + ir->offsetAdjust);
|
||||
switch (rv) {
|
||||
case DEDUPE_HALT:
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
case DEDUPE_SKIP:
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
continue;
|
||||
case DEDUPE_CONTINUE:
|
||||
break;
|
||||
}
|
||||
work_done = 1;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
@ -1053,18 +1048,32 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(REPORT_EOD) {
|
||||
if (tctxt->cb(end, ri->report, scratch) == MO_HALT_MATCHING) {
|
||||
PROGRAM_CASE(REPORT_SOM_INT) {
|
||||
roseHandleSom(t, scratch, ri->report, end);
|
||||
work_done = 1;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(REPORT_SOM_AWARE) {
|
||||
roseHandleSomSom(t, ri->report, som, end, scratch);
|
||||
work_done = 1;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(REPORT) {
|
||||
const char is_exhaustible = 0;
|
||||
if (roseReport(t, scratch, ri->report, end, is_exhaustible) ==
|
||||
HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
work_done = 1;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(REPORT_SOM_INT) {
|
||||
if (roseHandleSom(t, scratch->core_info.state, ri->report,
|
||||
end, tctxt,
|
||||
in_anchored) == HWLM_TERMINATE_MATCHING) {
|
||||
PROGRAM_CASE(REPORT_EXHAUST) {
|
||||
const char is_exhaustible = 1;
|
||||
if (roseReport(t, scratch, ri->report, end, is_exhaustible) ==
|
||||
HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
work_done = 1;
|
||||
@ -1072,25 +1081,57 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset,
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(REPORT_SOM) {
|
||||
if (roseHandleSomSom(t, scratch->core_info.state,
|
||||
ri->report, som, end, tctxt,
|
||||
in_anchored) == HWLM_TERMINATE_MATCHING) {
|
||||
const char is_exhaustible = 0;
|
||||
if (roseReportSom(t, scratch, ri->report, som, end,
|
||||
is_exhaustible) == HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
work_done = 1;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(REPORT_SOM_KNOWN) {
|
||||
if (roseHandleSomMatch(t, scratch->core_info.state, ri->report,
|
||||
som, end, tctxt, in_anchored) ==
|
||||
HWLM_TERMINATE_MATCHING) {
|
||||
PROGRAM_CASE(REPORT_SOM_EXHAUST) {
|
||||
const char is_exhaustible = 1;
|
||||
if (roseReportSom(t, scratch, ri->report, som, end,
|
||||
is_exhaustible) == HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
work_done = 1;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_EXHAUSTED) {
|
||||
DEBUG_PRINTF("check ekey %u\n", ri->ekey);
|
||||
assert(ri->ekey != INVALID_EKEY);
|
||||
assert(ri->ekey < t->ekeyCount);
|
||||
const char *evec = scratch->core_info.exhaustionVector;
|
||||
if (isExhausted(evec, ri->ekey)) {
|
||||
DEBUG_PRINTF("ekey %u already set, match is exhausted\n",
|
||||
ri->ekey);
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_MIN_LENGTH) {
|
||||
DEBUG_PRINTF("check min length %llu (adj %d)\n", ri->min_length,
|
||||
ri->end_adj);
|
||||
assert(ri->min_length > 0);
|
||||
assert(ri->end_adj == 0 || ri->end_adj == -1);
|
||||
assert(som == HS_OFFSET_PAST_HORIZON || som <= end);
|
||||
if (som != HS_OFFSET_PAST_HORIZON &&
|
||||
((end + ri->end_adj) - som < ri->min_length)) {
|
||||
DEBUG_PRINTF("failed check, match len %llu\n",
|
||||
(u64a)((end + ri->end_adj) - som));
|
||||
assert(ri->fail_jump); // must progress
|
||||
pc += ri->fail_jump;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SET_STATE) {
|
||||
DEBUG_PRINTF("set state index %u\n", ri->index);
|
||||
mmbit_set(getRoleState(scratch->core_info.state),
|
||||
|
@ -141,25 +141,42 @@ struct left_build_info {
|
||||
vector<LookEntry> lookaround; // alternative implementation to the NFA
|
||||
};
|
||||
|
||||
/**
 * \brief Symbolic jump destination attached to an instruction at build time.
 *
 * Instructions that can branch carry one of these until the program is
 * flattened, at which point it is fixed up into a byte offset before the
 * program is written to bytecode.
 *
 * Note: enumerator order is significant, as RoseInstruction orders and
 * compares instructions by their target value.
 */
enum class JumpTarget {
    NO_JUMP,     //!< Instruction does not jump.
    PROGRAM_END, //!< Jump to end of program.
    NEXT_BLOCK,  //!< Jump to start of next block (sparse iter check, etc).
    FIXUP_DONE,  //!< Target fixup already applied.
};
|
||||
|
||||
/** \brief Role instruction model used at compile time. */
|
||||
class RoseInstruction {
|
||||
public:
|
||||
RoseInstruction() {
|
||||
memset(&u, 0, sizeof(u));
|
||||
u.end.code = ROSE_INSTR_END;
|
||||
}
|
||||
|
||||
explicit RoseInstruction(enum RoseInstructionCode c) {
|
||||
RoseInstruction(enum RoseInstructionCode c, JumpTarget j) : target(j) {
|
||||
memset(&u, 0, sizeof(u));
|
||||
u.end.code = c;
|
||||
}
|
||||
|
||||
explicit RoseInstruction(enum RoseInstructionCode c)
|
||||
: RoseInstruction(c, JumpTarget::NO_JUMP) {}
|
||||
|
||||
bool operator<(const RoseInstruction &a) const {
|
||||
if (code() != a.code()) {
|
||||
return code() < a.code();
|
||||
}
|
||||
if (target != a.target) {
|
||||
return target < a.target;
|
||||
}
|
||||
return memcmp(&u, &a.u, sizeof(u)) < 0;
|
||||
}
|
||||
|
||||
bool operator==(const RoseInstruction &a) const {
|
||||
return memcmp(&u, &a.u, sizeof(u)) == 0;
|
||||
return code() == a.code() && target == a.target &&
|
||||
memcmp(&u, &a.u, sizeof(u)) == 0;
|
||||
}
|
||||
|
||||
enum RoseInstructionCode code() const {
|
||||
@ -180,16 +197,24 @@ public:
|
||||
case ROSE_INSTR_CHECK_LEFTFIX: return &u.checkLeftfix;
|
||||
case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay;
|
||||
case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed;
|
||||
case ROSE_INSTR_CATCH_UP: return &u.catchUp;
|
||||
case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust;
|
||||
case ROSE_INSTR_SOM_LEFTFIX: return &u.somLeftfix;
|
||||
case ROSE_INSTR_SOM_FROM_REPORT: return &u.somFromReport;
|
||||
case ROSE_INSTR_SOM_ZERO: return &u.somZero;
|
||||
case ROSE_INSTR_TRIGGER_INFIX: return &u.triggerInfix;
|
||||
case ROSE_INSTR_TRIGGER_SUFFIX: return &u.triggerSuffix;
|
||||
case ROSE_INSTR_REPORT: return &u.report;
|
||||
case ROSE_INSTR_DEDUPE: return &u.dedupe;
|
||||
case ROSE_INSTR_DEDUPE_SOM: return &u.dedupeSom;
|
||||
case ROSE_INSTR_REPORT_CHAIN: return &u.reportChain;
|
||||
case ROSE_INSTR_REPORT_EOD: return &u.reportEod;
|
||||
case ROSE_INSTR_REPORT_SOM_INT: return &u.reportSomInt;
|
||||
case ROSE_INSTR_REPORT_SOM_AWARE: return &u.reportSom;
|
||||
case ROSE_INSTR_REPORT: return &u.report;
|
||||
case ROSE_INSTR_REPORT_EXHAUST: return &u.reportExhaust;
|
||||
case ROSE_INSTR_REPORT_SOM: return &u.reportSom;
|
||||
case ROSE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown;
|
||||
case ROSE_INSTR_REPORT_SOM_EXHAUST: return &u.reportSomExhaust;
|
||||
case ROSE_INSTR_CHECK_EXHAUSTED: return &u.checkExhausted;
|
||||
case ROSE_INSTR_CHECK_MIN_LENGTH: return &u.checkMinLength;
|
||||
case ROSE_INSTR_SET_STATE: return &u.setState;
|
||||
case ROSE_INSTR_SET_GROUPS: return &u.setGroups;
|
||||
case ROSE_INSTR_SQUASH_GROUPS: return &u.squashGroups;
|
||||
@ -214,16 +239,24 @@ public:
|
||||
case ROSE_INSTR_CHECK_LEFTFIX: return sizeof(u.checkLeftfix);
|
||||
case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay);
|
||||
case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed);
|
||||
case ROSE_INSTR_CATCH_UP: return sizeof(u.catchUp);
|
||||
case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust);
|
||||
case ROSE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix);
|
||||
case ROSE_INSTR_SOM_FROM_REPORT: return sizeof(u.somFromReport);
|
||||
case ROSE_INSTR_SOM_ZERO: return sizeof(u.somZero);
|
||||
case ROSE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix);
|
||||
case ROSE_INSTR_TRIGGER_SUFFIX: return sizeof(u.triggerSuffix);
|
||||
case ROSE_INSTR_REPORT: return sizeof(u.report);
|
||||
case ROSE_INSTR_DEDUPE: return sizeof(u.dedupe);
|
||||
case ROSE_INSTR_DEDUPE_SOM: return sizeof(u.dedupeSom);
|
||||
case ROSE_INSTR_REPORT_CHAIN: return sizeof(u.reportChain);
|
||||
case ROSE_INSTR_REPORT_EOD: return sizeof(u.reportEod);
|
||||
case ROSE_INSTR_REPORT_SOM_INT: return sizeof(u.reportSomInt);
|
||||
case ROSE_INSTR_REPORT_SOM_AWARE: return sizeof(u.reportSom);
|
||||
case ROSE_INSTR_REPORT: return sizeof(u.report);
|
||||
case ROSE_INSTR_REPORT_EXHAUST: return sizeof(u.reportExhaust);
|
||||
case ROSE_INSTR_REPORT_SOM: return sizeof(u.reportSom);
|
||||
case ROSE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown);
|
||||
case ROSE_INSTR_REPORT_SOM_EXHAUST: return sizeof(u.reportSomExhaust);
|
||||
case ROSE_INSTR_CHECK_EXHAUSTED: return sizeof(u.checkExhausted);
|
||||
case ROSE_INSTR_CHECK_MIN_LENGTH: return sizeof(u.checkMinLength);
|
||||
case ROSE_INSTR_SET_STATE: return sizeof(u.setState);
|
||||
case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups);
|
||||
case ROSE_INSTR_SQUASH_GROUPS: return sizeof(u.squashGroups);
|
||||
@ -232,6 +265,7 @@ public:
|
||||
case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext);
|
||||
case ROSE_INSTR_END: return sizeof(u.end);
|
||||
}
|
||||
assert(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -246,16 +280,24 @@ public:
|
||||
ROSE_STRUCT_CHECK_LEFTFIX checkLeftfix;
|
||||
ROSE_STRUCT_ANCHORED_DELAY anchoredDelay;
|
||||
ROSE_STRUCT_PUSH_DELAYED pushDelayed;
|
||||
ROSE_STRUCT_CATCH_UP catchUp;
|
||||
ROSE_STRUCT_SOM_ADJUST somAdjust;
|
||||
ROSE_STRUCT_SOM_LEFTFIX somLeftfix;
|
||||
ROSE_STRUCT_SOM_FROM_REPORT somFromReport;
|
||||
ROSE_STRUCT_SOM_ZERO somZero;
|
||||
ROSE_STRUCT_TRIGGER_INFIX triggerInfix;
|
||||
ROSE_STRUCT_TRIGGER_SUFFIX triggerSuffix;
|
||||
ROSE_STRUCT_REPORT report;
|
||||
ROSE_STRUCT_DEDUPE dedupe;
|
||||
ROSE_STRUCT_DEDUPE_SOM dedupeSom;
|
||||
ROSE_STRUCT_REPORT_CHAIN reportChain;
|
||||
ROSE_STRUCT_REPORT_EOD reportEod;
|
||||
ROSE_STRUCT_REPORT_SOM_INT reportSomInt;
|
||||
ROSE_STRUCT_REPORT_SOM_AWARE reportSomAware;
|
||||
ROSE_STRUCT_REPORT report;
|
||||
ROSE_STRUCT_REPORT_EXHAUST reportExhaust;
|
||||
ROSE_STRUCT_REPORT_SOM reportSom;
|
||||
ROSE_STRUCT_REPORT_SOM_KNOWN reportSomKnown;
|
||||
ROSE_STRUCT_REPORT_SOM_EXHAUST reportSomExhaust;
|
||||
ROSE_STRUCT_CHECK_EXHAUSTED checkExhausted;
|
||||
ROSE_STRUCT_CHECK_MIN_LENGTH checkMinLength;
|
||||
ROSE_STRUCT_SET_STATE setState;
|
||||
ROSE_STRUCT_SET_GROUPS setGroups;
|
||||
ROSE_STRUCT_SQUASH_GROUPS squashGroups;
|
||||
@ -264,11 +306,15 @@ public:
|
||||
ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext;
|
||||
ROSE_STRUCT_END end;
|
||||
} u;
|
||||
|
||||
JumpTarget target;
|
||||
};
|
||||
|
||||
static
|
||||
size_t hash_value(const RoseInstruction &ri) {
|
||||
size_t val = 0;
|
||||
boost::hash_combine(val, ri.code());
|
||||
boost::hash_combine(val, ri.target);
|
||||
const char *bytes = (const char *)ri.get();
|
||||
const size_t len = ri.length();
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
@ -2619,61 +2665,100 @@ flattenProgram(const vector<vector<RoseInstruction>> &programs) {
|
||||
vector<RoseInstruction> out;
|
||||
|
||||
vector<u32> offsets; // offset of each instruction (bytes)
|
||||
vector<u32> targets; // jump target for each instruction
|
||||
vector<u32> blocks; // track which block we're in
|
||||
vector<u32> block_offsets; // start offsets for each block
|
||||
|
||||
DEBUG_PRINTF("%zu programs\n", programs.size());
|
||||
DEBUG_PRINTF("%zu program blocks\n", programs.size());
|
||||
|
||||
size_t curr_offset = 0;
|
||||
for (const auto &program : programs) {
|
||||
DEBUG_PRINTF("program with %zu instructions\n", program.size());
|
||||
DEBUG_PRINTF("block with %zu instructions\n", program.size());
|
||||
block_offsets.push_back(curr_offset);
|
||||
for (const auto &ri : program) {
|
||||
assert(ri.code() != ROSE_INSTR_END);
|
||||
out.push_back(ri);
|
||||
offsets.push_back(curr_offset);
|
||||
blocks.push_back(block_offsets.size() - 1);
|
||||
curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
|
||||
}
|
||||
for (size_t i = 0; i < program.size(); i++) {
|
||||
targets.push_back(curr_offset);
|
||||
}
|
||||
}
|
||||
|
||||
// Add an END instruction.
|
||||
// Add a final END instruction, which is its own block.
|
||||
out.emplace_back(ROSE_INSTR_END);
|
||||
block_offsets.push_back(curr_offset);
|
||||
offsets.push_back(curr_offset);
|
||||
targets.push_back(curr_offset);
|
||||
|
||||
assert(targets.size() == out.size());
|
||||
assert(offsets.size() == out.size());
|
||||
|
||||
for (size_t i = 0; i < out.size(); i++) {
|
||||
auto &ri = out[i];
|
||||
switch (ri.code()) {
|
||||
case ROSE_INSTR_ANCHORED_DELAY:
|
||||
assert(targets[i] > offsets[i]); // jumps always progress
|
||||
ri.u.anchoredDelay.done_jump = targets[i] - offsets[i];
|
||||
|
||||
u32 jump_target = 0;
|
||||
switch (ri.target) {
|
||||
case JumpTarget::NO_JUMP:
|
||||
case JumpTarget::FIXUP_DONE:
|
||||
continue; // Next instruction.
|
||||
case JumpTarget::PROGRAM_END:
|
||||
assert(i != out.size() - 1);
|
||||
jump_target = offsets.back();
|
||||
break;
|
||||
case ROSE_INSTR_CHECK_ONLY_EOD:
|
||||
assert(targets[i] > offsets[i]);
|
||||
ri.u.checkOnlyEod.fail_jump = targets[i] - offsets[i];
|
||||
break;
|
||||
case ROSE_INSTR_CHECK_BOUNDS:
|
||||
assert(targets[i] > offsets[i]);
|
||||
ri.u.checkBounds.fail_jump = targets[i] - offsets[i];
|
||||
break;
|
||||
case ROSE_INSTR_CHECK_NOT_HANDLED:
|
||||
assert(targets[i] > offsets[i]);
|
||||
ri.u.checkNotHandled.fail_jump = targets[i] - offsets[i];
|
||||
break;
|
||||
case ROSE_INSTR_CHECK_LOOKAROUND:
|
||||
assert(targets[i] > offsets[i]);
|
||||
ri.u.checkLookaround.fail_jump = targets[i] - offsets[i];
|
||||
break;
|
||||
case ROSE_INSTR_CHECK_LEFTFIX:
|
||||
assert(targets[i] > offsets[i]);
|
||||
ri.u.checkLeftfix.fail_jump = targets[i] - offsets[i];
|
||||
break;
|
||||
default:
|
||||
case JumpTarget::NEXT_BLOCK:
|
||||
assert(blocks[i] + 1 < block_offsets.size());
|
||||
jump_target = block_offsets[blocks[i] + 1];
|
||||
break;
|
||||
}
|
||||
|
||||
// We currently always make progress and never jump backwards.
|
||||
assert(jump_target > offsets[i]);
|
||||
assert(jump_target <= offsets.back());
|
||||
u32 jump_val = jump_target - offsets[i];
|
||||
|
||||
switch (ri.code()) {
|
||||
case ROSE_INSTR_ANCHORED_DELAY:
|
||||
ri.u.anchoredDelay.done_jump = jump_val;
|
||||
break;
|
||||
case ROSE_INSTR_CHECK_ONLY_EOD:
|
||||
ri.u.checkOnlyEod.fail_jump = jump_val;
|
||||
break;
|
||||
case ROSE_INSTR_CHECK_BOUNDS:
|
||||
ri.u.checkBounds.fail_jump = jump_val;
|
||||
break;
|
||||
case ROSE_INSTR_CHECK_NOT_HANDLED:
|
||||
ri.u.checkNotHandled.fail_jump = jump_val;
|
||||
break;
|
||||
case ROSE_INSTR_CHECK_LOOKAROUND:
|
||||
ri.u.checkLookaround.fail_jump = jump_val;
|
||||
break;
|
||||
case ROSE_INSTR_CHECK_LEFTFIX:
|
||||
ri.u.checkLeftfix.fail_jump = jump_val;
|
||||
break;
|
||||
case ROSE_INSTR_DEDUPE:
|
||||
ri.u.dedupe.fail_jump = jump_val;
|
||||
break;
|
||||
case ROSE_INSTR_DEDUPE_SOM:
|
||||
ri.u.dedupeSom.fail_jump = jump_val;
|
||||
break;
|
||||
case ROSE_INSTR_CHECK_EXHAUSTED:
|
||||
ri.u.checkExhausted.fail_jump = jump_val;
|
||||
break;
|
||||
case ROSE_INSTR_CHECK_MIN_LENGTH:
|
||||
ri.u.checkMinLength.fail_jump = jump_val;
|
||||
break;
|
||||
case ROSE_INSTR_CHECK_STATE:
|
||||
ri.u.checkState.fail_jump = jump_val;
|
||||
break;
|
||||
case ROSE_INSTR_SPARSE_ITER_BEGIN:
|
||||
ri.u.sparseIterBegin.fail_jump = jump_val;
|
||||
break;
|
||||
case ROSE_INSTR_SPARSE_ITER_NEXT:
|
||||
ri.u.sparseIterNext.fail_jump = jump_val;
|
||||
break;
|
||||
default:
|
||||
assert(0); // Unhandled opcode?
|
||||
break;
|
||||
}
|
||||
|
||||
ri.target = JumpTarget::FIXUP_DONE;
|
||||
}
|
||||
|
||||
return out;
|
||||
@ -2689,6 +2774,13 @@ u32 writeProgram(build_context &bc, const vector<RoseInstruction> &program) {
|
||||
assert(program.back().code() == ROSE_INSTR_END);
|
||||
assert(program.size() >= 1);
|
||||
|
||||
// This program must have been flattened; i.e. all check instructions must
|
||||
// have their jump offsets set.
|
||||
assert(all_of(begin(program), end(program), [](const RoseInstruction &ri) {
|
||||
return ri.target == JumpTarget::NO_JUMP ||
|
||||
ri.target == JumpTarget::FIXUP_DONE;
|
||||
}));
|
||||
|
||||
auto it = bc.program_cache.find(program);
|
||||
if (it != end(bc.program_cache)) {
|
||||
DEBUG_PRINTF("reusing cached program at %u\n", it->second);
|
||||
@ -2877,7 +2969,8 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v,
|
||||
}
|
||||
u32 look_count = verify_u32(look.size());
|
||||
|
||||
auto ri = RoseInstruction(ROSE_INSTR_CHECK_LOOKAROUND);
|
||||
auto ri = RoseInstruction(ROSE_INSTR_CHECK_LOOKAROUND,
|
||||
JumpTarget::NEXT_BLOCK);
|
||||
ri.u.checkLookaround.index = look_idx;
|
||||
ri.u.checkLookaround.count = look_count;
|
||||
program.push_back(ri);
|
||||
@ -2898,7 +2991,7 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v,
|
||||
assert(!build.cc.streaming ||
|
||||
build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG);
|
||||
|
||||
auto ri = RoseInstruction(ROSE_INSTR_CHECK_LEFTFIX);
|
||||
auto ri = RoseInstruction(ROSE_INSTR_CHECK_LEFTFIX, JumpTarget::NEXT_BLOCK);
|
||||
ri.u.checkLeftfix.queue = lni.queue;
|
||||
ri.u.checkLeftfix.lag = build.g[v].left.lag;
|
||||
ri.u.checkLeftfix.report = build.g[v].left.leftfix_report;
|
||||
@ -2906,7 +2999,7 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v,
|
||||
}
|
||||
|
||||
static
|
||||
void makeRoleAnchoredDelay(RoseBuildImpl &build, UNUSED build_context &bc,
|
||||
void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc,
|
||||
RoseVertex v, vector<RoseInstruction> &program) {
|
||||
// Only relevant for roles that can be triggered by the anchored table.
|
||||
if (!build.isAnchored(v)) {
|
||||
@ -2919,11 +3012,150 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, UNUSED build_context &bc,
|
||||
return;
|
||||
}
|
||||
|
||||
auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY);
|
||||
auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY,
|
||||
JumpTarget::NEXT_BLOCK);
|
||||
ri.u.anchoredDelay.groups = build.g[v].groups;
|
||||
program.push_back(ri);
|
||||
}
|
||||
|
||||
static
|
||||
void makeDedupe(const ReportID id, vector<RoseInstruction> &report_block) {
|
||||
auto ri = RoseInstruction(ROSE_INSTR_DEDUPE, JumpTarget::NEXT_BLOCK);
|
||||
ri.u.dedupe.report = id;
|
||||
report_block.push_back(move(ri));
|
||||
}
|
||||
|
||||
static
|
||||
void makeDedupeSom(const ReportID id, vector<RoseInstruction> &report_block) {
|
||||
auto ri = RoseInstruction(ROSE_INSTR_DEDUPE_SOM, JumpTarget::NEXT_BLOCK);
|
||||
ri.u.dedupeSom.report = id;
|
||||
report_block.push_back(move(ri));
|
||||
}
|
||||
|
||||
static
|
||||
void makeReport(RoseBuildImpl &build, const ReportID id, const bool has_som,
|
||||
vector<RoseInstruction> &program) {
|
||||
assert(id < build.rm.numReports());
|
||||
const Report &report = build.rm.getReport(id);
|
||||
|
||||
vector<RoseInstruction> report_block;
|
||||
|
||||
// If this report has an exhaustion key, we can check it in the program
|
||||
// rather than waiting until we're in the callback adaptor.
|
||||
if (report.ekey != INVALID_EKEY) {
|
||||
auto ri = RoseInstruction(ROSE_INSTR_CHECK_EXHAUSTED,
|
||||
JumpTarget::NEXT_BLOCK);
|
||||
ri.u.checkExhausted.ekey = report.ekey;
|
||||
report_block.push_back(move(ri));
|
||||
}
|
||||
|
||||
// Similarly, we can handle min/max offset checks.
|
||||
if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) {
|
||||
auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS,
|
||||
JumpTarget::NEXT_BLOCK);
|
||||
ri.u.checkBounds.min_bound = report.minOffset;
|
||||
ri.u.checkBounds.max_bound = report.maxOffset;
|
||||
report_block.push_back(move(ri));
|
||||
}
|
||||
|
||||
// Catch up -- everything except the INTERNAL_ROSE_CHAIN report needs this.
|
||||
// TODO: this could be floated in front of all the reports and only done
|
||||
// once.
|
||||
if (report.type != INTERNAL_ROSE_CHAIN) {
|
||||
program.emplace_back(ROSE_INSTR_CATCH_UP);
|
||||
}
|
||||
|
||||
// External SOM reports need their SOM value calculated.
|
||||
if (isExternalSomReport(report)) {
|
||||
auto ri = RoseInstruction(ROSE_INSTR_SOM_FROM_REPORT);
|
||||
ri.u.somFromReport.report = id;
|
||||
report_block.push_back(move(ri));
|
||||
}
|
||||
|
||||
// Min length constraint.
|
||||
if (report.minLength > 0) {
|
||||
assert(build.hasSom);
|
||||
auto ri = RoseInstruction(ROSE_INSTR_CHECK_MIN_LENGTH,
|
||||
JumpTarget::NEXT_BLOCK);
|
||||
ri.u.checkMinLength.end_adj = report.offsetAdjust;
|
||||
ri.u.checkMinLength.min_length = report.minLength;
|
||||
report_block.push_back(move(ri));
|
||||
}
|
||||
|
||||
if (report.quashSom) {
|
||||
report_block.emplace_back(ROSE_INSTR_SOM_ZERO);
|
||||
}
|
||||
|
||||
switch (report.type) {
|
||||
case EXTERNAL_CALLBACK:
|
||||
if (!has_som) {
|
||||
makeDedupe(id, report_block);
|
||||
if (report.ekey == INVALID_EKEY) {
|
||||
report_block.emplace_back(ROSE_INSTR_REPORT);
|
||||
report_block.back().u.report.report = id;
|
||||
} else {
|
||||
report_block.emplace_back(ROSE_INSTR_REPORT_EXHAUST);
|
||||
report_block.back().u.reportExhaust.report = id;
|
||||
}
|
||||
} else { // has_som
|
||||
makeDedupeSom(id, report_block);
|
||||
if (report.ekey == INVALID_EKEY) {
|
||||
report_block.emplace_back(ROSE_INSTR_REPORT_SOM);
|
||||
report_block.back().u.reportSom.report = id;
|
||||
} else {
|
||||
report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST);
|
||||
report_block.back().u.reportSomExhaust.report = id;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case INTERNAL_SOM_LOC_SET:
|
||||
case INTERNAL_SOM_LOC_SET_IF_UNSET:
|
||||
case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
|
||||
case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
|
||||
case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
|
||||
case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
|
||||
case INTERNAL_SOM_LOC_COPY:
|
||||
case INTERNAL_SOM_LOC_COPY_IF_WRITABLE:
|
||||
case INTERNAL_SOM_LOC_MAKE_WRITABLE:
|
||||
case INTERNAL_SOM_LOC_SET_FROM:
|
||||
case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE:
|
||||
if (has_som) {
|
||||
report_block.emplace_back(ROSE_INSTR_REPORT_SOM_AWARE);
|
||||
report_block.back().u.reportSomAware.report = id;
|
||||
} else {
|
||||
report_block.emplace_back(ROSE_INSTR_REPORT_SOM_INT);
|
||||
report_block.back().u.reportSomInt.report = id;
|
||||
}
|
||||
break;
|
||||
case INTERNAL_ROSE_CHAIN:
|
||||
report_block.emplace_back(ROSE_INSTR_REPORT_CHAIN);
|
||||
report_block.back().u.reportChain.report = id;
|
||||
break;
|
||||
case EXTERNAL_CALLBACK_SOM_REL:
|
||||
case EXTERNAL_CALLBACK_SOM_STORED:
|
||||
case EXTERNAL_CALLBACK_SOM_ABS:
|
||||
case EXTERNAL_CALLBACK_SOM_REV_NFA:
|
||||
makeDedupeSom(id, report_block);
|
||||
if (report.ekey == INVALID_EKEY) {
|
||||
report_block.emplace_back(ROSE_INSTR_REPORT_SOM);
|
||||
report_block.back().u.reportSom.report = id;
|
||||
} else {
|
||||
report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST);
|
||||
report_block.back().u.reportSomExhaust.report = id;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
throw CompileError("Unable to generate bytecode.");
|
||||
}
|
||||
|
||||
assert(!report_block.empty());
|
||||
report_block = flattenProgram({report_block});
|
||||
assert(report_block.back().code() == ROSE_INSTR_END);
|
||||
report_block.pop_back();
|
||||
insert(&program, program.end(), report_block);
|
||||
}
|
||||
|
||||
static
|
||||
void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v,
|
||||
vector<RoseInstruction> &program) {
|
||||
@ -2947,25 +3179,8 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v,
|
||||
has_som = true;
|
||||
}
|
||||
|
||||
// Write program instructions for reports.
|
||||
for (ReportID id : g[v].reports) {
|
||||
assert(id < build.rm.numReports());
|
||||
const Report &ir = build.rm.getReport(id);
|
||||
if (isInternalSomReport(ir)) {
|
||||
auto ri = RoseInstruction(has_som ? ROSE_INSTR_REPORT_SOM
|
||||
: ROSE_INSTR_REPORT_SOM_INT);
|
||||
ri.u.report.report = id;
|
||||
program.push_back(ri);
|
||||
} else if (ir.type == INTERNAL_ROSE_CHAIN) {
|
||||
auto ri = RoseInstruction(ROSE_INSTR_REPORT_CHAIN);
|
||||
ri.u.report.report = id;
|
||||
program.push_back(ri);
|
||||
} else {
|
||||
auto ri = RoseInstruction(has_som ? ROSE_INSTR_REPORT_SOM_KNOWN
|
||||
: ROSE_INSTR_REPORT);
|
||||
ri.u.report.report = id;
|
||||
program.push_back(ri);
|
||||
}
|
||||
makeReport(build, id, has_som, program);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3093,10 +3308,10 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v,
|
||||
// Use the minimum literal length.
|
||||
u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v));
|
||||
|
||||
u32 min_bound = g[e].minBound + lit_length;
|
||||
u32 max_bound = g[e].maxBound == ROSE_BOUND_INF
|
||||
? ROSE_BOUND_INF
|
||||
: g[e].maxBound + lit_length;
|
||||
u64a min_bound = g[e].minBound + lit_length;
|
||||
u64a max_bound = g[e].maxBound == ROSE_BOUND_INF
|
||||
? ROSE_BOUND_INF
|
||||
: g[e].maxBound + lit_length;
|
||||
|
||||
if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
|
||||
assert(g[u].max_offset != ROSE_BOUND_INF);
|
||||
@ -3110,7 +3325,13 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v,
|
||||
assert(max_bound <= ROSE_BOUND_INF);
|
||||
assert(min_bound <= max_bound);
|
||||
|
||||
auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS);
|
||||
// CHECK_BOUNDS instruction uses 64-bit bounds, so we can use MAX_OFFSET
|
||||
// (max value of a u64a) to represent ROSE_BOUND_INF.
|
||||
if (max_bound == ROSE_BOUND_INF) {
|
||||
max_bound = MAX_OFFSET;
|
||||
}
|
||||
|
||||
auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS, JumpTarget::NEXT_BLOCK);
|
||||
ri.u.checkBounds.min_bound = min_bound;
|
||||
ri.u.checkBounds.max_bound = max_bound;
|
||||
|
||||
@ -3138,7 +3359,8 @@ vector<RoseInstruction> makeProgram(RoseBuildImpl &build, build_context &bc,
|
||||
|
||||
if (onlyAtEod(build, v)) {
|
||||
DEBUG_PRINTF("only at eod\n");
|
||||
program.push_back(RoseInstruction(ROSE_INSTR_CHECK_ONLY_EOD));
|
||||
program.push_back(RoseInstruction(ROSE_INSTR_CHECK_ONLY_EOD,
|
||||
JumpTarget::NEXT_BLOCK));
|
||||
}
|
||||
|
||||
if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
|
||||
@ -3287,7 +3509,8 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
|
||||
static
|
||||
void makeRoleCheckNotHandled(build_context &bc, RoseVertex v,
|
||||
vector<RoseInstruction> &program) {
|
||||
auto ri = RoseInstruction(ROSE_INSTR_CHECK_NOT_HANDLED);
|
||||
auto ri = RoseInstruction(ROSE_INSTR_CHECK_NOT_HANDLED,
|
||||
JumpTarget::NEXT_BLOCK);
|
||||
|
||||
u32 handled_key;
|
||||
if (contains(bc.handledKeys, v)) {
|
||||
@ -3328,48 +3551,42 @@ vector<RoseInstruction> makePredProgram(RoseBuildImpl &build, build_context &bc,
|
||||
static
|
||||
u32 addPredBlocksSingle(
|
||||
map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
|
||||
u32 curr_offset, vector<RoseInstruction> &program) {
|
||||
assert(predProgramLists.size() == 1);
|
||||
vector<RoseInstruction> &program) {
|
||||
|
||||
u32 pred_state = predProgramLists.begin()->first;
|
||||
auto subprog = flattenProgram(predProgramLists.begin()->second);
|
||||
vector<vector<RoseInstruction>> prog_blocks;
|
||||
|
||||
// Check our pred state.
|
||||
auto ri = RoseInstruction(ROSE_INSTR_CHECK_STATE);
|
||||
ri.u.checkState.index = pred_state;
|
||||
program.push_back(ri);
|
||||
curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
|
||||
for (const auto &m : predProgramLists) {
|
||||
const u32 &pred_state = m.first;
|
||||
auto subprog = flattenProgram(m.second);
|
||||
|
||||
// Add subprogram.
|
||||
for (const auto &ri : subprog) {
|
||||
program.push_back(ri);
|
||||
curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
|
||||
}
|
||||
|
||||
const u32 end_offset =
|
||||
curr_offset - ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
|
||||
|
||||
// Fix up the instruction operands.
|
||||
curr_offset = 0;
|
||||
for (size_t i = 0; i < program.size(); i++) {
|
||||
auto &ri = program[i];
|
||||
switch (ri.code()) {
|
||||
case ROSE_INSTR_CHECK_STATE:
|
||||
ri.u.checkState.fail_jump = end_offset - curr_offset;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
|
||||
// Check our pred state.
|
||||
auto ri = RoseInstruction(ROSE_INSTR_CHECK_STATE,
|
||||
JumpTarget::NEXT_BLOCK);
|
||||
ri.u.checkState.index = pred_state;
|
||||
subprog.insert(begin(subprog), ri);
|
||||
assert(subprog.back().code() == ROSE_INSTR_END);
|
||||
subprog.pop_back();
|
||||
prog_blocks.push_back(move(subprog));
|
||||
}
|
||||
|
||||
auto prog = flattenProgram(prog_blocks);
|
||||
program.insert(end(program), begin(prog), end(prog));
|
||||
return 0; // No iterator.
|
||||
}
|
||||
|
||||
static
|
||||
u32 programLength(const vector<RoseInstruction> &program) {
|
||||
u32 len = 0;
|
||||
for (const auto &ri : program) {
|
||||
len += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
static
|
||||
u32 addPredBlocksMulti(build_context &bc,
|
||||
map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
|
||||
u32 curr_offset, vector<RoseInstruction> &program) {
|
||||
vector<RoseInstruction> &program) {
|
||||
assert(!predProgramLists.empty());
|
||||
|
||||
// First, add the iterator itself.
|
||||
@ -3386,10 +3603,12 @@ u32 addPredBlocksMulti(build_context &bc,
|
||||
|
||||
// Construct our program, starting with the SPARSE_ITER_BEGIN
|
||||
// instruction, keeping track of the jump offset for each sub-program.
|
||||
vector<RoseInstruction> sparse_program;
|
||||
vector<u32> jump_table;
|
||||
|
||||
program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN));
|
||||
curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
|
||||
sparse_program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN,
|
||||
JumpTarget::PROGRAM_END));
|
||||
u32 curr_offset = programLength(program) + programLength(sparse_program);
|
||||
|
||||
for (const auto &e : predProgramLists) {
|
||||
DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(),
|
||||
@ -3402,62 +3621,61 @@ u32 addPredBlocksMulti(build_context &bc,
|
||||
// with a SPARSE_ITER_NEXT.
|
||||
assert(!subprog.empty());
|
||||
assert(subprog.back().code() == ROSE_INSTR_END);
|
||||
subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT);
|
||||
subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT,
|
||||
JumpTarget::PROGRAM_END);
|
||||
}
|
||||
|
||||
for (const auto &ri : subprog) {
|
||||
program.push_back(ri);
|
||||
curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
|
||||
}
|
||||
curr_offset += programLength(subprog);
|
||||
insert(&sparse_program, end(sparse_program), subprog);
|
||||
}
|
||||
|
||||
const u32 end_offset =
|
||||
curr_offset - ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN);
|
||||
// Strip the END instruction from the last block.
|
||||
assert(sparse_program.back().code() == ROSE_INSTR_END);
|
||||
sparse_program.pop_back();
|
||||
|
||||
sparse_program = flattenProgram({sparse_program});
|
||||
|
||||
// Write the jump table into the bytecode.
|
||||
const u32 jump_table_offset =
|
||||
add_to_engine_blob(bc, begin(jump_table), end(jump_table));
|
||||
|
||||
// Fix up the instruction operands.
|
||||
// Write jump table and iterator offset into sparse iter instructions.
|
||||
auto keys_it = begin(keys);
|
||||
curr_offset = 0;
|
||||
for (size_t i = 0; i < program.size(); i++) {
|
||||
auto &ri = program[i];
|
||||
for (auto &ri : sparse_program) {
|
||||
switch (ri.code()) {
|
||||
case ROSE_INSTR_SPARSE_ITER_BEGIN:
|
||||
ri.u.sparseIterBegin.iter_offset = iter_offset;
|
||||
ri.u.sparseIterBegin.jump_table = jump_table_offset;
|
||||
ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset;
|
||||
break;
|
||||
case ROSE_INSTR_SPARSE_ITER_NEXT:
|
||||
ri.u.sparseIterNext.iter_offset = iter_offset;
|
||||
ri.u.sparseIterNext.jump_table = jump_table_offset;
|
||||
assert(keys_it != end(keys));
|
||||
ri.u.sparseIterNext.state = *keys_it++;
|
||||
ri.u.sparseIterNext.fail_jump = end_offset - curr_offset;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN);
|
||||
}
|
||||
|
||||
program.insert(end(program), begin(sparse_program), end(sparse_program));
|
||||
|
||||
return iter_offset;
|
||||
}
|
||||
|
||||
static
|
||||
u32 addPredBlocks(build_context &bc,
|
||||
map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
|
||||
u32 curr_offset, vector<RoseInstruction> &program,
|
||||
vector<RoseInstruction> &program,
|
||||
bool force_sparse_iter) {
|
||||
const size_t num_preds = predProgramLists.size();
|
||||
if (num_preds == 0) {
|
||||
program = flattenProgram({program});
|
||||
return 0; // No iterator.
|
||||
} else if (!force_sparse_iter && num_preds == 1) {
|
||||
return addPredBlocksSingle(predProgramLists, curr_offset, program);
|
||||
return addPredBlocksSingle(predProgramLists, program);
|
||||
} else {
|
||||
return addPredBlocksMulti(bc, predProgramLists, curr_offset, program);
|
||||
return addPredBlocksMulti(bc, predProgramLists, program);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3481,8 +3699,7 @@ pair<u32, u32> makeSparseIterProgram(build_context &bc,
|
||||
// Add blocks to deal with non-root edges (triggered by sparse iterator or
|
||||
// mmbit_isset checks). This operation will flatten the program up to this
|
||||
// point.
|
||||
u32 iter_offset =
|
||||
addPredBlocks(bc, predProgramLists, curr_offset, program, false);
|
||||
u32 iter_offset = addPredBlocks(bc, predProgramLists, program, false);
|
||||
|
||||
// If we have a root program, replace the END instruction with it. Note
|
||||
// that the root program has already been flattened.
|
||||
@ -3823,10 +4040,8 @@ vector<RoseInstruction> makeEodAnchorProgram(RoseBuildImpl &build,
|
||||
makeRoleCheckNotHandled(bc, v, program);
|
||||
}
|
||||
|
||||
for (const auto &report : g[v].reports) {
|
||||
auto ri = RoseInstruction(ROSE_INSTR_REPORT_EOD);
|
||||
ri.u.report.report = report;
|
||||
program.push_back(ri);
|
||||
for (const auto &id : g[v].reports) {
|
||||
makeReport(build, id, false, program);
|
||||
}
|
||||
|
||||
return program;
|
||||
@ -3870,7 +4085,7 @@ pair<u32, u32> buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) {
|
||||
|
||||
// Note: we force the use of a sparse iterator for the EOD program so we
|
||||
// can easily guard EOD execution at runtime.
|
||||
u32 iter_offset = addPredBlocks(bc, predProgramLists, 0, program, true);
|
||||
u32 iter_offset = addPredBlocks(bc, predProgramLists, program, true);
|
||||
|
||||
assert(program.size() > 1);
|
||||
return {writeProgram(bc, program), iter_offset};
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include "nfa/nfa_dump_api.h"
|
||||
#include "nfa/nfa_internal.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/internal_report.h"
|
||||
#include "util/multibit_internal.h"
|
||||
#include "util/multibit.h"
|
||||
|
||||
@ -152,6 +153,61 @@ void dumpLookaround(ofstream &os, const RoseEngine *t,
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
vector<u32> sparseIterValues(const mmbit_sparse_iter *it, u32 num_bits) {
|
||||
vector<u32> keys;
|
||||
|
||||
if (num_bits == 0) {
|
||||
return keys;
|
||||
}
|
||||
|
||||
vector<u8> bits(mmbit_size(num_bits), u8{0xff}); // All bits on.
|
||||
vector<mmbit_sparse_state> state(MAX_SPARSE_ITER_STATES);
|
||||
|
||||
const u8 *b = bits.data();
|
||||
mmbit_sparse_state *s = state.data();
|
||||
|
||||
u32 idx = 0;
|
||||
u32 i = mmbit_sparse_iter_begin(b, num_bits, &idx, it, s);
|
||||
while (i != MMB_INVALID) {
|
||||
keys.push_back(i);
|
||||
i = mmbit_sparse_iter_next(b, num_bits, i, &idx, it, s);
|
||||
}
|
||||
|
||||
return keys;
|
||||
}
|
||||
|
||||
static
|
||||
void dumpJumpTable(ofstream &os, const RoseEngine *t,
|
||||
const ROSE_STRUCT_SPARSE_ITER_BEGIN *ri) {
|
||||
auto *it =
|
||||
(const mmbit_sparse_iter *)loadFromByteCodeOffset(t, ri->iter_offset);
|
||||
auto *jumps = (const u32 *)loadFromByteCodeOffset(t, ri->jump_table);
|
||||
|
||||
for (const auto &key : sparseIterValues(it, t->rolesWithStateCount)) {
|
||||
os << " " << std::setw(4) << std::setfill(' ') << key << " : +"
|
||||
<< *jumps << endl;
|
||||
++jumps;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void dumpReport(ofstream &os, const RoseEngine *t, ReportID report) {
|
||||
const auto *ir =
|
||||
(const internal_report *)loadFromByteCodeOffset(t, t->intReportOffset) +
|
||||
report;
|
||||
os << " type=" << u32{ir->type};
|
||||
os << ", onmatch=" << ir->onmatch;
|
||||
if (ir->ekey != INVALID_EKEY) {
|
||||
os << ", ekey=" << ir->ekey;
|
||||
}
|
||||
if (ir->dkey != MO_INVALID_IDX) {
|
||||
os << ", dkey=" << ir->dkey;
|
||||
}
|
||||
|
||||
os << endl;
|
||||
}
|
||||
|
||||
static
|
||||
string dumpStrMask(const u8 *mask, size_t len) {
|
||||
ostringstream oss;
|
||||
@ -211,6 +267,13 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_BOUNDS) {
|
||||
os << " min_bound " << ri->min_bound << endl;
|
||||
os << " max_bound " << ri->max_bound << endl;
|
||||
os << " fail_jump +" << ri->fail_jump << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_NOT_HANDLED) {
|
||||
os << " key " << ri->key << endl;
|
||||
os << " fail_jump +" << ri->fail_jump << endl;
|
||||
@ -239,6 +302,9 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CATCH_UP) {}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SOM_ADJUST) {
|
||||
os << " distance " << ri->distance << endl;
|
||||
}
|
||||
@ -250,6 +316,15 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SOM_FROM_REPORT) {
|
||||
os << " report " << ri->report << endl;
|
||||
dumpReport(os, t, ri->report);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SOM_ZERO) {}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(TRIGGER_INFIX) {
|
||||
os << " queue " << ri->queue << endl;
|
||||
os << " event " << ri->event << endl;
|
||||
@ -263,33 +338,72 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(REPORT) {
|
||||
PROGRAM_CASE(DEDUPE) {
|
||||
os << " report " << ri->report << endl;
|
||||
dumpReport(os, t, ri->report);
|
||||
os << " fail_jump +" << ri->fail_jump << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(DEDUPE_SOM) {
|
||||
os << " report " << ri->report << endl;
|
||||
dumpReport(os, t, ri->report);
|
||||
os << " fail_jump +" << ri->fail_jump << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(REPORT_CHAIN) {
|
||||
os << " report " << ri->report << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(REPORT_EOD) {
|
||||
os << " report " << ri->report << endl;
|
||||
dumpReport(os, t, ri->report);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(REPORT_SOM_INT) {
|
||||
os << " report " << ri->report << endl;
|
||||
dumpReport(os, t, ri->report);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(REPORT_SOM_AWARE) {
|
||||
os << " report " << ri->report << endl;
|
||||
dumpReport(os, t, ri->report);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(REPORT) {
|
||||
os << " report " << ri->report << endl;
|
||||
dumpReport(os, t, ri->report);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(REPORT_EXHAUST) {
|
||||
os << " report " << ri->report << endl;
|
||||
dumpReport(os, t, ri->report);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(REPORT_SOM) {
|
||||
os << " report " << ri->report << endl;
|
||||
dumpReport(os, t, ri->report);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(REPORT_SOM_KNOWN) {
|
||||
PROGRAM_CASE(REPORT_SOM_EXHAUST) {
|
||||
os << " report " << ri->report << endl;
|
||||
dumpReport(os, t, ri->report);
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_EXHAUSTED) {
|
||||
os << " ekey " << ri->ekey << endl;
|
||||
os << " fail_jump +" << ri->fail_jump << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(CHECK_MIN_LENGTH) {
|
||||
os << " end_adj " << ri->end_adj << endl;
|
||||
os << " min_length " << ri->min_length << endl;
|
||||
os << " fail_jump +" << ri->fail_jump << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
@ -319,6 +433,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
PROGRAM_CASE(SPARSE_ITER_BEGIN) {
|
||||
os << " iter_offset " << ri->iter_offset << endl;
|
||||
os << " jump_table " << ri->jump_table << endl;
|
||||
dumpJumpTable(os, t, ri);
|
||||
os << " fail_jump +" << ri->fail_jump << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
@ -51,16 +51,33 @@ enum RoseInstructionCode {
|
||||
ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check.
|
||||
ROSE_INSTR_CHECK_LEFTFIX, //!< Leftfix must be in accept state.
|
||||
ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches.
|
||||
ROSE_INSTR_CATCH_UP, //!< Catch up engines, anchored matches.
|
||||
ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM.
|
||||
ROSE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine.
|
||||
ROSE_INSTR_SOM_FROM_REPORT, //!< Acquire SOM from an internal_report.
|
||||
ROSE_INSTR_SOM_ZERO, //!< Set SOM to zero.
|
||||
ROSE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine.
|
||||
ROSE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine.
|
||||
ROSE_INSTR_REPORT, //!< Fire an ordinary report.
|
||||
ROSE_INSTR_DEDUPE, //!< Run deduplication for report.
|
||||
ROSE_INSTR_DEDUPE_SOM, //!< Run deduplication for SOM report.
|
||||
ROSE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV).
|
||||
ROSE_INSTR_REPORT_EOD, //!< Fire a callback at EOD time.
|
||||
ROSE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only.
|
||||
ROSE_INSTR_REPORT_SOM, //!< Manipulate SOM and report.
|
||||
ROSE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset.
|
||||
ROSE_INSTR_REPORT_SOM_AWARE, //!< Manipulate SOM from SOM-aware source.
|
||||
|
||||
/** \brief Fire a report. */
|
||||
ROSE_INSTR_REPORT,
|
||||
|
||||
/** \brief Fire an exhaustible report. */
|
||||
ROSE_INSTR_REPORT_EXHAUST,
|
||||
|
||||
/** \brief Fire a SOM report. */
|
||||
ROSE_INSTR_REPORT_SOM,
|
||||
|
||||
/** \brief Fire an exhaustible SOM report. */
|
||||
ROSE_INSTR_REPORT_SOM_EXHAUST,
|
||||
|
||||
ROSE_INSTR_CHECK_EXHAUSTED, //!< Check if an ekey has already been set.
|
||||
ROSE_INSTR_CHECK_MIN_LENGTH, //!< Check (EOM - SOM) against min length.
|
||||
ROSE_INSTR_SET_STATE, //!< Switch a state index on.
|
||||
ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits.
|
||||
ROSE_INSTR_SQUASH_GROUPS, //!< Conditionally turn off some groups.
|
||||
@ -106,8 +123,8 @@ struct ROSE_STRUCT_CHECK_ONLY_EOD {
|
||||
|
||||
struct ROSE_STRUCT_CHECK_BOUNDS {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 min_bound; //!< Min distance from zero.
|
||||
u32 max_bound; //!< Max distance from zero (or ROSE_BOUND_INF).
|
||||
u64a min_bound; //!< Min distance from zero.
|
||||
u64a max_bound; //!< Max distance from zero.
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
@ -138,6 +155,10 @@ struct ROSE_STRUCT_PUSH_DELAYED {
|
||||
u32 index; // Delay literal index (relative to first delay lit).
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CATCH_UP {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_SOM_ADJUST {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 distance; //!< Distance to EOM.
|
||||
@ -149,6 +170,15 @@ struct ROSE_STRUCT_SOM_LEFTFIX {
|
||||
u32 lag; //!< Lag of leftfix for this case.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_SOM_FROM_REPORT {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report; //!< EXTERNAL_CALLBACK_SOM_* report to use.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_SOM_ZERO {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_TRIGGER_INFIX {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u8 cancel; //!< Cancels previous top event.
|
||||
@ -162,9 +192,16 @@ struct ROSE_STRUCT_TRIGGER_SUFFIX {
|
||||
u32 event; //!< Queue event, from MQE_*.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_REPORT {
|
||||
struct ROSE_STRUCT_DEDUPE {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report;
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_DEDUPE_SOM {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report;
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_REPORT_CHAIN {
|
||||
@ -172,26 +209,54 @@ struct ROSE_STRUCT_REPORT_CHAIN {
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_REPORT_EOD {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_REPORT_SOM_INT {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_REPORT_SOM_AWARE {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_REPORT {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_REPORT_EXHAUST {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_REPORT_SOM {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_REPORT_SOM_KNOWN {
|
||||
struct ROSE_STRUCT_REPORT_SOM_EXHAUST {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_REPORT_SOM_EXT {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
ReportID report;
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CHECK_EXHAUSTED {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 ekey; //!< Exhaustion key to check.
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_CHECK_MIN_LENGTH {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
s32 end_adj; //!< Offset adjustment to add to EOM first.
|
||||
u64a min_length; //!< Minimum distance from SOM to EOM.
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_SET_STATE {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 index; //!< State index in multibit.
|
||||
|
314
src/runtime.c
314
src/runtime.c
@ -47,6 +47,7 @@
|
||||
#include "rose/rose.h"
|
||||
#include "rose/runtime.h"
|
||||
#include "database.h"
|
||||
#include "report.h"
|
||||
#include "scratch.h"
|
||||
#include "som/som_runtime.h"
|
||||
#include "som/som_stream.h"
|
||||
@ -56,8 +57,6 @@
|
||||
#include "util/fatbit.h"
|
||||
#include "util/multibit.h"
|
||||
|
||||
#define DEDUPE_MATCHES
|
||||
|
||||
static really_inline
|
||||
void prefetch_data(const char *data, unsigned length) {
|
||||
__builtin_prefetch(data);
|
||||
@ -170,306 +169,6 @@ void setBroken(char *state, u8 broken) {
|
||||
ts->broken = broken;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch,
|
||||
char is_simple, char do_som) {
|
||||
assert(id != MO_INVALID_IDX); // Should never get an invalid ID.
|
||||
assert(scratch);
|
||||
assert(scratch->magic == SCRATCH_MAGIC);
|
||||
|
||||
struct core_info *ci = &scratch->core_info;
|
||||
const struct RoseEngine *rose = ci->rose;
|
||||
DEBUG_PRINTF("internal report %u\n", id);
|
||||
const struct internal_report *ri = getInternalReport(rose, id);
|
||||
|
||||
assert(isExternalReport(ri)); /* only external reports should reach here */
|
||||
|
||||
s32 offset_adj = ri->offsetAdjust;
|
||||
UNUSED u32 dkey = ri->dkey;
|
||||
u64a to_offset = offset;
|
||||
u64a from_offset = 0;
|
||||
UNUSED u32 dkeyCount = rose->dkeyCount;
|
||||
|
||||
u32 flags = 0;
|
||||
#ifndef RELEASE_BUILD
|
||||
if (offset_adj) {
|
||||
// alert testing tools that we've got adjusted matches
|
||||
flags |= HS_MATCH_FLAG_ADJUSTED;
|
||||
}
|
||||
#endif
|
||||
|
||||
DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u "
|
||||
"offsetAdj=%d\n", offset, id, ri->type, ri->onmatch,
|
||||
offset_adj);
|
||||
|
||||
if (unlikely(can_stop_matching(scratch))) { /* ok - we are from rose */
|
||||
DEBUG_PRINTF("pre broken - halting\n");
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
||||
if (!is_simple && ri->hasBounds) {
|
||||
assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET);
|
||||
assert(ri->minOffset <= ri->maxOffset);
|
||||
if (offset < ri->minOffset || offset > ri->maxOffset) {
|
||||
DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n",
|
||||
offset, ri->minOffset, ri->maxOffset);
|
||||
return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) {
|
||||
DEBUG_PRINTF("ate exhausted match\n");
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
if (ri->type == EXTERNAL_CALLBACK) {
|
||||
from_offset = 0;
|
||||
} else if (do_som) {
|
||||
from_offset = handleSomExternal(scratch, ri, to_offset);
|
||||
}
|
||||
|
||||
to_offset += offset_adj;
|
||||
assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset);
|
||||
|
||||
if (do_som && ri->minLength) {
|
||||
if (from_offset != HS_OFFSET_PAST_HORIZON &&
|
||||
(to_offset - from_offset < ri->minLength)) {
|
||||
return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
|
||||
}
|
||||
if (ri->quashSom) {
|
||||
from_offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n",
|
||||
from_offset, to_offset, ri->onmatch, ci->userContext);
|
||||
|
||||
int halt = 0;
|
||||
|
||||
if (do_som || dkey != MO_INVALID_IDX) {
|
||||
if (offset != scratch->deduper.current_report_offset) {
|
||||
assert(scratch->deduper.current_report_offset == ~0ULL ||
|
||||
scratch->deduper.current_report_offset < offset);
|
||||
if (offset == scratch->deduper.current_report_offset + 1) {
|
||||
fatbit_clear(scratch->deduper.log[offset % 2]);
|
||||
} else {
|
||||
fatbit_clear(scratch->deduper.log[0]);
|
||||
fatbit_clear(scratch->deduper.log[1]);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("adj dedupe offset %hhd\n", do_som);
|
||||
if (do_som) {
|
||||
halt = flushStoredSomMatches(scratch, offset);
|
||||
if (halt) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
scratch->deduper.current_report_offset = offset;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEDUPE_MATCHES
|
||||
if (dkey != MO_INVALID_IDX) {
|
||||
if (ri->type == EXTERNAL_CALLBACK || ri->quashSom) {
|
||||
DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
|
||||
assert(offset_adj == 0 || offset_adj == -1);
|
||||
if (fatbit_set(scratch->deduper.log[to_offset % 2], dkeyCount,
|
||||
dkey)) {
|
||||
/* we have already raised this report at this offset, squash dupe
|
||||
* match. */
|
||||
DEBUG_PRINTF("dedupe\n");
|
||||
goto exit;
|
||||
}
|
||||
} else if (do_som) {
|
||||
/* SOM external event */
|
||||
DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
|
||||
assert(offset_adj == 0 || offset_adj == -1);
|
||||
u64a *starts = scratch->deduper.som_start_log[to_offset % 2];
|
||||
if (fatbit_set(scratch->deduper.som_log[to_offset % 2], dkeyCount,
|
||||
dkey)) {
|
||||
starts[dkey] = MIN(starts[dkey], from_offset);
|
||||
} else {
|
||||
starts[dkey] = from_offset;
|
||||
}
|
||||
|
||||
if (offset_adj) {
|
||||
scratch->deduper.som_log_dirty |= 1;
|
||||
} else {
|
||||
scratch->deduper.som_log_dirty |= 2;
|
||||
}
|
||||
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset,
|
||||
flags, ci->userContext);
|
||||
#ifdef DEDUPE_MATCHES
|
||||
exit:
|
||||
#endif
|
||||
if (halt) {
|
||||
DEBUG_PRINTF("callback requested to terminate matches\n");
|
||||
|
||||
setBroken(ci->state, BROKEN_FROM_USER);
|
||||
ci->broken = BROKEN_FROM_USER;
|
||||
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
||||
if (!is_simple && ri->ekey != END_EXHAUST) {
|
||||
markAsMatched(ci->exhaustionVector, ri->ekey);
|
||||
return MO_CONTINUE_MATCHING;
|
||||
} else {
|
||||
return ROSE_CONTINUE_MATCHING_NO_EXHAUST;
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id,
|
||||
struct hs_scratch *scratch, char is_simple) {
|
||||
assert(id != MO_INVALID_IDX); // Should never get an invalid ID.
|
||||
assert(scratch);
|
||||
assert(scratch->magic == SCRATCH_MAGIC);
|
||||
|
||||
u32 flags = 0;
|
||||
|
||||
struct core_info *ci = &scratch->core_info;
|
||||
const struct RoseEngine *rose = ci->rose;
|
||||
const struct internal_report *ri = getInternalReport(rose, id);
|
||||
|
||||
/* internal events should be handled by rose directly */
|
||||
assert(ri->type == EXTERNAL_CALLBACK);
|
||||
|
||||
DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u "
|
||||
"offsetAdj=%d\n", to_offset, id, ri->type, ri->onmatch,
|
||||
ri->offsetAdjust);
|
||||
|
||||
if (unlikely(can_stop_matching(scratch))) {
|
||||
DEBUG_PRINTF("pre broken - halting\n");
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
||||
if (!is_simple && ri->hasBounds) {
|
||||
assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET);
|
||||
if (to_offset < ri->minOffset || to_offset > ri->maxOffset) {
|
||||
DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n",
|
||||
to_offset, ri->minOffset, ri->maxOffset);
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
}
|
||||
|
||||
int halt = 0;
|
||||
|
||||
if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) {
|
||||
DEBUG_PRINTF("ate exhausted match\n");
|
||||
goto do_return;
|
||||
}
|
||||
|
||||
#ifdef DEDUPE_MATCHES
|
||||
u64a offset = to_offset;
|
||||
#endif
|
||||
|
||||
to_offset += ri->offsetAdjust;
|
||||
assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset);
|
||||
|
||||
if (!is_simple && ri->minLength) {
|
||||
if (from_offset != HS_OFFSET_PAST_HORIZON &&
|
||||
(to_offset - from_offset < ri->minLength)) {
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
if (ri->quashSom) {
|
||||
from_offset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n",
|
||||
from_offset, to_offset, ri->onmatch, ci->userContext);
|
||||
|
||||
#ifndef RELEASE_BUILD
|
||||
if (ri->offsetAdjust != 0) {
|
||||
// alert testing tools that we've got adjusted matches
|
||||
flags |= HS_MATCH_FLAG_ADJUSTED;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DEDUPE_MATCHES
|
||||
u32 dkeyCount = rose->dkeyCount;
|
||||
|
||||
if (offset != scratch->deduper.current_report_offset) {
|
||||
|
||||
assert(scratch->deduper.current_report_offset == ~0ULL
|
||||
|| scratch->deduper.current_report_offset < offset);
|
||||
if (offset == scratch->deduper.current_report_offset + 1) {
|
||||
fatbit_clear(scratch->deduper.log[offset % 2]);
|
||||
} else {
|
||||
fatbit_clear(scratch->deduper.log[0]);
|
||||
fatbit_clear(scratch->deduper.log[1]);
|
||||
}
|
||||
|
||||
halt = flushStoredSomMatches(scratch, offset);
|
||||
if (halt) {
|
||||
goto do_return;
|
||||
}
|
||||
|
||||
scratch->deduper.current_report_offset = offset;
|
||||
}
|
||||
|
||||
u32 dkey = ri->dkey;
|
||||
if (dkey != MO_INVALID_IDX) {
|
||||
if (ri->quashSom) {
|
||||
DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
|
||||
assert(ri->offsetAdjust == 0 || ri->offsetAdjust == -1);
|
||||
if (fatbit_set(scratch->deduper.log[to_offset % 2], dkeyCount,
|
||||
dkey)) {
|
||||
/* we have already raised this report at this offset, squash
|
||||
* dupe match. */
|
||||
DEBUG_PRINTF("dedupe\n");
|
||||
goto do_return;
|
||||
}
|
||||
} else {
|
||||
/* SOM external event */
|
||||
DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset);
|
||||
assert(ri->offsetAdjust == 0 || ri->offsetAdjust == -1);
|
||||
u64a *starts = scratch->deduper.som_start_log[to_offset % 2];
|
||||
if (fatbit_set(scratch->deduper.som_log[to_offset % 2], dkeyCount,
|
||||
dkey)) {
|
||||
starts[dkey] = MIN(starts[dkey], from_offset);
|
||||
} else {
|
||||
starts[dkey] = from_offset;
|
||||
}
|
||||
|
||||
if (ri->offsetAdjust) {
|
||||
scratch->deduper.som_log_dirty |= 1;
|
||||
} else {
|
||||
scratch->deduper.som_log_dirty |= 2;
|
||||
}
|
||||
|
||||
goto do_return;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset,
|
||||
flags, ci->userContext);
|
||||
|
||||
if (!is_simple) {
|
||||
markAsMatched(ci->exhaustionVector, ri->ekey);
|
||||
}
|
||||
|
||||
do_return:
|
||||
if (halt) {
|
||||
DEBUG_PRINTF("callback requested to terminate matches\n");
|
||||
|
||||
setBroken(ci->state, BROKEN_FROM_USER);
|
||||
ci->broken = BROKEN_FROM_USER;
|
||||
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlmcb_rv_t multiDirectAdaptor(u64a real_end, ReportID direct_id, void *context,
|
||||
struct core_info *ci, char is_simple,
|
||||
@ -1055,8 +754,7 @@ hs_error_t hs_open_stream(const hs_database_t *db, UNUSED unsigned flags,
|
||||
static really_inline
|
||||
void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
|
||||
const struct RoseEngine *rose = id->rose;
|
||||
char *state = getMultiState(id);
|
||||
u8 broken = getBroken(state);
|
||||
u8 broken = scratch->core_info.broken;
|
||||
|
||||
if (broken) {
|
||||
DEBUG_PRINTF("stream already broken\n");
|
||||
@ -1076,8 +774,7 @@ void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
|
||||
static never_inline
|
||||
void soleOutfixEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
|
||||
const struct RoseEngine *t = id->rose;
|
||||
char *state = getMultiState(id);
|
||||
u8 broken = getBroken(state);
|
||||
u8 broken = scratch->core_info.broken;
|
||||
|
||||
if (broken) {
|
||||
DEBUG_PRINTF("stream already broken\n");
|
||||
@ -1372,9 +1069,10 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
|
||||
if (!id->offset && rose->boundary.reportZeroOffset) {
|
||||
DEBUG_PRINTF("zero reports\n");
|
||||
processReportList(rose, rose->boundary.reportZeroOffset, 0, scratch);
|
||||
broken = getBroken(state);
|
||||
broken = scratch->core_info.broken;
|
||||
if (unlikely(broken)) {
|
||||
DEBUG_PRINTF("stream is broken, halting scan\n");
|
||||
setBroken(state, broken);
|
||||
if (broken == BROKEN_FROM_USER) {
|
||||
return HS_SCAN_TERMINATED;
|
||||
} else {
|
||||
@ -1400,7 +1098,6 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
|
||||
if (rose->hasSom && !told_to_stop_matching(scratch)) {
|
||||
int halt = flushStoredSomMatches(scratch, ~0ULL);
|
||||
if (halt) {
|
||||
setBroken(state, BROKEN_FROM_USER);
|
||||
scratch->core_info.broken = BROKEN_FROM_USER;
|
||||
}
|
||||
}
|
||||
@ -1413,6 +1110,7 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
|
||||
storeSomToStream(scratch, id->offset);
|
||||
}
|
||||
} else if (told_to_stop_matching(scratch)) {
|
||||
setBroken(state, BROKEN_FROM_USER);
|
||||
return HS_SCAN_TERMINATED;
|
||||
} else { /* exhausted */
|
||||
setBroken(state, BROKEN_EXHAUSTED);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -487,6 +487,7 @@ int clearSomLog(struct hs_scratch *scratch, u64a offset, struct fatbit *log,
|
||||
int halt = ci->userCallback(onmatch, from_offset, offset, flags,
|
||||
ci->userContext);
|
||||
if (halt) {
|
||||
ci->broken = BROKEN_FROM_USER;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -184,6 +184,11 @@ bool isExternalReport(const Report &r) {
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline
|
||||
bool isExternalSomReport(const Report &r) {
|
||||
return r.type != EXTERNAL_CALLBACK && isExternalReport(r);
|
||||
}
|
||||
|
||||
static inline
|
||||
bool operator<(const Report &a, const Report &b) {
|
||||
ORDER_CHECK(type);
|
||||
|
Loading…
x
Reference in New Issue
Block a user