diff --git a/CMakeLists.txt b/CMakeLists.txt index 1abab0fe..7ae459d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -384,6 +384,7 @@ set (hs_exec_SRCS src/ue2common.h src/alloc.c src/allocator.h + src/report.h src/runtime.c src/fdr/fdr.c src/fdr/fdr.h diff --git a/src/report.h b/src/report.h new file mode 100644 index 00000000..2fff3b9a --- /dev/null +++ b/src/report.h @@ -0,0 +1,531 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Runtime functions to do with reports, inlined into callers. + */ + +#ifndef REPORT_H +#define REPORT_H + +#include "hs_internal.h" +#include "hs_runtime.h" +#include "scratch.h" +#include "ue2common.h" +#include "nfa/callback.h" +#include "nfa/nfa_internal.h" +#include "rose/runtime.h" +#include "som/som_runtime.h" +#include "util/exhaust.h" +#include "util/fatbit.h" +#include "util/internal_report.h" + +static really_inline +int satisfiesMinLength(u64a min_len, u64a from_offset, + u64a to_offset) { + assert(min_len); + + if (from_offset == HS_OFFSET_PAST_HORIZON) { + DEBUG_PRINTF("SOM beyond horizon\n"); + return 1; + } + + DEBUG_PRINTF("match len=%llu, min len=%llu\n", to_offset - from_offset, + min_len); + return to_offset - from_offset >= min_len; +} + +enum DedupeResult { + DEDUPE_CONTINUE, //!< Continue with match, not a dupe. + DEDUPE_SKIP, //!< Don't report this match, dupe or delayed due to SOM. + DEDUPE_HALT //!< User instructed us to stop matching. +}; + +static really_inline +enum DedupeResult dedupeCatchup(const struct RoseEngine *rose, + const struct internal_report *ri, + struct hs_scratch *scratch, u64a offset, + u64a from_offset, u64a to_offset, + const char do_som) { + DEBUG_PRINTF("offset=%llu, match=[%llu,%llu], dkey=%u, do_som=%d\n", offset, + from_offset, to_offset, ri->dkey, do_som); + DEBUG_PRINTF("report type=%u, quashSom=%d\n", ri->type, ri->quashSom); + const u32 dkey = ri->dkey; + if (!do_som && dkey == MO_INVALID_IDX) { + DEBUG_PRINTF("nothing to do\n"); + return DEDUPE_CONTINUE; + } + + struct match_deduper *deduper = &scratch->deduper; + if (offset != deduper->current_report_offset) { + assert(deduper->current_report_offset == ~0ULL || + deduper->current_report_offset < offset); + if (offset == deduper->current_report_offset + 1) { + fatbit_clear(deduper->log[offset % 2]); + } else { + fatbit_clear(deduper->log[0]); + fatbit_clear(deduper->log[1]); + } + + if (do_som && flushStoredSomMatches(scratch, offset)) { + return DEDUPE_HALT; + } + deduper->current_report_offset = offset; + } + + if (dkey != MO_INVALID_IDX) { + const u32 dkeyCount = rose->dkeyCount; + const s32 offset_adj = ri->offsetAdjust; + if (ri->type == EXTERNAL_CALLBACK || ri->quashSom) { + DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); + assert(offset_adj == 0 || offset_adj == -1); + if (fatbit_set(deduper->log[to_offset % 2], dkeyCount, dkey)) { + /* we have already raised this report at this offset, squash + * dupe match. */ + DEBUG_PRINTF("dedupe\n"); + return DEDUPE_SKIP; + } + } else if (do_som) { + /* SOM external event */ + DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); + assert(offset_adj == 0 || offset_adj == -1); + u64a *starts = deduper->som_start_log[to_offset % 2]; + if (fatbit_set(deduper->som_log[to_offset % 2], dkeyCount, dkey)) { + starts[dkey] = MIN(starts[dkey], from_offset); + } else { + starts[dkey] = from_offset; + } + DEBUG_PRINTF("starts[%u]=%llu\n", dkey, starts[dkey]); + + if (offset_adj) { + deduper->som_log_dirty |= 1; + } else { + deduper->som_log_dirty |= 2; + } + + return DEDUPE_SKIP; + } + } + + return DEDUPE_CONTINUE; +} + +static really_inline +enum DedupeResult dedupeCatchupSom(const struct RoseEngine *rose, + const struct internal_report *ri, + struct hs_scratch *scratch, u64a offset, + u64a from_offset, u64a to_offset) { + DEBUG_PRINTF("offset=%llu, match=[%llu,%llu], dkey=%u\n", offset, + from_offset, to_offset, ri->dkey); + DEBUG_PRINTF("report type=%u, quashSom=%d\n", ri->type, ri->quashSom); + + struct match_deduper *deduper = &scratch->deduper; + if (offset != deduper->current_report_offset) { + assert(deduper->current_report_offset == ~0ULL || + deduper->current_report_offset < offset); + if (offset == deduper->current_report_offset + 1) { + fatbit_clear(deduper->log[offset % 2]); + } else { + fatbit_clear(deduper->log[0]); + fatbit_clear(deduper->log[1]); + } + + if (flushStoredSomMatches(scratch, offset)) { + return DEDUPE_HALT; + } + deduper->current_report_offset = offset; + } + + const u32 dkey = ri->dkey; + if (dkey != MO_INVALID_IDX) { + const u32 dkeyCount = rose->dkeyCount; + const s32 offset_adj = ri->offsetAdjust; + if (ri->quashSom) { + DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); + assert(offset_adj == 0 || offset_adj == -1); + if (fatbit_set(deduper->log[to_offset % 2], dkeyCount, dkey)) { + /* we have already raised this report at this offset, squash + * dupe match. */ + DEBUG_PRINTF("dedupe\n"); + return DEDUPE_SKIP; + } + } else { + /* SOM external event */ + DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); + assert(offset_adj == 0 || offset_adj == -1); + u64a *starts = deduper->som_start_log[to_offset % 2]; + if (fatbit_set(deduper->som_log[to_offset % 2], dkeyCount, dkey)) { + starts[dkey] = MIN(starts[dkey], from_offset); + } else { + starts[dkey] = from_offset; + } + DEBUG_PRINTF("starts[%u]=%llu\n", dkey, starts[dkey]); + + if (offset_adj) { + deduper->som_log_dirty |= 1; + } else { + deduper->som_log_dirty |= 2; + } + + return DEDUPE_SKIP; + } + } + + return DEDUPE_CONTINUE; +} + +static really_inline +int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, + char is_simple, char do_som) { + assert(id != MO_INVALID_IDX); // Should never get an invalid ID. + assert(scratch); + assert(scratch->magic == SCRATCH_MAGIC); + + struct core_info *ci = &scratch->core_info; + const struct RoseEngine *rose = ci->rose; + DEBUG_PRINTF("internal report %u\n", id); + const struct internal_report *ri = getInternalReport(rose, id); + + assert(isExternalReport(ri)); /* only external reports should reach here */ + + s32 offset_adj = ri->offsetAdjust; + u64a to_offset = offset; + u64a from_offset = 0; + + u32 flags = 0; +#ifndef RELEASE_BUILD + if (offset_adj) { + // alert testing tools that we've got adjusted matches + flags |= HS_MATCH_FLAG_ADJUSTED; + } +#endif + + DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " + "offsetAdj=%d\n", offset, id, ri->type, ri->onmatch, + offset_adj); + + if (unlikely(can_stop_matching(scratch))) { /* ok - we are from rose */ + DEBUG_PRINTF("pre broken - halting\n"); + return MO_HALT_MATCHING; + } + + if (!is_simple && ri->hasBounds) { + assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET); + assert(ri->minOffset <= ri->maxOffset); + if (offset < ri->minOffset || offset > ri->maxOffset) { + DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n", + offset, ri->minOffset, ri->maxOffset); + return ROSE_CONTINUE_MATCHING_NO_EXHAUST; + } + } + + if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) { + DEBUG_PRINTF("ate exhausted match\n"); + return MO_CONTINUE_MATCHING; + } + + if (ri->type == EXTERNAL_CALLBACK) { + from_offset = 0; + } else if (do_som) { + from_offset = handleSomExternal(scratch, ri, to_offset); + } + + to_offset += offset_adj; + assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); + + if (do_som && ri->minLength) { + if (!satisfiesMinLength(ri->minLength, from_offset, to_offset)) { + return ROSE_CONTINUE_MATCHING_NO_EXHAUST; + } + if (ri->quashSom) { + from_offset = 0; + } + } + + DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", + from_offset, to_offset, ri->onmatch, ci->userContext); + + int halt = 0; + + enum DedupeResult dedupe_rv = dedupeCatchup(rose, ri, scratch, offset, + from_offset, to_offset, do_som); + switch (dedupe_rv) { + case DEDUPE_HALT: + halt = 1; + goto exit; + case DEDUPE_SKIP: + halt = 0; + goto exit; + case DEDUPE_CONTINUE: + break; + } + + halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset, + flags, ci->userContext); +exit: + if (halt) { + DEBUG_PRINTF("callback requested to terminate matches\n"); + ci->broken = BROKEN_FROM_USER; + return MO_HALT_MATCHING; + } + + if (!is_simple && ri->ekey != END_EXHAUST) { + markAsMatched(ci->exhaustionVector, ri->ekey); + return MO_CONTINUE_MATCHING; + } else { + return ROSE_CONTINUE_MATCHING_NO_EXHAUST; + } +} + +/** + * \brief Deliver the given report to the user callback. + * + * Assumes all preconditions (bounds, exhaustion etc) have been checked and + * that dedupe catchup has been done. + */ +static really_inline +int roseDeliverReport(u64a offset, ReportID id, struct hs_scratch *scratch, + char is_exhaustible) { + assert(id != MO_INVALID_IDX); // Should never get an invalid ID. + assert(scratch); + assert(scratch->magic == SCRATCH_MAGIC); + + struct core_info *ci = &scratch->core_info; + const struct RoseEngine *rose = ci->rose; + DEBUG_PRINTF("internal report %u\n", id); + const struct internal_report *ri = getInternalReport(rose, id); + + assert(isExternalReport(ri)); /* only external reports should reach here */ + + const s32 offset_adj = ri->offsetAdjust; + u32 flags = 0; +#ifndef RELEASE_BUILD + if (offset_adj) { + // alert testing tools that we've got adjusted matches + flags |= HS_MATCH_FLAG_ADJUSTED; + } +#endif + + DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " + "offsetAdj=%d\n", offset, id, ri->type, ri->onmatch, + offset_adj); + + assert(!can_stop_matching(scratch)); + assert(!ri->hasBounds || + (offset >= ri->minOffset && offset <= ri->maxOffset)); + assert(ri->type == EXTERNAL_CALLBACK); + assert(!ri->minLength); + assert(!ri->quashSom); + assert(ri->ekey == INVALID_EKEY || + !isExhausted(ci->exhaustionVector, ri->ekey)); + + u64a from_offset = 0; + u64a to_offset = offset + offset_adj; + + DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", + from_offset, to_offset, ri->onmatch, ci->userContext); + + int halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, + to_offset, flags, ci->userContext); + if (halt) { + DEBUG_PRINTF("callback requested to terminate matches\n"); + ci->broken = BROKEN_FROM_USER; + return MO_HALT_MATCHING; + } + + if (is_exhaustible) { + assert(ri->ekey != INVALID_EKEY); + markAsMatched(ci->exhaustionVector, ri->ekey); + return MO_CONTINUE_MATCHING; + } else { + return ROSE_CONTINUE_MATCHING_NO_EXHAUST; + } +} + +static really_inline +int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, + struct hs_scratch *scratch, char is_simple) { + assert(id != MO_INVALID_IDX); // Should never get an invalid ID. + assert(scratch); + assert(scratch->magic == SCRATCH_MAGIC); + + u32 flags = 0; + + struct core_info *ci = &scratch->core_info; + const struct RoseEngine *rose = ci->rose; + const struct internal_report *ri = getInternalReport(rose, id); + + /* internal events should be handled by rose directly */ + assert(ri->type == EXTERNAL_CALLBACK); + + DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " + "offsetAdj=%d\n", to_offset, id, ri->type, ri->onmatch, + ri->offsetAdjust); + + if (unlikely(can_stop_matching(scratch))) { + DEBUG_PRINTF("pre broken - halting\n"); + return MO_HALT_MATCHING; + } + + if (!is_simple && ri->hasBounds) { + assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET); + if (to_offset < ri->minOffset || to_offset > ri->maxOffset) { + DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n", + to_offset, ri->minOffset, ri->maxOffset); + return MO_CONTINUE_MATCHING; + } + } + + int halt = 0; + + if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) { + DEBUG_PRINTF("ate exhausted match\n"); + goto exit; + } + + u64a offset = to_offset; + + to_offset += ri->offsetAdjust; + assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); + + if (!is_simple && ri->minLength) { + if (!satisfiesMinLength(ri->minLength, from_offset, to_offset)) { + return MO_CONTINUE_MATCHING; + } + if (ri->quashSom) { + from_offset = 0; + } + } + + DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", + from_offset, to_offset, ri->onmatch, ci->userContext); + +#ifndef RELEASE_BUILD + if (ri->offsetAdjust != 0) { + // alert testing tools that we've got adjusted matches + flags |= HS_MATCH_FLAG_ADJUSTED; + } +#endif + + enum DedupeResult dedupe_rv = + dedupeCatchupSom(rose, ri, scratch, offset, from_offset, to_offset); + switch (dedupe_rv) { + case DEDUPE_HALT: + halt = 1; + goto exit; + case DEDUPE_SKIP: + halt = 0; + goto exit; + case DEDUPE_CONTINUE: + break; + } + + halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset, + flags, ci->userContext); + + if (!is_simple) { + markAsMatched(ci->exhaustionVector, ri->ekey); + } + +exit: + if (halt) { + DEBUG_PRINTF("callback requested to terminate matches\n"); + ci->broken = BROKEN_FROM_USER; + return MO_HALT_MATCHING; + } + + return MO_CONTINUE_MATCHING; +} + +/** + * \brief Deliver the given SOM report to the user callback. + * + * Assumes all preconditions (bounds, exhaustion etc) have been checked and + * that dedupe catchup has been done. + */ +static really_inline +int roseDeliverSomReport(u64a from_offset, u64a to_offset, ReportID id, + struct hs_scratch *scratch, char is_exhaustible) { + assert(id != MO_INVALID_IDX); // Should never get an invalid ID. + assert(scratch); + assert(scratch->magic == SCRATCH_MAGIC); + + u32 flags = 0; + + struct core_info *ci = &scratch->core_info; + const struct RoseEngine *rose = ci->rose; + const struct internal_report *ri = getInternalReport(rose, id); + + assert(isExternalReport(ri)); /* only external reports should reach here */ + + DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " + "offsetAdj=%d\n", to_offset, id, ri->type, ri->onmatch, + ri->offsetAdjust); + + assert(!can_stop_matching(scratch)); + assert(!ri->hasBounds || + (to_offset >= ri->minOffset && to_offset <= ri->maxOffset)); + assert(ri->ekey == INVALID_EKEY || + !isExhausted(ci->exhaustionVector, ri->ekey)); + + to_offset += ri->offsetAdjust; + assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); + + assert(!ri->minLength || + satisfiesMinLength(ri->minLength, from_offset, to_offset)); + assert(!ri->quashSom || from_offset == 0); + + DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", + from_offset, to_offset, ri->onmatch, ci->userContext); + +#ifndef RELEASE_BUILD + if (ri->offsetAdjust != 0) { + // alert testing tools that we've got adjusted matches + flags |= HS_MATCH_FLAG_ADJUSTED; + } +#endif + + int halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, + to_offset, flags, ci->userContext); + + if (halt) { + DEBUG_PRINTF("callback requested to terminate matches\n"); + ci->broken = BROKEN_FROM_USER; + return MO_HALT_MATCHING; + } + + if (is_exhaustible) { + assert(ri->ekey != INVALID_EKEY); + markAsMatched(ci->exhaustionVector, ri->ekey); + return MO_CONTINUE_MATCHING; + } else { + return ROSE_CONTINUE_MATCHING_NO_EXHAUST; + } +} + +#endif // REPORT_H diff --git a/src/rose/eod.c b/src/rose/eod.c index 7bbf8faf..ade45727 100644 --- a/src/rose/eod.c +++ b/src/rose/eod.c @@ -47,9 +47,9 @@ void initContext(const struct RoseEngine *t, char *state, u64a offset, tctxt->cb = callback; tctxt->cb_som = som_callback; tctxt->lastMatchOffset = 0; - tctxt->minMatchOffset = 0; - tctxt->minNonMpvMatchOffset = 0; - tctxt->next_mpv_offset = 0; + tctxt->minMatchOffset = offset; + tctxt->minNonMpvMatchOffset = offset; + tctxt->next_mpv_offset = offset; tctxt->curr_anchored_loc = MMB_INVALID; tctxt->curr_row_offset = 0; @@ -146,14 +146,16 @@ int eodNfaSomCallback(u64a from_offset, u64a to_offset, ReportID report, /** * \brief Check for (and deliver) reports from active output-exposed (suffix * or outfix) NFAs. + * + * \return MO_HALT_MATCHING if the user instructs us to stop. */ static rose_inline -void roseCheckNfaEod(const struct RoseEngine *t, char *state, +int roseCheckNfaEod(const struct RoseEngine *t, char *state, struct hs_scratch *scratch, u64a offset, const char is_streaming) { if (!t->eodNfaIterOffset) { DEBUG_PRINTF("no engines that report at EOD\n"); - return; + return MO_CONTINUE_MATCHING; } /* data, len is used for state decompress, should be full available data */ @@ -194,9 +196,11 @@ void roseCheckNfaEod(const struct RoseEngine *t, char *state, eodNfaSomCallback, scratch) == MO_HALT_MATCHING) { DEBUG_PRINTF("user instructed us to stop\n"); - return; + return MO_HALT_MATCHING; } } + + return MO_CONTINUE_MATCHING; } static rose_inline @@ -283,7 +287,10 @@ void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset, return; } - roseCheckNfaEod(t, state, scratch, offset, is_streaming); + if (roseCheckNfaEod(t, state, scratch, offset, is_streaming) == + MO_HALT_MATCHING) { + return; + } if (!t->eodIterProgramOffset && !t->ematcherOffset) { DEBUG_PRINTF("no eod accepts\n"); @@ -291,8 +298,7 @@ void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset, } // Handle pending EOD reports. - int itrv = roseEodRunIterator(t, offset, scratch); - if (itrv == MO_HALT_MATCHING) { + if (roseEodRunIterator(t, offset, scratch) == MO_HALT_MATCHING) { return; } @@ -303,15 +309,17 @@ void roseEodExec_i(const struct RoseEngine *t, char *state, u64a offset, mmbit_clear(getRoleState(state), t->rolesWithStateCount); mmbit_clear(getActiveLeafArray(t, state), t->activeArrayCount); - hwlmcb_rv_t rv = roseEodRunMatcher(t, offset, scratch, is_streaming); - if (rv == HWLM_TERMINATE_MATCHING) { + if (roseEodRunMatcher(t, offset, scratch, is_streaming) == + HWLM_TERMINATE_MATCHING) { return; } cleanupAfterEodMatcher(t, state, offset, scratch); // Fire any new EOD reports. - roseEodRunIterator(t, offset, scratch); + if (roseEodRunIterator(t, offset, scratch) == MO_HALT_MATCHING) { + return; + } roseCheckEodSuffixes(t, state, offset, scratch); } diff --git a/src/rose/match.c b/src/rose/match.c index 6397b90e..a91c0365 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -253,33 +253,53 @@ event_enqueued: return HWLM_CONTINUE_MATCHING; } +/* handles the firing of external matches */ +static rose_inline +hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, ReportID id, u64a end, + struct hs_scratch *scratch) { + struct RoseContext *tctxt = &scratch->tctxt; + + assert(end == tctxt->minMatchOffset); + DEBUG_PRINTF("firing callback id=%u, end=%llu\n", id, end); + updateLastMatchOffset(tctxt, end); + + int cb_rv = tctxt->cb(end, id, scratch); + if (cb_rv == MO_HALT_MATCHING) { + DEBUG_PRINTF("termination requested\n"); + return HWLM_TERMINATE_MATCHING; + } + + if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { + return HWLM_CONTINUE_MATCHING; + } + + return roseHaltIfExhausted(t, scratch); +} + /* handles catchup, som, cb, etc */ static really_inline hwlmcb_rv_t roseHandleReport(const struct RoseEngine *t, char *state, struct RoseContext *tctxt, ReportID id, u64a offset, char in_anchored) { + struct hs_scratch *scratch = tctxtToScratch(tctxt); + + if (roseCatchUpTo(t, state, offset, scratch, in_anchored) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + const struct internal_report *ri = getInternalReport(t, id); - if (ri) { - // Mildly cheesy performance hack: if this report is already exhausted, - // we can quash the match here. - if (ri->ekey != INVALID_EKEY) { - const struct hs_scratch *scratch = tctxtToScratch(tctxt); - if (isExhausted(scratch->core_info.exhaustionVector, ri->ekey)) { - DEBUG_PRINTF("eating exhausted match (report %u, ekey %u)\n", - ri->onmatch, ri->ekey); - return HWLM_CONTINUE_MATCHING; - } - } - if (isInternalSomReport(ri)) { - return roseHandleSom(t, state, id, offset, tctxt, in_anchored); + roseHandleSom(t, scratch, id, offset); + return HWLM_CONTINUE_MATCHING; } else if (ri->type == INTERNAL_ROSE_CHAIN) { return roseCatchUpAndHandleChainMatch(t, state, id, offset, tctxt, in_anchored); } } - return roseHandleMatch(t, state, id, offset, tctxt, in_anchored); + + return roseHandleMatch(t, id, offset, scratch); } static really_inline diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h index f76689f4..1e1356e1 100644 --- a/src/rose/program_runtime.h +++ b/src/rose/program_runtime.h @@ -34,6 +34,7 @@ #include "infix.h" #include "match.h" #include "miracle.h" +#include "report.h" #include "rose.h" #include "rose_internal.h" #include "rose_program.h" @@ -566,29 +567,20 @@ void roseTriggerInfix(const struct RoseEngine *t, u64a start, u64a end, u32 qi, pushQueueSom(q, topEvent, loc, start); } -/* handles the firing of external matches */ static rose_inline -hwlmcb_rv_t roseHandleMatch(const struct RoseEngine *t, char *state, - ReportID id, u64a end, struct RoseContext *tctxt, - char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); +hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch, + ReportID id, u64a end, char is_exhaustible) { + assert(end == scratch->tctxt.minMatchOffset); + DEBUG_PRINTF("firing callback id=%u, end=%llu\n", id, end); + updateLastMatchOffset(&scratch->tctxt, end); - if (roseCatchUpTo(t, state, end, scratch, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - assert(end == tctxt->minMatchOffset); - DEBUG_PRINTF("firing callback reportId=%u, end=%llu\n", id, end); - updateLastMatchOffset(tctxt, end); - - int cb_rv = tctxt->cb(end, id, scratch); + int cb_rv = roseDeliverReport(end, id, scratch, is_exhaustible); if (cb_rv == MO_HALT_MATCHING) { DEBUG_PRINTF("termination requested\n"); return HWLM_TERMINATE_MATCHING; } - if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { + if (!is_exhaustible || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { return HWLM_CONTINUE_MATCHING; } @@ -613,76 +605,38 @@ hwlmcb_rv_t roseCatchUpAndHandleChainMatch(const struct RoseEngine *t, return roseHandleChainMatch(t, r, end, tctxt, in_anchored, 0); } -static rose_inline -hwlmcb_rv_t roseSomCatchup(const struct RoseEngine *t, char *state, u64a end, - struct RoseContext *tctxt, char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - - // In SOM processing, we may be able to limit or entirely avoid catchup. - - DEBUG_PRINTF("entry\n"); - - if (end == tctxt->minMatchOffset) { - DEBUG_PRINTF("already caught up\n"); - return HWLM_CONTINUE_MATCHING; - } - - DEBUG_PRINTF("catching up all NFAs\n"); - if (roseCatchUpTo(t, state, end, scratch, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - updateMinMatchOffset(tctxt, end); - return HWLM_CONTINUE_MATCHING; -} - static really_inline -hwlmcb_rv_t roseHandleSom(const struct RoseEngine *t, char *state, ReportID id, - u64a end, struct RoseContext *tctxt, - char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); - +void roseHandleSom(const struct RoseEngine *t, struct hs_scratch *scratch, + ReportID id, u64a end) { DEBUG_PRINTF("id=%u, end=%llu, minMatchOffset=%llu\n", id, end, - tctxt->minMatchOffset); + scratch->tctxt.minMatchOffset); // Reach into reports and handle internal reports that just manipulate SOM // slots ourselves, rather than going through the callback. - if (roseSomCatchup(t, state, end, tctxt, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } + assert(end == scratch->tctxt.minMatchOffset); + DEBUG_PRINTF("firing som callback id=%u, end=%llu\n", id, end); + updateLastMatchOffset(&scratch->tctxt, end); const struct internal_report *ri = getInternalReport(t, id); handleSomInternal(scratch, ri, end); - - return HWLM_CONTINUE_MATCHING; } static rose_inline -hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, char *state, - ReportID id, u64a start, u64a end, - struct RoseContext *tctxt, char in_anchored) { - struct hs_scratch *scratch = tctxtToScratch(tctxt); +hwlmcb_rv_t roseReportSom(const struct RoseEngine *t, + struct hs_scratch *scratch, ReportID id, u64a start, + u64a end, char is_exhaustible) { + assert(end == scratch->tctxt.minMatchOffset); + DEBUG_PRINTF("firing som callback id=%u, end=%llu\n", id, end); + updateLastMatchOffset(&scratch->tctxt, end); - if (roseCatchUpTo(t, state, end, scratch, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - - DEBUG_PRINTF("firing som callback reportId=%u, start=%llu end=%llu\n", id, - start, end); - DEBUG_PRINTF(" last match %llu\n", tctxt->lastMatchOffset); - assert(end == tctxt->minMatchOffset); - - updateLastMatchOffset(tctxt, end); - int cb_rv = tctxt->cb_som(start, end, id, scratch); + int cb_rv = roseDeliverSomReport(start, end, id, scratch, is_exhaustible); if (cb_rv == MO_HALT_MATCHING) { DEBUG_PRINTF("termination requested\n"); return HWLM_TERMINATE_MATCHING; } - if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { + if (!is_exhaustible || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { return HWLM_CONTINUE_MATCHING; } @@ -690,23 +644,19 @@ hwlmcb_rv_t roseHandleSomMatch(const struct RoseEngine *t, char *state, } static rose_inline -hwlmcb_rv_t roseHandleSomSom(const struct RoseEngine *t, char *state, - ReportID id, u64a start, u64a end, - struct RoseContext *tctxt, char in_anchored) { +void roseHandleSomSom(const struct RoseEngine *t, ReportID id, u64a start, + u64a end, struct hs_scratch *scratch) { DEBUG_PRINTF("id=%u, start=%llu, end=%llu, minMatchOffset=%llu\n", - id, start, end, tctxt->minMatchOffset); + id, start, end, scratch->tctxt.minMatchOffset); // Reach into reports and handle internal reports that just manipulate SOM // slots ourselves, rather than going through the callback. - if (roseSomCatchup(t, state, end, tctxt, in_anchored) - == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } + assert(end == scratch->tctxt.minMatchOffset); + updateLastMatchOffset(&scratch->tctxt, end); const struct internal_report *ri = getInternalReport(t, id); - setSomFromSomAware(tctxtToScratch(tctxt), ri, start, end); - return HWLM_CONTINUE_MATCHING; + setSomFromSomAware(scratch, ri, start, end); } static really_inline @@ -848,14 +798,11 @@ u64a roseGetHaigSom(const struct RoseEngine *t, const u32 qi, } static rose_inline -char roseCheckRootBounds(u64a end, u32 min_bound, u32 max_bound) { - assert(max_bound <= ROSE_BOUND_INF); +char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) { + DEBUG_PRINTF("check offset=%llu against bounds [%llu,%llu]\n", end, + min_bound, max_bound); assert(min_bound <= max_bound); - - if (end < min_bound) { - return 0; - } - return max_bound == ROSE_BOUND_INF || end <= max_bound; + return end >= min_bound && end <= max_bound; } @@ -956,9 +903,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(CHECK_BOUNDS) { - if (!in_anchored && - !roseCheckRootBounds(end, ri->min_bound, ri->max_bound)) { - DEBUG_PRINTF("failed root bounds check\n"); + if (!roseCheckBounds(end, ri->min_bound, ri->max_bound)) { + DEBUG_PRINTF("failed bounds check\n"); assert(ri->fail_jump); // must progress pc += ri->fail_jump; continue; @@ -1003,6 +949,14 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CATCH_UP) { + if (roseCatchUpTo(t, scratch->core_info.state, end, scratch, + in_anchored) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SOM_ADJUST) { assert(ri->distance <= end); som = end - ri->distance; @@ -1016,6 +970,20 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SOM_FROM_REPORT) { + const struct internal_report *ir = + getInternalReport(t, ri->report); + som = handleSomExternal(scratch, ir, end); + DEBUG_PRINTF("som from report %u is %llu\n", ri->report, som); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_ZERO) { + DEBUG_PRINTF("setting SOM to zero\n"); + som = 0; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(TRIGGER_INFIX) { roseTriggerInfix(t, som, end, ri->queue, ri->event, ri->cancel, tctxt); @@ -1033,13 +1001,40 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(REPORT) { - if (roseHandleMatch(t, scratch->core_info.state, - ri->report, end, tctxt, - in_anchored) == HWLM_TERMINATE_MATCHING) { + PROGRAM_CASE(DEDUPE) { + const struct internal_report *ir = + getInternalReport(t, ri->report); + const char do_som = t->hasSom; // FIXME: constant propagate + enum DedupeResult rv = dedupeCatchup( + t, ir, scratch, end, som, end + ir->offsetAdjust, do_som); + switch (rv) { + case DEDUPE_HALT: return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + case DEDUPE_CONTINUE: + break; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DEDUPE_SOM) { + const struct internal_report *ir = + getInternalReport(t, ri->report); + enum DedupeResult rv = dedupeCatchupSom( + t, ir, scratch, end, som, end + ir->offsetAdjust); + switch (rv) { + case DEDUPE_HALT: + return HWLM_TERMINATE_MATCHING; + case DEDUPE_SKIP: + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + case DEDUPE_CONTINUE: + break; } - work_done = 1; } PROGRAM_NEXT_INSTRUCTION @@ -1053,18 +1048,32 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(REPORT_EOD) { - if (tctxt->cb(end, ri->report, scratch) == MO_HALT_MATCHING) { + PROGRAM_CASE(REPORT_SOM_INT) { + roseHandleSom(t, scratch, ri->report, end); + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_AWARE) { + roseHandleSomSom(t, ri->report, som, end, scratch); + work_done = 1; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT) { + const char is_exhaustible = 0; + if (roseReport(t, scratch, ri->report, end, is_exhaustible) == + HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(REPORT_SOM_INT) { - if (roseHandleSom(t, scratch->core_info.state, ri->report, - end, tctxt, - in_anchored) == HWLM_TERMINATE_MATCHING) { + PROGRAM_CASE(REPORT_EXHAUST) { + const char is_exhaustible = 1; + if (roseReport(t, scratch, ri->report, end, is_exhaustible) == + HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; @@ -1072,25 +1081,57 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, u32 programOffset, PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM) { - if (roseHandleSomSom(t, scratch->core_info.state, - ri->report, som, end, tctxt, - in_anchored) == HWLM_TERMINATE_MATCHING) { + const char is_exhaustible = 0; + if (roseReportSom(t, scratch, ri->report, som, end, + is_exhaustible) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(REPORT_SOM_KNOWN) { - if (roseHandleSomMatch(t, scratch->core_info.state, ri->report, - som, end, tctxt, in_anchored) == - HWLM_TERMINATE_MATCHING) { + PROGRAM_CASE(REPORT_SOM_EXHAUST) { + const char is_exhaustible = 1; + if (roseReportSom(t, scratch, ri->report, som, end, + is_exhaustible) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } work_done = 1; } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_EXHAUSTED) { + DEBUG_PRINTF("check ekey %u\n", ri->ekey); + assert(ri->ekey != INVALID_EKEY); + assert(ri->ekey < t->ekeyCount); + const char *evec = scratch->core_info.exhaustionVector; + if (isExhausted(evec, ri->ekey)) { + DEBUG_PRINTF("ekey %u already set, match is exhausted\n", + ri->ekey); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MIN_LENGTH) { + DEBUG_PRINTF("check min length %llu (adj %d)\n", ri->min_length, + ri->end_adj); + assert(ri->min_length > 0); + assert(ri->end_adj == 0 || ri->end_adj == -1); + assert(som == HS_OFFSET_PAST_HORIZON || som <= end); + if (som != HS_OFFSET_PAST_HORIZON && + ((end + ri->end_adj) - som < ri->min_length)) { + DEBUG_PRINTF("failed check, match len %llu\n", + (u64a)((end + ri->end_adj) - som)); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + continue; + } + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SET_STATE) { DEBUG_PRINTF("set state index %u\n", ri->index); mmbit_set(getRoleState(scratch->core_info.state), diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index a042eb2c..c7c0891a 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -141,25 +141,42 @@ struct left_build_info { vector lookaround; // alternative implementation to the NFA }; +/** + * \brief Possible jump targets for roles that perform checks. + * + * Fixed up into offsets before the program is written to bytecode. + */ +enum class JumpTarget { + NO_JUMP, //!< Instruction does not jump. + PROGRAM_END, //!< Jump to end of program. + NEXT_BLOCK, //!< Jump to start of next block (sparse iter check, etc). + FIXUP_DONE, //!< Target fixup already applied. +}; + /** \brief Role instruction model used at compile time. */ class RoseInstruction { public: - RoseInstruction() { - memset(&u, 0, sizeof(u)); - u.end.code = ROSE_INSTR_END; - } - - explicit RoseInstruction(enum RoseInstructionCode c) { + RoseInstruction(enum RoseInstructionCode c, JumpTarget j) : target(j) { memset(&u, 0, sizeof(u)); u.end.code = c; } + explicit RoseInstruction(enum RoseInstructionCode c) + : RoseInstruction(c, JumpTarget::NO_JUMP) {} + bool operator<(const RoseInstruction &a) const { + if (code() != a.code()) { + return code() < a.code(); + } + if (target != a.target) { + return target < a.target; + } return memcmp(&u, &a.u, sizeof(u)) < 0; } bool operator==(const RoseInstruction &a) const { - return memcmp(&u, &a.u, sizeof(u)) == 0; + return code() == a.code() && target == a.target && + memcmp(&u, &a.u, sizeof(u)) == 0; } enum RoseInstructionCode code() const { @@ -180,16 +197,24 @@ public: case ROSE_INSTR_CHECK_LEFTFIX: return &u.checkLeftfix; case ROSE_INSTR_ANCHORED_DELAY: return &u.anchoredDelay; case ROSE_INSTR_PUSH_DELAYED: return &u.pushDelayed; + case ROSE_INSTR_CATCH_UP: return &u.catchUp; case ROSE_INSTR_SOM_ADJUST: return &u.somAdjust; case ROSE_INSTR_SOM_LEFTFIX: return &u.somLeftfix; + case ROSE_INSTR_SOM_FROM_REPORT: return &u.somFromReport; + case ROSE_INSTR_SOM_ZERO: return &u.somZero; case ROSE_INSTR_TRIGGER_INFIX: return &u.triggerInfix; case ROSE_INSTR_TRIGGER_SUFFIX: return &u.triggerSuffix; - case ROSE_INSTR_REPORT: return &u.report; + case ROSE_INSTR_DEDUPE: return &u.dedupe; + case ROSE_INSTR_DEDUPE_SOM: return &u.dedupeSom; case ROSE_INSTR_REPORT_CHAIN: return &u.reportChain; - case ROSE_INSTR_REPORT_EOD: return &u.reportEod; case ROSE_INSTR_REPORT_SOM_INT: return &u.reportSomInt; + case ROSE_INSTR_REPORT_SOM_AWARE: return &u.reportSom; + case ROSE_INSTR_REPORT: return &u.report; + case ROSE_INSTR_REPORT_EXHAUST: return &u.reportExhaust; case ROSE_INSTR_REPORT_SOM: return &u.reportSom; - case ROSE_INSTR_REPORT_SOM_KNOWN: return &u.reportSomKnown; + case ROSE_INSTR_REPORT_SOM_EXHAUST: return &u.reportSomExhaust; + case ROSE_INSTR_CHECK_EXHAUSTED: return &u.checkExhausted; + case ROSE_INSTR_CHECK_MIN_LENGTH: return &u.checkMinLength; case ROSE_INSTR_SET_STATE: return &u.setState; case ROSE_INSTR_SET_GROUPS: return &u.setGroups; case ROSE_INSTR_SQUASH_GROUPS: return &u.squashGroups; @@ -214,16 +239,24 @@ public: case ROSE_INSTR_CHECK_LEFTFIX: return sizeof(u.checkLeftfix); case ROSE_INSTR_ANCHORED_DELAY: return sizeof(u.anchoredDelay); case ROSE_INSTR_PUSH_DELAYED: return sizeof(u.pushDelayed); + case ROSE_INSTR_CATCH_UP: return sizeof(u.catchUp); case ROSE_INSTR_SOM_ADJUST: return sizeof(u.somAdjust); case ROSE_INSTR_SOM_LEFTFIX: return sizeof(u.somLeftfix); + case ROSE_INSTR_SOM_FROM_REPORT: return sizeof(u.somFromReport); + case ROSE_INSTR_SOM_ZERO: return sizeof(u.somZero); case ROSE_INSTR_TRIGGER_INFIX: return sizeof(u.triggerInfix); case ROSE_INSTR_TRIGGER_SUFFIX: return sizeof(u.triggerSuffix); - case ROSE_INSTR_REPORT: return sizeof(u.report); + case ROSE_INSTR_DEDUPE: return sizeof(u.dedupe); + case ROSE_INSTR_DEDUPE_SOM: return sizeof(u.dedupeSom); case ROSE_INSTR_REPORT_CHAIN: return sizeof(u.reportChain); - case ROSE_INSTR_REPORT_EOD: return sizeof(u.reportEod); case ROSE_INSTR_REPORT_SOM_INT: return sizeof(u.reportSomInt); + case ROSE_INSTR_REPORT_SOM_AWARE: return sizeof(u.reportSom); + case ROSE_INSTR_REPORT: return sizeof(u.report); + case ROSE_INSTR_REPORT_EXHAUST: return sizeof(u.reportExhaust); case ROSE_INSTR_REPORT_SOM: return sizeof(u.reportSom); - case ROSE_INSTR_REPORT_SOM_KNOWN: return sizeof(u.reportSomKnown); + case ROSE_INSTR_REPORT_SOM_EXHAUST: return sizeof(u.reportSomExhaust); + case ROSE_INSTR_CHECK_EXHAUSTED: return sizeof(u.checkExhausted); + case ROSE_INSTR_CHECK_MIN_LENGTH: return sizeof(u.checkMinLength); case ROSE_INSTR_SET_STATE: return sizeof(u.setState); case ROSE_INSTR_SET_GROUPS: return sizeof(u.setGroups); case ROSE_INSTR_SQUASH_GROUPS: return sizeof(u.squashGroups); @@ -232,6 +265,7 @@ public: case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext); case ROSE_INSTR_END: return sizeof(u.end); } + assert(0); return 0; } @@ -246,16 +280,24 @@ public: ROSE_STRUCT_CHECK_LEFTFIX checkLeftfix; ROSE_STRUCT_ANCHORED_DELAY anchoredDelay; ROSE_STRUCT_PUSH_DELAYED pushDelayed; + ROSE_STRUCT_CATCH_UP catchUp; ROSE_STRUCT_SOM_ADJUST somAdjust; ROSE_STRUCT_SOM_LEFTFIX somLeftfix; + ROSE_STRUCT_SOM_FROM_REPORT somFromReport; + ROSE_STRUCT_SOM_ZERO somZero; ROSE_STRUCT_TRIGGER_INFIX triggerInfix; ROSE_STRUCT_TRIGGER_SUFFIX triggerSuffix; - ROSE_STRUCT_REPORT report; + ROSE_STRUCT_DEDUPE dedupe; + ROSE_STRUCT_DEDUPE_SOM dedupeSom; ROSE_STRUCT_REPORT_CHAIN reportChain; - ROSE_STRUCT_REPORT_EOD reportEod; ROSE_STRUCT_REPORT_SOM_INT reportSomInt; + ROSE_STRUCT_REPORT_SOM_AWARE reportSomAware; + ROSE_STRUCT_REPORT report; + ROSE_STRUCT_REPORT_EXHAUST reportExhaust; ROSE_STRUCT_REPORT_SOM reportSom; - ROSE_STRUCT_REPORT_SOM_KNOWN reportSomKnown; + ROSE_STRUCT_REPORT_SOM_EXHAUST reportSomExhaust; + ROSE_STRUCT_CHECK_EXHAUSTED checkExhausted; + ROSE_STRUCT_CHECK_MIN_LENGTH checkMinLength; ROSE_STRUCT_SET_STATE setState; ROSE_STRUCT_SET_GROUPS setGroups; ROSE_STRUCT_SQUASH_GROUPS squashGroups; @@ -264,11 +306,15 @@ public: ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext; ROSE_STRUCT_END end; } u; + + JumpTarget target; }; static size_t hash_value(const RoseInstruction &ri) { size_t val = 0; + boost::hash_combine(val, ri.code()); + boost::hash_combine(val, ri.target); const char *bytes = (const char *)ri.get(); const size_t len = ri.length(); for (size_t i = 0; i < len; i++) { @@ -2619,61 +2665,100 @@ flattenProgram(const vector> &programs) { vector out; vector offsets; // offset of each instruction (bytes) - vector targets; // jump target for each instruction + vector blocks; // track which block we're in + vector block_offsets; // start offsets for each block - DEBUG_PRINTF("%zu programs\n", programs.size()); + DEBUG_PRINTF("%zu program blocks\n", programs.size()); size_t curr_offset = 0; for (const auto &program : programs) { - DEBUG_PRINTF("program with %zu instructions\n", program.size()); + DEBUG_PRINTF("block with %zu instructions\n", program.size()); + block_offsets.push_back(curr_offset); for (const auto &ri : program) { + assert(ri.code() != ROSE_INSTR_END); out.push_back(ri); offsets.push_back(curr_offset); + blocks.push_back(block_offsets.size() - 1); curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); } - for (size_t i = 0; i < program.size(); i++) { - targets.push_back(curr_offset); - } } - // Add an END instruction. + // Add a final END instruction, which is its own block. out.emplace_back(ROSE_INSTR_END); + block_offsets.push_back(curr_offset); offsets.push_back(curr_offset); - targets.push_back(curr_offset); - assert(targets.size() == out.size()); assert(offsets.size() == out.size()); for (size_t i = 0; i < out.size(); i++) { auto &ri = out[i]; - switch (ri.code()) { - case ROSE_INSTR_ANCHORED_DELAY: - assert(targets[i] > offsets[i]); // jumps always progress - ri.u.anchoredDelay.done_jump = targets[i] - offsets[i]; + + u32 jump_target = 0; + switch (ri.target) { + case JumpTarget::NO_JUMP: + case JumpTarget::FIXUP_DONE: + continue; // Next instruction. + case JumpTarget::PROGRAM_END: + assert(i != out.size() - 1); + jump_target = offsets.back(); break; - case ROSE_INSTR_CHECK_ONLY_EOD: - assert(targets[i] > offsets[i]); - ri.u.checkOnlyEod.fail_jump = targets[i] - offsets[i]; - break; - case ROSE_INSTR_CHECK_BOUNDS: - assert(targets[i] > offsets[i]); - ri.u.checkBounds.fail_jump = targets[i] - offsets[i]; - break; - case ROSE_INSTR_CHECK_NOT_HANDLED: - assert(targets[i] > offsets[i]); - ri.u.checkNotHandled.fail_jump = targets[i] - offsets[i]; - break; - case ROSE_INSTR_CHECK_LOOKAROUND: - assert(targets[i] > offsets[i]); - ri.u.checkLookaround.fail_jump = targets[i] - offsets[i]; - break; - case ROSE_INSTR_CHECK_LEFTFIX: - assert(targets[i] > offsets[i]); - ri.u.checkLeftfix.fail_jump = targets[i] - offsets[i]; - break; - default: + case JumpTarget::NEXT_BLOCK: + assert(blocks[i] + 1 < block_offsets.size()); + jump_target = block_offsets[blocks[i] + 1]; break; } + + // We currently always make progress and never jump backwards. + assert(jump_target > offsets[i]); + assert(jump_target <= offsets.back()); + u32 jump_val = jump_target - offsets[i]; + + switch (ri.code()) { + case ROSE_INSTR_ANCHORED_DELAY: + ri.u.anchoredDelay.done_jump = jump_val; + break; + case ROSE_INSTR_CHECK_ONLY_EOD: + ri.u.checkOnlyEod.fail_jump = jump_val; + break; + case ROSE_INSTR_CHECK_BOUNDS: + ri.u.checkBounds.fail_jump = jump_val; + break; + case ROSE_INSTR_CHECK_NOT_HANDLED: + ri.u.checkNotHandled.fail_jump = jump_val; + break; + case ROSE_INSTR_CHECK_LOOKAROUND: + ri.u.checkLookaround.fail_jump = jump_val; + break; + case ROSE_INSTR_CHECK_LEFTFIX: + ri.u.checkLeftfix.fail_jump = jump_val; + break; + case ROSE_INSTR_DEDUPE: + ri.u.dedupe.fail_jump = jump_val; + break; + case ROSE_INSTR_DEDUPE_SOM: + ri.u.dedupeSom.fail_jump = jump_val; + break; + case ROSE_INSTR_CHECK_EXHAUSTED: + ri.u.checkExhausted.fail_jump = jump_val; + break; + case ROSE_INSTR_CHECK_MIN_LENGTH: + ri.u.checkMinLength.fail_jump = jump_val; + break; + case ROSE_INSTR_CHECK_STATE: + ri.u.checkState.fail_jump = jump_val; + break; + case ROSE_INSTR_SPARSE_ITER_BEGIN: + ri.u.sparseIterBegin.fail_jump = jump_val; + break; + case ROSE_INSTR_SPARSE_ITER_NEXT: + ri.u.sparseIterNext.fail_jump = jump_val; + break; + default: + assert(0); // Unhandled opcode? + break; + } + + ri.target = JumpTarget::FIXUP_DONE; } return out; @@ -2689,6 +2774,13 @@ u32 writeProgram(build_context &bc, const vector &program) { assert(program.back().code() == ROSE_INSTR_END); assert(program.size() >= 1); + // This program must have been flattened; i.e. all check instructions must + // have their jump offsets set. + assert(all_of(begin(program), end(program), [](const RoseInstruction &ri) { + return ri.target == JumpTarget::NO_JUMP || + ri.target == JumpTarget::FIXUP_DONE; + })); + auto it = bc.program_cache.find(program); if (it != end(bc.program_cache)) { DEBUG_PRINTF("reusing cached program at %u\n", it->second); @@ -2877,7 +2969,8 @@ void makeRoleLookaround(RoseBuildImpl &build, build_context &bc, RoseVertex v, } u32 look_count = verify_u32(look.size()); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_LOOKAROUND); + auto ri = RoseInstruction(ROSE_INSTR_CHECK_LOOKAROUND, + JumpTarget::NEXT_BLOCK); ri.u.checkLookaround.index = look_idx; ri.u.checkLookaround.count = look_count; program.push_back(ri); @@ -2898,7 +2991,7 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, assert(!build.cc.streaming || build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_LEFTFIX); + auto ri = RoseInstruction(ROSE_INSTR_CHECK_LEFTFIX, JumpTarget::NEXT_BLOCK); ri.u.checkLeftfix.queue = lni.queue; ri.u.checkLeftfix.lag = build.g[v].left.lag; ri.u.checkLeftfix.report = build.g[v].left.leftfix_report; @@ -2906,7 +2999,7 @@ void makeRoleCheckLeftfix(RoseBuildImpl &build, build_context &bc, RoseVertex v, } static -void makeRoleAnchoredDelay(RoseBuildImpl &build, UNUSED build_context &bc, +void makeRoleAnchoredDelay(RoseBuildImpl &build, build_context &bc, RoseVertex v, vector &program) { // Only relevant for roles that can be triggered by the anchored table. if (!build.isAnchored(v)) { @@ -2919,11 +3012,150 @@ void makeRoleAnchoredDelay(RoseBuildImpl &build, UNUSED build_context &bc, return; } - auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY); + auto ri = RoseInstruction(ROSE_INSTR_ANCHORED_DELAY, + JumpTarget::NEXT_BLOCK); ri.u.anchoredDelay.groups = build.g[v].groups; program.push_back(ri); } +static +void makeDedupe(const ReportID id, vector &report_block) { + auto ri = RoseInstruction(ROSE_INSTR_DEDUPE, JumpTarget::NEXT_BLOCK); + ri.u.dedupe.report = id; + report_block.push_back(move(ri)); +} + +static +void makeDedupeSom(const ReportID id, vector &report_block) { + auto ri = RoseInstruction(ROSE_INSTR_DEDUPE_SOM, JumpTarget::NEXT_BLOCK); + ri.u.dedupeSom.report = id; + report_block.push_back(move(ri)); +} + +static +void makeReport(RoseBuildImpl &build, const ReportID id, const bool has_som, + vector &program) { + assert(id < build.rm.numReports()); + const Report &report = build.rm.getReport(id); + + vector report_block; + + // If this report has an exhaustion key, we can check it in the program + // rather than waiting until we're in the callback adaptor. + if (report.ekey != INVALID_EKEY) { + auto ri = RoseInstruction(ROSE_INSTR_CHECK_EXHAUSTED, + JumpTarget::NEXT_BLOCK); + ri.u.checkExhausted.ekey = report.ekey; + report_block.push_back(move(ri)); + } + + // Similarly, we can handle min/max offset checks. + if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) { + auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS, + JumpTarget::NEXT_BLOCK); + ri.u.checkBounds.min_bound = report.minOffset; + ri.u.checkBounds.max_bound = report.maxOffset; + report_block.push_back(move(ri)); + } + + // Catch up -- everything except the INTERNAL_ROSE_CHAIN report needs this. + // TODO: this could be floated in front of all the reports and only done + // once. + if (report.type != INTERNAL_ROSE_CHAIN) { + program.emplace_back(ROSE_INSTR_CATCH_UP); + } + + // External SOM reports need their SOM value calculated. + if (isExternalSomReport(report)) { + auto ri = RoseInstruction(ROSE_INSTR_SOM_FROM_REPORT); + ri.u.somFromReport.report = id; + report_block.push_back(move(ri)); + } + + // Min length constraint. + if (report.minLength > 0) { + assert(build.hasSom); + auto ri = RoseInstruction(ROSE_INSTR_CHECK_MIN_LENGTH, + JumpTarget::NEXT_BLOCK); + ri.u.checkMinLength.end_adj = report.offsetAdjust; + ri.u.checkMinLength.min_length = report.minLength; + report_block.push_back(move(ri)); + } + + if (report.quashSom) { + report_block.emplace_back(ROSE_INSTR_SOM_ZERO); + } + + switch (report.type) { + case EXTERNAL_CALLBACK: + if (!has_som) { + makeDedupe(id, report_block); + if (report.ekey == INVALID_EKEY) { + report_block.emplace_back(ROSE_INSTR_REPORT); + report_block.back().u.report.report = id; + } else { + report_block.emplace_back(ROSE_INSTR_REPORT_EXHAUST); + report_block.back().u.reportExhaust.report = id; + } + } else { // has_som + makeDedupeSom(id, report_block); + if (report.ekey == INVALID_EKEY) { + report_block.emplace_back(ROSE_INSTR_REPORT_SOM); + report_block.back().u.reportSom.report = id; + } else { + report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST); + report_block.back().u.reportSomExhaust.report = id; + } + } + break; + case INTERNAL_SOM_LOC_SET: + case INTERNAL_SOM_LOC_SET_IF_UNSET: + case INTERNAL_SOM_LOC_SET_IF_WRITABLE: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: + case INTERNAL_SOM_LOC_COPY: + case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: + case INTERNAL_SOM_LOC_MAKE_WRITABLE: + case INTERNAL_SOM_LOC_SET_FROM: + case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: + if (has_som) { + report_block.emplace_back(ROSE_INSTR_REPORT_SOM_AWARE); + report_block.back().u.reportSomAware.report = id; + } else { + report_block.emplace_back(ROSE_INSTR_REPORT_SOM_INT); + report_block.back().u.reportSomInt.report = id; + } + break; + case INTERNAL_ROSE_CHAIN: + report_block.emplace_back(ROSE_INSTR_REPORT_CHAIN); + report_block.back().u.reportChain.report = id; + break; + case EXTERNAL_CALLBACK_SOM_REL: + case EXTERNAL_CALLBACK_SOM_STORED: + case EXTERNAL_CALLBACK_SOM_ABS: + case EXTERNAL_CALLBACK_SOM_REV_NFA: + makeDedupeSom(id, report_block); + if (report.ekey == INVALID_EKEY) { + report_block.emplace_back(ROSE_INSTR_REPORT_SOM); + report_block.back().u.reportSom.report = id; + } else { + report_block.emplace_back(ROSE_INSTR_REPORT_SOM_EXHAUST); + report_block.back().u.reportSomExhaust.report = id; + } + break; + default: + assert(0); + throw CompileError("Unable to generate bytecode."); + } + + assert(!report_block.empty()); + report_block = flattenProgram({report_block}); + assert(report_block.back().code() == ROSE_INSTR_END); + report_block.pop_back(); + insert(&program, program.end(), report_block); +} + static void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, vector &program) { @@ -2947,25 +3179,8 @@ void makeRoleReports(RoseBuildImpl &build, build_context &bc, RoseVertex v, has_som = true; } - // Write program instructions for reports. for (ReportID id : g[v].reports) { - assert(id < build.rm.numReports()); - const Report &ir = build.rm.getReport(id); - if (isInternalSomReport(ir)) { - auto ri = RoseInstruction(has_som ? ROSE_INSTR_REPORT_SOM - : ROSE_INSTR_REPORT_SOM_INT); - ri.u.report.report = id; - program.push_back(ri); - } else if (ir.type == INTERNAL_ROSE_CHAIN) { - auto ri = RoseInstruction(ROSE_INSTR_REPORT_CHAIN); - ri.u.report.report = id; - program.push_back(ri); - } else { - auto ri = RoseInstruction(has_som ? ROSE_INSTR_REPORT_SOM_KNOWN - : ROSE_INSTR_REPORT); - ri.u.report.report = id; - program.push_back(ri); - } + makeReport(build, id, has_som, program); } } @@ -3093,10 +3308,10 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, // Use the minimum literal length. u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v)); - u32 min_bound = g[e].minBound + lit_length; - u32 max_bound = g[e].maxBound == ROSE_BOUND_INF - ? ROSE_BOUND_INF - : g[e].maxBound + lit_length; + u64a min_bound = g[e].minBound + lit_length; + u64a max_bound = g[e].maxBound == ROSE_BOUND_INF + ? ROSE_BOUND_INF + : g[e].maxBound + lit_length; if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { assert(g[u].max_offset != ROSE_BOUND_INF); @@ -3110,7 +3325,13 @@ void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, assert(max_bound <= ROSE_BOUND_INF); assert(min_bound <= max_bound); - auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS); + // CHECK_BOUNDS instruction uses 64-bit bounds, so we can use MAX_OFFSET + // (max value of a u64a) to represent ROSE_BOUND_INF. + if (max_bound == ROSE_BOUND_INF) { + max_bound = MAX_OFFSET; + } + + auto ri = RoseInstruction(ROSE_INSTR_CHECK_BOUNDS, JumpTarget::NEXT_BLOCK); ri.u.checkBounds.min_bound = min_bound; ri.u.checkBounds.max_bound = max_bound; @@ -3138,7 +3359,8 @@ vector makeProgram(RoseBuildImpl &build, build_context &bc, if (onlyAtEod(build, v)) { DEBUG_PRINTF("only at eod\n"); - program.push_back(RoseInstruction(ROSE_INSTR_CHECK_ONLY_EOD)); + program.push_back(RoseInstruction(ROSE_INSTR_CHECK_ONLY_EOD, + JumpTarget::NEXT_BLOCK)); } if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { @@ -3287,7 +3509,8 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, static void makeRoleCheckNotHandled(build_context &bc, RoseVertex v, vector &program) { - auto ri = RoseInstruction(ROSE_INSTR_CHECK_NOT_HANDLED); + auto ri = RoseInstruction(ROSE_INSTR_CHECK_NOT_HANDLED, + JumpTarget::NEXT_BLOCK); u32 handled_key; if (contains(bc.handledKeys, v)) { @@ -3328,48 +3551,42 @@ vector makePredProgram(RoseBuildImpl &build, build_context &bc, static u32 addPredBlocksSingle( map>> &predProgramLists, - u32 curr_offset, vector &program) { - assert(predProgramLists.size() == 1); + vector &program) { - u32 pred_state = predProgramLists.begin()->first; - auto subprog = flattenProgram(predProgramLists.begin()->second); + vector> prog_blocks; - // Check our pred state. - auto ri = RoseInstruction(ROSE_INSTR_CHECK_STATE); - ri.u.checkState.index = pred_state; - program.push_back(ri); - curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); + for (const auto &m : predProgramLists) { + const u32 &pred_state = m.first; + auto subprog = flattenProgram(m.second); - // Add subprogram. - for (const auto &ri : subprog) { - program.push_back(ri); - curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); - } - - const u32 end_offset = - curr_offset - ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); - - // Fix up the instruction operands. - curr_offset = 0; - for (size_t i = 0; i < program.size(); i++) { - auto &ri = program[i]; - switch (ri.code()) { - case ROSE_INSTR_CHECK_STATE: - ri.u.checkState.fail_jump = end_offset - curr_offset; - break; - default: - break; - } - curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + // Check our pred state. + auto ri = RoseInstruction(ROSE_INSTR_CHECK_STATE, + JumpTarget::NEXT_BLOCK); + ri.u.checkState.index = pred_state; + subprog.insert(begin(subprog), ri); + assert(subprog.back().code() == ROSE_INSTR_END); + subprog.pop_back(); + prog_blocks.push_back(move(subprog)); } + auto prog = flattenProgram(prog_blocks); + program.insert(end(program), begin(prog), end(prog)); return 0; // No iterator. } +static +u32 programLength(const vector &program) { + u32 len = 0; + for (const auto &ri : program) { + len += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); + } + return len; +} + static u32 addPredBlocksMulti(build_context &bc, map>> &predProgramLists, - u32 curr_offset, vector &program) { + vector &program) { assert(!predProgramLists.empty()); // First, add the iterator itself. @@ -3386,10 +3603,12 @@ u32 addPredBlocksMulti(build_context &bc, // Construct our program, starting with the SPARSE_ITER_BEGIN // instruction, keeping track of the jump offset for each sub-program. + vector sparse_program; vector jump_table; - program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN)); - curr_offset += ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); + sparse_program.push_back(RoseInstruction(ROSE_INSTR_SPARSE_ITER_BEGIN, + JumpTarget::PROGRAM_END)); + u32 curr_offset = programLength(program) + programLength(sparse_program); for (const auto &e : predProgramLists) { DEBUG_PRINTF("subprogram %zu has offset %u\n", jump_table.size(), @@ -3402,62 +3621,61 @@ u32 addPredBlocksMulti(build_context &bc, // with a SPARSE_ITER_NEXT. assert(!subprog.empty()); assert(subprog.back().code() == ROSE_INSTR_END); - subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT); + subprog.back() = RoseInstruction(ROSE_INSTR_SPARSE_ITER_NEXT, + JumpTarget::PROGRAM_END); } - for (const auto &ri : subprog) { - program.push_back(ri); - curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); - } + curr_offset += programLength(subprog); + insert(&sparse_program, end(sparse_program), subprog); } - const u32 end_offset = - curr_offset - ROUNDUP_N(program.back().length(), ROSE_INSTR_MIN_ALIGN); + // Strip the END instruction from the last block. + assert(sparse_program.back().code() == ROSE_INSTR_END); + sparse_program.pop_back(); + + sparse_program = flattenProgram({sparse_program}); // Write the jump table into the bytecode. const u32 jump_table_offset = add_to_engine_blob(bc, begin(jump_table), end(jump_table)); - // Fix up the instruction operands. + // Write jump table and iterator offset into sparse iter instructions. auto keys_it = begin(keys); - curr_offset = 0; - for (size_t i = 0; i < program.size(); i++) { - auto &ri = program[i]; + for (auto &ri : sparse_program) { switch (ri.code()) { case ROSE_INSTR_SPARSE_ITER_BEGIN: ri.u.sparseIterBegin.iter_offset = iter_offset; ri.u.sparseIterBegin.jump_table = jump_table_offset; - ri.u.sparseIterBegin.fail_jump = end_offset - curr_offset; break; case ROSE_INSTR_SPARSE_ITER_NEXT: ri.u.sparseIterNext.iter_offset = iter_offset; ri.u.sparseIterNext.jump_table = jump_table_offset; assert(keys_it != end(keys)); ri.u.sparseIterNext.state = *keys_it++; - ri.u.sparseIterNext.fail_jump = end_offset - curr_offset; break; default: break; } - curr_offset += ROUNDUP_N(ri.length(), ROSE_INSTR_MIN_ALIGN); } + program.insert(end(program), begin(sparse_program), end(sparse_program)); + return iter_offset; } static u32 addPredBlocks(build_context &bc, map>> &predProgramLists, - u32 curr_offset, vector &program, + vector &program, bool force_sparse_iter) { const size_t num_preds = predProgramLists.size(); if (num_preds == 0) { program = flattenProgram({program}); return 0; // No iterator. } else if (!force_sparse_iter && num_preds == 1) { - return addPredBlocksSingle(predProgramLists, curr_offset, program); + return addPredBlocksSingle(predProgramLists, program); } else { - return addPredBlocksMulti(bc, predProgramLists, curr_offset, program); + return addPredBlocksMulti(bc, predProgramLists, program); } } @@ -3481,8 +3699,7 @@ pair makeSparseIterProgram(build_context &bc, // Add blocks to deal with non-root edges (triggered by sparse iterator or // mmbit_isset checks). This operation will flatten the program up to this // point. - u32 iter_offset = - addPredBlocks(bc, predProgramLists, curr_offset, program, false); + u32 iter_offset = addPredBlocks(bc, predProgramLists, program, false); // If we have a root program, replace the END instruction with it. Note // that the root program has already been flattened. @@ -3823,10 +4040,8 @@ vector makeEodAnchorProgram(RoseBuildImpl &build, makeRoleCheckNotHandled(bc, v, program); } - for (const auto &report : g[v].reports) { - auto ri = RoseInstruction(ROSE_INSTR_REPORT_EOD); - ri.u.report.report = report; - program.push_back(ri); + for (const auto &id : g[v].reports) { + makeReport(build, id, false, program); } return program; @@ -3870,7 +4085,7 @@ pair buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) { // Note: we force the use of a sparse iterator for the EOD program so we // can easily guard EOD execution at runtime. - u32 iter_offset = addPredBlocks(bc, predProgramLists, 0, program, true); + u32 iter_offset = addPredBlocks(bc, predProgramLists, program, true); assert(program.size() > 1); return {writeProgram(bc, program), iter_offset}; diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 25ec7bae..adf73726 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -41,6 +41,7 @@ #include "nfa/nfa_dump_api.h" #include "nfa/nfa_internal.h" #include "util/dump_charclass.h" +#include "util/internal_report.h" #include "util/multibit_internal.h" #include "util/multibit.h" @@ -152,6 +153,61 @@ void dumpLookaround(ofstream &os, const RoseEngine *t, } } +static +vector sparseIterValues(const mmbit_sparse_iter *it, u32 num_bits) { + vector keys; + + if (num_bits == 0) { + return keys; + } + + vector bits(mmbit_size(num_bits), u8{0xff}); // All bits on. + vector state(MAX_SPARSE_ITER_STATES); + + const u8 *b = bits.data(); + mmbit_sparse_state *s = state.data(); + + u32 idx = 0; + u32 i = mmbit_sparse_iter_begin(b, num_bits, &idx, it, s); + while (i != MMB_INVALID) { + keys.push_back(i); + i = mmbit_sparse_iter_next(b, num_bits, i, &idx, it, s); + } + + return keys; +} + +static +void dumpJumpTable(ofstream &os, const RoseEngine *t, + const ROSE_STRUCT_SPARSE_ITER_BEGIN *ri) { + auto *it = + (const mmbit_sparse_iter *)loadFromByteCodeOffset(t, ri->iter_offset); + auto *jumps = (const u32 *)loadFromByteCodeOffset(t, ri->jump_table); + + for (const auto &key : sparseIterValues(it, t->rolesWithStateCount)) { + os << " " << std::setw(4) << std::setfill(' ') << key << " : +" + << *jumps << endl; + ++jumps; + } +} + +static +void dumpReport(ofstream &os, const RoseEngine *t, ReportID report) { + const auto *ir = + (const internal_report *)loadFromByteCodeOffset(t, t->intReportOffset) + + report; + os << " type=" << u32{ir->type}; + os << ", onmatch=" << ir->onmatch; + if (ir->ekey != INVALID_EKEY) { + os << ", ekey=" << ir->ekey; + } + if (ir->dkey != MO_INVALID_IDX) { + os << ", dkey=" << ir->dkey; + } + + os << endl; +} + static string dumpStrMask(const u8 *mask, size_t len) { ostringstream oss; @@ -211,6 +267,13 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_BOUNDS) { + os << " min_bound " << ri->min_bound << endl; + os << " max_bound " << ri->max_bound << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CHECK_NOT_HANDLED) { os << " key " << ri->key << endl; os << " fail_jump +" << ri->fail_jump << endl; @@ -239,6 +302,9 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(CATCH_UP) {} + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SOM_ADJUST) { os << " distance " << ri->distance << endl; } @@ -250,6 +316,15 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(SOM_FROM_REPORT) { + os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(SOM_ZERO) {} + PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(TRIGGER_INFIX) { os << " queue " << ri->queue << endl; os << " event " << ri->event << endl; @@ -263,33 +338,72 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(REPORT) { + PROGRAM_CASE(DEDUPE) { os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(DEDUPE_SOM) { + os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); + os << " fail_jump +" << ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_CHAIN) { os << " report " << ri->report << endl; - } - PROGRAM_NEXT_INSTRUCTION - - PROGRAM_CASE(REPORT_EOD) { - os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM_INT) { os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_SOM_AWARE) { + os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT) { + os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(REPORT_EXHAUST) { + os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); } PROGRAM_NEXT_INSTRUCTION PROGRAM_CASE(REPORT_SOM) { os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); } PROGRAM_NEXT_INSTRUCTION - PROGRAM_CASE(REPORT_SOM_KNOWN) { + PROGRAM_CASE(REPORT_SOM_EXHAUST) { os << " report " << ri->report << endl; + dumpReport(os, t, ri->report); + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_EXHAUSTED) { + os << " ekey " << ri->ekey << endl; + os << " fail_jump +" << ri->fail_jump << endl; + } + PROGRAM_NEXT_INSTRUCTION + + PROGRAM_CASE(CHECK_MIN_LENGTH) { + os << " end_adj " << ri->end_adj << endl; + os << " min_length " << ri->min_length << endl; + os << " fail_jump +" << ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION @@ -319,6 +433,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { PROGRAM_CASE(SPARSE_ITER_BEGIN) { os << " iter_offset " << ri->iter_offset << endl; os << " jump_table " << ri->jump_table << endl; + dumpJumpTable(os, t, ri); os << " fail_jump +" << ri->fail_jump << endl; } PROGRAM_NEXT_INSTRUCTION diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 37017ca0..81852f09 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -51,16 +51,33 @@ enum RoseInstructionCode { ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. ROSE_INSTR_CHECK_LEFTFIX, //!< Leftfix must be in accept state. ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches. + ROSE_INSTR_CATCH_UP, //!< Catch up engines, anchored matches. ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. ROSE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine. + ROSE_INSTR_SOM_FROM_REPORT, //!< Acquire SOM from an internal_report. + ROSE_INSTR_SOM_ZERO, //!< Set SOM to zero. ROSE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine. ROSE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine. - ROSE_INSTR_REPORT, //!< Fire an ordinary report. + ROSE_INSTR_DEDUPE, //!< Run deduplication for report. + ROSE_INSTR_DEDUPE_SOM, //!< Run deduplication for SOM report. ROSE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV). - ROSE_INSTR_REPORT_EOD, //!< Fire a callback at EOD time. ROSE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only. - ROSE_INSTR_REPORT_SOM, //!< Manipulate SOM and report. - ROSE_INSTR_REPORT_SOM_KNOWN, //!< Rose role knows its SOM offset. + ROSE_INSTR_REPORT_SOM_AWARE, //!< Manipulate SOM from SOM-aware source. + + /** \brief Fire a report. */ + ROSE_INSTR_REPORT, + + /** \brief Fire an exhaustible report. */ + ROSE_INSTR_REPORT_EXHAUST, + + /** \brief Fire a SOM report. */ + ROSE_INSTR_REPORT_SOM, + + /** \brief Fire an exhaustible SOM report. */ + ROSE_INSTR_REPORT_SOM_EXHAUST, + + ROSE_INSTR_CHECK_EXHAUSTED, //!< Check if an ekey has already been set. + ROSE_INSTR_CHECK_MIN_LENGTH, //!< Check (EOM - SOM) against min length. ROSE_INSTR_SET_STATE, //!< Switch a state index on. ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits. ROSE_INSTR_SQUASH_GROUPS, //!< Conditionally turn off some groups. @@ -106,8 +123,8 @@ struct ROSE_STRUCT_CHECK_ONLY_EOD { struct ROSE_STRUCT_CHECK_BOUNDS { u8 code; //!< From enum RoseInstructionCode. - u32 min_bound; //!< Min distance from zero. - u32 max_bound; //!< Max distance from zero (or ROSE_BOUND_INF). + u64a min_bound; //!< Min distance from zero. + u64a max_bound; //!< Max distance from zero. u32 fail_jump; //!< Jump forward this many bytes on failure. }; @@ -138,6 +155,10 @@ struct ROSE_STRUCT_PUSH_DELAYED { u32 index; // Delay literal index (relative to first delay lit). }; +struct ROSE_STRUCT_CATCH_UP { + u8 code; //!< From enum RoseInstructionCode. +}; + struct ROSE_STRUCT_SOM_ADJUST { u8 code; //!< From enum RoseInstructionCode. u32 distance; //!< Distance to EOM. @@ -149,6 +170,15 @@ struct ROSE_STRUCT_SOM_LEFTFIX { u32 lag; //!< Lag of leftfix for this case. }; +struct ROSE_STRUCT_SOM_FROM_REPORT { + u8 code; //!< From enum RoseInstructionCode. + ReportID report; //!< EXTERNAL_CALLBACK_SOM_* report to use. +}; + +struct ROSE_STRUCT_SOM_ZERO { + u8 code; //!< From enum RoseInstructionCode. +}; + struct ROSE_STRUCT_TRIGGER_INFIX { u8 code; //!< From enum RoseInstructionCode. u8 cancel; //!< Cancels previous top event. @@ -162,9 +192,16 @@ struct ROSE_STRUCT_TRIGGER_SUFFIX { u32 event; //!< Queue event, from MQE_*. }; -struct ROSE_STRUCT_REPORT { +struct ROSE_STRUCT_DEDUPE { u8 code; //!< From enum RoseInstructionCode. ReportID report; + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_DEDUPE_SOM { + u8 code; //!< From enum RoseInstructionCode. + ReportID report; + u32 fail_jump; //!< Jump forward this many bytes on failure. }; struct ROSE_STRUCT_REPORT_CHAIN { @@ -172,26 +209,54 @@ struct ROSE_STRUCT_REPORT_CHAIN { ReportID report; }; -struct ROSE_STRUCT_REPORT_EOD { - u8 code; //!< From enum RoseInstructionCode. - ReportID report; -}; - struct ROSE_STRUCT_REPORT_SOM_INT { u8 code; //!< From enum RoseInstructionCode. ReportID report; }; +struct ROSE_STRUCT_REPORT_SOM_AWARE { + u8 code; //!< From enum RoseInstructionCode. + ReportID report; +}; + +struct ROSE_STRUCT_REPORT { + u8 code; //!< From enum RoseInstructionCode. + ReportID report; +}; + +struct ROSE_STRUCT_REPORT_EXHAUST { + u8 code; //!< From enum RoseInstructionCode. + ReportID report; +}; + struct ROSE_STRUCT_REPORT_SOM { u8 code; //!< From enum RoseInstructionCode. ReportID report; }; -struct ROSE_STRUCT_REPORT_SOM_KNOWN { +struct ROSE_STRUCT_REPORT_SOM_EXHAUST { u8 code; //!< From enum RoseInstructionCode. ReportID report; }; +struct ROSE_STRUCT_REPORT_SOM_EXT { + u8 code; //!< From enum RoseInstructionCode. + ReportID report; +}; + +struct ROSE_STRUCT_CHECK_EXHAUSTED { + u8 code; //!< From enum RoseInstructionCode. + u32 ekey; //!< Exhaustion key to check. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_MIN_LENGTH { + u8 code; //!< From enum RoseInstructionCode. + s32 end_adj; //!< Offset adjustment to add to EOM first. + u64a min_length; //!< Minimum distance from SOM to EOM. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + struct ROSE_STRUCT_SET_STATE { u8 code; //!< From enum RoseInstructionCode. u32 index; //!< State index in multibit. diff --git a/src/runtime.c b/src/runtime.c index d51db18b..e38434fd 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -47,6 +47,7 @@ #include "rose/rose.h" #include "rose/runtime.h" #include "database.h" +#include "report.h" #include "scratch.h" #include "som/som_runtime.h" #include "som/som_stream.h" @@ -56,8 +57,6 @@ #include "util/fatbit.h" #include "util/multibit.h" -#define DEDUPE_MATCHES - static really_inline void prefetch_data(const char *data, unsigned length) { __builtin_prefetch(data); @@ -170,306 +169,6 @@ void setBroken(char *state, u8 broken) { ts->broken = broken; } -static really_inline -int roseAdaptor_i(u64a offset, ReportID id, struct hs_scratch *scratch, - char is_simple, char do_som) { - assert(id != MO_INVALID_IDX); // Should never get an invalid ID. - assert(scratch); - assert(scratch->magic == SCRATCH_MAGIC); - - struct core_info *ci = &scratch->core_info; - const struct RoseEngine *rose = ci->rose; - DEBUG_PRINTF("internal report %u\n", id); - const struct internal_report *ri = getInternalReport(rose, id); - - assert(isExternalReport(ri)); /* only external reports should reach here */ - - s32 offset_adj = ri->offsetAdjust; - UNUSED u32 dkey = ri->dkey; - u64a to_offset = offset; - u64a from_offset = 0; - UNUSED u32 dkeyCount = rose->dkeyCount; - - u32 flags = 0; -#ifndef RELEASE_BUILD - if (offset_adj) { - // alert testing tools that we've got adjusted matches - flags |= HS_MATCH_FLAG_ADJUSTED; - } -#endif - - DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " - "offsetAdj=%d\n", offset, id, ri->type, ri->onmatch, - offset_adj); - - if (unlikely(can_stop_matching(scratch))) { /* ok - we are from rose */ - DEBUG_PRINTF("pre broken - halting\n"); - return MO_HALT_MATCHING; - } - - if (!is_simple && ri->hasBounds) { - assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET); - assert(ri->minOffset <= ri->maxOffset); - if (offset < ri->minOffset || offset > ri->maxOffset) { - DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n", - offset, ri->minOffset, ri->maxOffset); - return ROSE_CONTINUE_MATCHING_NO_EXHAUST; - } - } - - if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) { - DEBUG_PRINTF("ate exhausted match\n"); - return MO_CONTINUE_MATCHING; - } - - if (ri->type == EXTERNAL_CALLBACK) { - from_offset = 0; - } else if (do_som) { - from_offset = handleSomExternal(scratch, ri, to_offset); - } - - to_offset += offset_adj; - assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); - - if (do_som && ri->minLength) { - if (from_offset != HS_OFFSET_PAST_HORIZON && - (to_offset - from_offset < ri->minLength)) { - return ROSE_CONTINUE_MATCHING_NO_EXHAUST; - } - if (ri->quashSom) { - from_offset = 0; - } - } - - DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", - from_offset, to_offset, ri->onmatch, ci->userContext); - - int halt = 0; - - if (do_som || dkey != MO_INVALID_IDX) { - if (offset != scratch->deduper.current_report_offset) { - assert(scratch->deduper.current_report_offset == ~0ULL || - scratch->deduper.current_report_offset < offset); - if (offset == scratch->deduper.current_report_offset + 1) { - fatbit_clear(scratch->deduper.log[offset % 2]); - } else { - fatbit_clear(scratch->deduper.log[0]); - fatbit_clear(scratch->deduper.log[1]); - } - - DEBUG_PRINTF("adj dedupe offset %hhd\n", do_som); - if (do_som) { - halt = flushStoredSomMatches(scratch, offset); - if (halt) { - goto exit; - } - } - scratch->deduper.current_report_offset = offset; - } - } - -#ifdef DEDUPE_MATCHES - if (dkey != MO_INVALID_IDX) { - if (ri->type == EXTERNAL_CALLBACK || ri->quashSom) { - DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); - assert(offset_adj == 0 || offset_adj == -1); - if (fatbit_set(scratch->deduper.log[to_offset % 2], dkeyCount, - dkey)) { - /* we have already raised this report at this offset, squash dupe - * match. */ - DEBUG_PRINTF("dedupe\n"); - goto exit; - } - } else if (do_som) { - /* SOM external event */ - DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); - assert(offset_adj == 0 || offset_adj == -1); - u64a *starts = scratch->deduper.som_start_log[to_offset % 2]; - if (fatbit_set(scratch->deduper.som_log[to_offset % 2], dkeyCount, - dkey)) { - starts[dkey] = MIN(starts[dkey], from_offset); - } else { - starts[dkey] = from_offset; - } - - if (offset_adj) { - scratch->deduper.som_log_dirty |= 1; - } else { - scratch->deduper.som_log_dirty |= 2; - } - - goto exit; - } - } -#endif - - halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset, - flags, ci->userContext); -#ifdef DEDUPE_MATCHES -exit: -#endif - if (halt) { - DEBUG_PRINTF("callback requested to terminate matches\n"); - - setBroken(ci->state, BROKEN_FROM_USER); - ci->broken = BROKEN_FROM_USER; - - return MO_HALT_MATCHING; - } - - if (!is_simple && ri->ekey != END_EXHAUST) { - markAsMatched(ci->exhaustionVector, ri->ekey); - return MO_CONTINUE_MATCHING; - } else { - return ROSE_CONTINUE_MATCHING_NO_EXHAUST; - } -} - -static really_inline -int roseSomAdaptor_i(u64a from_offset, u64a to_offset, ReportID id, - struct hs_scratch *scratch, char is_simple) { - assert(id != MO_INVALID_IDX); // Should never get an invalid ID. - assert(scratch); - assert(scratch->magic == SCRATCH_MAGIC); - - u32 flags = 0; - - struct core_info *ci = &scratch->core_info; - const struct RoseEngine *rose = ci->rose; - const struct internal_report *ri = getInternalReport(rose, id); - - /* internal events should be handled by rose directly */ - assert(ri->type == EXTERNAL_CALLBACK); - - DEBUG_PRINTF("internal match at %llu: IID=%u type=%hhu RID=%u " - "offsetAdj=%d\n", to_offset, id, ri->type, ri->onmatch, - ri->offsetAdjust); - - if (unlikely(can_stop_matching(scratch))) { - DEBUG_PRINTF("pre broken - halting\n"); - return MO_HALT_MATCHING; - } - - if (!is_simple && ri->hasBounds) { - assert(ri->minOffset || ri->minLength || ri->maxOffset < MAX_OFFSET); - if (to_offset < ri->minOffset || to_offset > ri->maxOffset) { - DEBUG_PRINTF("match fell outside valid range %llu !: [%llu,%llu]\n", - to_offset, ri->minOffset, ri->maxOffset); - return MO_CONTINUE_MATCHING; - } - } - - int halt = 0; - - if (!is_simple && unlikely(isExhausted(ci->exhaustionVector, ri->ekey))) { - DEBUG_PRINTF("ate exhausted match\n"); - goto do_return; - } - -#ifdef DEDUPE_MATCHES - u64a offset = to_offset; -#endif - - to_offset += ri->offsetAdjust; - assert(from_offset == HS_OFFSET_PAST_HORIZON || from_offset <= to_offset); - - if (!is_simple && ri->minLength) { - if (from_offset != HS_OFFSET_PAST_HORIZON && - (to_offset - from_offset < ri->minLength)) { - return MO_CONTINUE_MATCHING; - } - if (ri->quashSom) { - from_offset = 0; - } - } - - DEBUG_PRINTF(">> reporting match @[%llu,%llu] for sig %u ctxt %p <<\n", - from_offset, to_offset, ri->onmatch, ci->userContext); - -#ifndef RELEASE_BUILD - if (ri->offsetAdjust != 0) { - // alert testing tools that we've got adjusted matches - flags |= HS_MATCH_FLAG_ADJUSTED; - } -#endif - -#ifdef DEDUPE_MATCHES - u32 dkeyCount = rose->dkeyCount; - - if (offset != scratch->deduper.current_report_offset) { - - assert(scratch->deduper.current_report_offset == ~0ULL - || scratch->deduper.current_report_offset < offset); - if (offset == scratch->deduper.current_report_offset + 1) { - fatbit_clear(scratch->deduper.log[offset % 2]); - } else { - fatbit_clear(scratch->deduper.log[0]); - fatbit_clear(scratch->deduper.log[1]); - } - - halt = flushStoredSomMatches(scratch, offset); - if (halt) { - goto do_return; - } - - scratch->deduper.current_report_offset = offset; - } - - u32 dkey = ri->dkey; - if (dkey != MO_INVALID_IDX) { - if (ri->quashSom) { - DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); - assert(ri->offsetAdjust == 0 || ri->offsetAdjust == -1); - if (fatbit_set(scratch->deduper.log[to_offset % 2], dkeyCount, - dkey)) { - /* we have already raised this report at this offset, squash - * dupe match. */ - DEBUG_PRINTF("dedupe\n"); - goto do_return; - } - } else { - /* SOM external event */ - DEBUG_PRINTF("checking dkey %u at offset %llu\n", dkey, to_offset); - assert(ri->offsetAdjust == 0 || ri->offsetAdjust == -1); - u64a *starts = scratch->deduper.som_start_log[to_offset % 2]; - if (fatbit_set(scratch->deduper.som_log[to_offset % 2], dkeyCount, - dkey)) { - starts[dkey] = MIN(starts[dkey], from_offset); - } else { - starts[dkey] = from_offset; - } - - if (ri->offsetAdjust) { - scratch->deduper.som_log_dirty |= 1; - } else { - scratch->deduper.som_log_dirty |= 2; - } - - goto do_return; - } - } -#endif - - halt = ci->userCallback((unsigned int)ri->onmatch, from_offset, to_offset, - flags, ci->userContext); - - if (!is_simple) { - markAsMatched(ci->exhaustionVector, ri->ekey); - } - -do_return: - if (halt) { - DEBUG_PRINTF("callback requested to terminate matches\n"); - - setBroken(ci->state, BROKEN_FROM_USER); - ci->broken = BROKEN_FROM_USER; - - return MO_HALT_MATCHING; - } - - return MO_CONTINUE_MATCHING; -} - static really_inline hwlmcb_rv_t multiDirectAdaptor(u64a real_end, ReportID direct_id, void *context, struct core_info *ci, char is_simple, @@ -1055,8 +754,7 @@ hs_error_t hs_open_stream(const hs_database_t *db, UNUSED unsigned flags, static really_inline void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) { const struct RoseEngine *rose = id->rose; - char *state = getMultiState(id); - u8 broken = getBroken(state); + u8 broken = scratch->core_info.broken; if (broken) { DEBUG_PRINTF("stream already broken\n"); @@ -1076,8 +774,7 @@ void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) { static never_inline void soleOutfixEodExec(hs_stream_t *id, hs_scratch_t *scratch) { const struct RoseEngine *t = id->rose; - char *state = getMultiState(id); - u8 broken = getBroken(state); + u8 broken = scratch->core_info.broken; if (broken) { DEBUG_PRINTF("stream already broken\n"); @@ -1372,9 +1069,10 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, if (!id->offset && rose->boundary.reportZeroOffset) { DEBUG_PRINTF("zero reports\n"); processReportList(rose, rose->boundary.reportZeroOffset, 0, scratch); - broken = getBroken(state); + broken = scratch->core_info.broken; if (unlikely(broken)) { DEBUG_PRINTF("stream is broken, halting scan\n"); + setBroken(state, broken); if (broken == BROKEN_FROM_USER) { return HS_SCAN_TERMINATED; } else { @@ -1400,7 +1098,6 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, if (rose->hasSom && !told_to_stop_matching(scratch)) { int halt = flushStoredSomMatches(scratch, ~0ULL); if (halt) { - setBroken(state, BROKEN_FROM_USER); scratch->core_info.broken = BROKEN_FROM_USER; } } @@ -1413,6 +1110,7 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, storeSomToStream(scratch, id->offset); } } else if (told_to_stop_matching(scratch)) { + setBroken(state, BROKEN_FROM_USER); return HS_SCAN_TERMINATED; } else { /* exhausted */ setBroken(state, BROKEN_EXHAUSTED); diff --git a/src/som/som_runtime.c b/src/som/som_runtime.c index 23f2b282..418fcbab 100644 --- a/src/som/som_runtime.c +++ b/src/som/som_runtime.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -487,6 +487,7 @@ int clearSomLog(struct hs_scratch *scratch, u64a offset, struct fatbit *log, int halt = ci->userCallback(onmatch, from_offset, offset, flags, ci->userContext); if (halt) { + ci->broken = BROKEN_FROM_USER; return 1; } } diff --git a/src/util/report.h b/src/util/report.h index 0e5bccf8..c4f3bd8c 100644 --- a/src/util/report.h +++ b/src/util/report.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -184,6 +184,11 @@ bool isExternalReport(const Report &r) { return true; } +static inline +bool isExternalSomReport(const Report &r) { + return r.type != EXTERNAL_CALLBACK && isExternalReport(r); +} + static inline bool operator<(const Report &a, const Report &b) { ORDER_CHECK(type);