mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
eod: move hwlm execution into MATCHER_EOD instr
This commit is contained in:
parent
b8f771e824
commit
39461cc806
101
src/rose/eod.c
101
src/rose/eod.c
@ -55,80 +55,6 @@ void initContext(const struct RoseEngine *t, u64a offset,
|
||||
fatbit_clear(scratch->aqa);
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
hwlmcb_rv_t roseEodRunMatcher(const struct RoseEngine *t, u64a offset,
|
||||
struct hs_scratch *scratch,
|
||||
const char is_streaming) {
|
||||
assert(t->ematcherOffset);
|
||||
|
||||
size_t eod_len;
|
||||
const u8 *eod_data;
|
||||
if (!is_streaming) { /* Block */
|
||||
eod_data = scratch->core_info.buf;
|
||||
eod_len = scratch->core_info.len;
|
||||
} else { /* Streaming */
|
||||
eod_len = scratch->core_info.hlen;
|
||||
eod_data = scratch->core_info.hbuf;
|
||||
}
|
||||
|
||||
assert(eod_data);
|
||||
assert(eod_len);
|
||||
|
||||
// If we don't have enough bytes to produce a match from an EOD table scan,
|
||||
// there's no point scanning.
|
||||
if (eod_len < t->eodmatcherMinWidth) {
|
||||
DEBUG_PRINTF("len=%zu < eodmatcherMinWidth=%u\n", eod_len,
|
||||
t->eodmatcherMinWidth);
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
// Ensure that we only need scan the last N bytes, where N is the length of
|
||||
// the eod-anchored matcher region.
|
||||
size_t adj = eod_len - MIN(eod_len, t->ematcherRegionSize);
|
||||
|
||||
DEBUG_PRINTF("eod offset=%llu, eod length=%zu\n", offset, eod_len);
|
||||
|
||||
struct RoseContext *tctxt = &scratch->tctxt;
|
||||
const struct HWLM *etable = getELiteralMatcher(t);
|
||||
|
||||
hwlmExec(etable, eod_data, eod_len, adj, roseCallback, scratch,
|
||||
tctxt->groups);
|
||||
|
||||
// We may need to fire delayed matches
|
||||
return cleanUpDelayed(t, scratch, 0, offset);
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
int roseEodRunIterator(const struct RoseEngine *t, u64a offset,
|
||||
struct hs_scratch *scratch) {
|
||||
if (!t->eodIterProgramOffset) {
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("running eod program at offset %u\n", t->eodIterProgramOffset);
|
||||
|
||||
const u64a som = 0;
|
||||
const size_t match_len = 0;
|
||||
const char in_anchored = 0;
|
||||
const char in_catchup = 0;
|
||||
const char from_mpv = 0;
|
||||
const char skip_mpv_catchup = 1;
|
||||
if (roseRunProgram(t, scratch, t->eodIterProgramOffset, som, offset,
|
||||
match_len, in_anchored, in_catchup,
|
||||
from_mpv, skip_mpv_catchup) == HWLM_TERMINATE_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
void cleanupAfterEodMatcher(const struct RoseEngine *t, u64a offset,
|
||||
struct hs_scratch *scratch) {
|
||||
// Flush history to make sure it's consistent.
|
||||
roseFlushLastByteHistory(t, scratch, offset);
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
int roseRunEodProgram(const struct RoseEngine *t, u64a offset,
|
||||
struct hs_scratch *scratch) {
|
||||
@ -158,37 +84,14 @@ int roseRunEodProgram(const struct RoseEngine *t, u64a offset,
|
||||
|
||||
static really_inline
|
||||
void roseEodExec_i(const struct RoseEngine *t, u64a offset,
|
||||
struct hs_scratch *scratch, const char is_streaming) {
|
||||
struct hs_scratch *scratch, UNUSED const char is_streaming) {
|
||||
assert(t);
|
||||
assert(scratch->core_info.buf || scratch->core_info.hbuf);
|
||||
assert(!scratch->core_info.buf || !scratch->core_info.hbuf);
|
||||
assert(!can_stop_matching(scratch));
|
||||
|
||||
// Run the unconditional EOD program.
|
||||
if (roseRunEodProgram(t, offset, scratch) == MO_HALT_MATCHING) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Run the EOD anchored matcher if there is one.
|
||||
if (t->ematcherOffset) {
|
||||
assert(t->ematcherRegionSize);
|
||||
// Unset the reports we just fired so we don't fire them again below.
|
||||
char *state = scratch->core_info.state;
|
||||
mmbit_clear(getRoleState(state), t->rolesWithStateCount);
|
||||
mmbit_clear(getActiveLeafArray(t, state), t->activeArrayCount);
|
||||
|
||||
if (roseEodRunMatcher(t, offset, scratch, is_streaming) ==
|
||||
HWLM_TERMINATE_MATCHING) {
|
||||
return;
|
||||
}
|
||||
|
||||
cleanupAfterEodMatcher(t, offset, scratch);
|
||||
|
||||
// Fire any new EOD reports.
|
||||
if (roseEodRunIterator(t, offset, scratch) == MO_HALT_MATCHING) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
roseRunEodProgram(t, offset, scratch);
|
||||
}
|
||||
|
||||
void roseEodExec(const struct RoseEngine *t, u64a offset,
|
||||
|
@ -46,6 +46,13 @@
|
||||
#include "util/fatbit.h"
|
||||
#include "util/multibit.h"
|
||||
|
||||
static rose_inline
|
||||
hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
|
||||
struct hs_scratch *scratch, u32 programOffset,
|
||||
u64a som, u64a end, size_t match_len,
|
||||
char in_anchored, char in_catchup, char from_mpv,
|
||||
char skip_mpv_catchup);
|
||||
|
||||
static rose_inline
|
||||
int roseCheckBenefits(const struct core_info *ci, u64a end, u32 mask_rewind,
|
||||
const u8 *and_mask, const u8 *exp_mask) {
|
||||
@ -893,6 +900,93 @@ hwlmcb_rv_t roseSuffixesEod(const struct RoseEngine *rose,
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
int roseEodRunIterator(const struct RoseEngine *t, u64a offset,
|
||||
struct hs_scratch *scratch) {
|
||||
if (!t->eodIterProgramOffset) {
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("running eod program at offset %u\n", t->eodIterProgramOffset);
|
||||
|
||||
const u64a som = 0;
|
||||
const size_t match_len = 0;
|
||||
const char in_anchored = 0;
|
||||
const char in_catchup = 0;
|
||||
const char from_mpv = 0;
|
||||
const char skip_mpv_catchup = 1;
|
||||
if (roseRunProgram(t, scratch, t->eodIterProgramOffset, som, offset,
|
||||
match_len, in_anchored, in_catchup,
|
||||
from_mpv, skip_mpv_catchup) == HWLM_TERMINATE_MATCHING) {
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
static
|
||||
hwlmcb_rv_t roseMatcherEod(const struct RoseEngine *rose,
|
||||
struct hs_scratch *scratch, u64a offset) {
|
||||
assert(rose->ematcherOffset);
|
||||
assert(rose->ematcherRegionSize);
|
||||
|
||||
// Clear role state and active engines, since we have already handled all
|
||||
// outstanding work there.
|
||||
DEBUG_PRINTF("clear role state and active leaf array\n");
|
||||
char *state = scratch->core_info.state;
|
||||
mmbit_clear(getRoleState(state), rose->rolesWithStateCount);
|
||||
mmbit_clear(getActiveLeafArray(rose, state), rose->activeArrayCount);
|
||||
|
||||
const char is_streaming = rose->mode != HS_MODE_BLOCK;
|
||||
|
||||
size_t eod_len;
|
||||
const u8 *eod_data;
|
||||
if (!is_streaming) { /* Block */
|
||||
eod_data = scratch->core_info.buf;
|
||||
eod_len = scratch->core_info.len;
|
||||
} else { /* Streaming */
|
||||
eod_len = scratch->core_info.hlen;
|
||||
eod_data = scratch->core_info.hbuf;
|
||||
}
|
||||
|
||||
assert(eod_data);
|
||||
assert(eod_len);
|
||||
|
||||
DEBUG_PRINTF("%zu bytes of eod data to scan at offset %llu\n", eod_len,
|
||||
offset);
|
||||
|
||||
// If we don't have enough bytes to produce a match from an EOD table scan,
|
||||
// there's no point scanning.
|
||||
if (eod_len < rose->eodmatcherMinWidth) {
|
||||
DEBUG_PRINTF("too short for min width %u\n", rose->eodmatcherMinWidth);
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
// Ensure that we only need scan the last N bytes, where N is the length of
|
||||
// the eod-anchored matcher region.
|
||||
size_t adj = eod_len - MIN(eod_len, rose->ematcherRegionSize);
|
||||
|
||||
const struct HWLM *etable = getELiteralMatcher(rose);
|
||||
hwlmExec(etable, eod_data, eod_len, adj, roseCallback, scratch,
|
||||
scratch->tctxt.groups);
|
||||
|
||||
// We may need to fire delayed matches.
|
||||
if (cleanUpDelayed(rose, scratch, 0, offset) == HWLM_TERMINATE_MATCHING) {
|
||||
DEBUG_PRINTF("user instructed us to stop\n");
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
|
||||
roseFlushLastByteHistory(rose, scratch, offset);
|
||||
|
||||
// Fire any new EOD reports.
|
||||
if (roseEodRunIterator(rose, offset, scratch) == MO_HALT_MATCHING) {
|
||||
DEBUG_PRINTF("user instructed us to stop\n");
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
static
|
||||
void updateSeqPoint(struct RoseContext *tctxt, u64a offset,
|
||||
const char from_mpv) {
|
||||
@ -1410,6 +1504,14 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(MATCHER_EOD) {
|
||||
if (roseMatcherEod(t, scratch, end) ==
|
||||
HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(END) {
|
||||
DEBUG_PRINTF("finished\n");
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
|
@ -225,6 +225,7 @@ public:
|
||||
case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext;
|
||||
case ROSE_INSTR_ENGINES_EOD: return &u.enginesEod;
|
||||
case ROSE_INSTR_SUFFIXES_EOD: return &u.suffixesEod;
|
||||
case ROSE_INSTR_MATCHER_EOD: return &u.matcherEod;
|
||||
case ROSE_INSTR_END: return &u.end;
|
||||
}
|
||||
assert(0);
|
||||
@ -273,6 +274,7 @@ public:
|
||||
case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext);
|
||||
case ROSE_INSTR_ENGINES_EOD: return sizeof(u.enginesEod);
|
||||
case ROSE_INSTR_SUFFIXES_EOD: return sizeof(u.suffixesEod);
|
||||
case ROSE_INSTR_MATCHER_EOD: return sizeof(u.matcherEod);
|
||||
case ROSE_INSTR_END: return sizeof(u.end);
|
||||
}
|
||||
assert(0);
|
||||
@ -320,6 +322,7 @@ public:
|
||||
ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext;
|
||||
ROSE_STRUCT_ENGINES_EOD enginesEod;
|
||||
ROSE_STRUCT_SUFFIXES_EOD suffixesEod;
|
||||
ROSE_STRUCT_MATCHER_EOD matcherEod;
|
||||
ROSE_STRUCT_END end;
|
||||
} u;
|
||||
|
||||
@ -4000,6 +4003,18 @@ bool hasEodAnchoredSuffix(const RoseBuildImpl &build) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static
|
||||
bool hasEodMatcher(const RoseBuildImpl &build) {
|
||||
const RoseGraph &g = build.g;
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (build.isInETable(v)) {
|
||||
DEBUG_PRINTF("vertex %zu is in eod table\n", g[v].idx);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the pair (program offset, sparse iter offset).
|
||||
*/
|
||||
@ -4108,16 +4123,22 @@ void addGeneralEodAnchorProgram(RoseBuildImpl &build, build_context &bc,
|
||||
}
|
||||
}
|
||||
|
||||
if (predProgramLists.empty()) {
|
||||
DEBUG_PRINTF("no eod anchored roles\n");
|
||||
return;
|
||||
if (!predProgramLists.empty()) {
|
||||
if (!program.empty()) {
|
||||
assert(program.back().code() == ROSE_INSTR_END);
|
||||
program.pop_back();
|
||||
}
|
||||
addPredBlocks(bc, predProgramLists, program);
|
||||
}
|
||||
|
||||
if (!program.empty()) {
|
||||
assert(program.back().code() == ROSE_INSTR_END);
|
||||
program.pop_back();
|
||||
if (hasEodMatcher(build)) {
|
||||
if (!program.empty()) {
|
||||
assert(program.back().code() == ROSE_INSTR_END);
|
||||
program.pop_back();
|
||||
}
|
||||
program.emplace_back(ROSE_INSTR_MATCHER_EOD);
|
||||
program.emplace_back(ROSE_INSTR_END);
|
||||
}
|
||||
addPredBlocks(bc, predProgramLists, program);
|
||||
}
|
||||
|
||||
static
|
||||
|
@ -484,6 +484,9 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
PROGRAM_CASE(SUFFIXES_EOD) {}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(MATCHER_EOD) {}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(END) { return; }
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
|
@ -104,6 +104,9 @@ enum RoseInstructionCode {
|
||||
* so. */
|
||||
ROSE_INSTR_SUFFIXES_EOD,
|
||||
|
||||
/** \brief Run the EOD-anchored HWLM literal matcher. */
|
||||
ROSE_INSTR_MATCHER_EOD,
|
||||
|
||||
ROSE_INSTR_END //!< End of program.
|
||||
};
|
||||
|
||||
@ -369,6 +372,10 @@ struct ROSE_STRUCT_SUFFIXES_EOD {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_MATCHER_EOD {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_END {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user