mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
eod: move suffix iteration into program
This commit is contained in:
parent
9669e0fe94
commit
2761e0105d
@ -129,47 +129,6 @@ void cleanupAfterEodMatcher(const struct RoseEngine *t, u64a offset,
|
||||
roseFlushLastByteHistory(t, scratch, offset);
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
void roseCheckEodSuffixes(const struct RoseEngine *t, u64a offset,
|
||||
struct hs_scratch *scratch) {
|
||||
const u8 *aa = getActiveLeafArray(t, scratch->core_info.state);
|
||||
const u32 aaCount = t->activeArrayCount;
|
||||
UNUSED u32 qCount = t->queueCount;
|
||||
|
||||
for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID;
|
||||
qi = mmbit_iterate(aa, aaCount, qi)) {
|
||||
const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
|
||||
const struct NFA *nfa = getNfaByInfo(t, info);
|
||||
|
||||
assert(nfaAcceptsEod(nfa));
|
||||
|
||||
DEBUG_PRINTF("checking nfa %u\n", qi);
|
||||
|
||||
assert(fatbit_isset(scratch->aqa, qCount, qi)); /* we have just been
|
||||
triggered */
|
||||
|
||||
char *fstate = scratch->fullState + info->fullStateOffset;
|
||||
const char *sstate = scratch->core_info.state + info->stateOffset;
|
||||
|
||||
struct mq *q = scratch->queues + qi;
|
||||
|
||||
pushQueueNoMerge(q, MQE_END, scratch->core_info.len);
|
||||
|
||||
q->context = NULL;
|
||||
/* rose exec is used as we don't want to / can't raise matches in the
|
||||
* history buffer. */
|
||||
char rv = nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX);
|
||||
if (rv) { /* nfa is still alive */
|
||||
if (nfaCheckFinalState(nfa, fstate, sstate, offset,
|
||||
roseReportAdaptor, roseReportSomAdaptor,
|
||||
scratch) == MO_HALT_MATCHING) {
|
||||
DEBUG_PRINTF("user instructed us to stop\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
int roseRunEodProgram(const struct RoseEngine *t, u64a offset,
|
||||
struct hs_scratch *scratch) {
|
||||
@ -229,8 +188,6 @@ void roseEodExec_i(const struct RoseEngine *t, u64a offset,
|
||||
if (roseEodRunIterator(t, offset, scratch) == MO_HALT_MATCHING) {
|
||||
return;
|
||||
}
|
||||
|
||||
roseCheckEodSuffixes(t, offset, scratch);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -851,6 +851,48 @@ hwlmcb_rv_t roseEnginesEod(const struct RoseEngine *rose,
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
hwlmcb_rv_t roseSuffixesEod(const struct RoseEngine *rose,
|
||||
struct hs_scratch *scratch, u64a offset) {
|
||||
const u8 *aa = getActiveLeafArray(rose, scratch->core_info.state);
|
||||
const u32 aaCount = rose->activeArrayCount;
|
||||
|
||||
for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID;
|
||||
qi = mmbit_iterate(aa, aaCount, qi)) {
|
||||
const struct NfaInfo *info = getNfaInfoByQueue(rose, qi);
|
||||
const struct NFA *nfa = getNfaByInfo(rose, info);
|
||||
|
||||
assert(nfaAcceptsEod(nfa));
|
||||
|
||||
DEBUG_PRINTF("checking nfa %u\n", qi);
|
||||
|
||||
/* We have just been triggered. */
|
||||
assert(fatbit_isset(scratch->aqa, rose->queueCount, qi));
|
||||
|
||||
char *fstate = scratch->fullState + info->fullStateOffset;
|
||||
const char *sstate = scratch->core_info.state + info->stateOffset;
|
||||
|
||||
struct mq *q = scratch->queues + qi;
|
||||
|
||||
pushQueueNoMerge(q, MQE_END, scratch->core_info.len);
|
||||
|
||||
q->context = NULL;
|
||||
/* rose exec is used as we don't want to / can't raise matches in the
|
||||
* history buffer. */
|
||||
if (!nfaQueueExecRose(q->nfa, q, MO_INVALID_IDX)) {
|
||||
DEBUG_PRINTF("nfa is dead\n");
|
||||
continue;
|
||||
}
|
||||
if (nfaCheckFinalState(nfa, fstate, sstate, offset, roseReportAdaptor,
|
||||
roseReportSomAdaptor,
|
||||
scratch) == MO_HALT_MATCHING) {
|
||||
DEBUG_PRINTF("user instructed us to stop\n");
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
}
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
static
|
||||
void updateSeqPoint(struct RoseContext *tctxt, u64a offset,
|
||||
const char from_mpv) {
|
||||
@ -1360,6 +1402,14 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SUFFIXES_EOD) {
|
||||
if (roseSuffixesEod(t, scratch, end) ==
|
||||
HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(END) {
|
||||
DEBUG_PRINTF("finished\n");
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
|
@ -224,6 +224,7 @@ public:
|
||||
case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin;
|
||||
case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext;
|
||||
case ROSE_INSTR_ENGINES_EOD: return &u.enginesEod;
|
||||
case ROSE_INSTR_SUFFIXES_EOD: return &u.suffixesEod;
|
||||
case ROSE_INSTR_END: return &u.end;
|
||||
}
|
||||
assert(0);
|
||||
@ -271,6 +272,7 @@ public:
|
||||
case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin);
|
||||
case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext);
|
||||
case ROSE_INSTR_ENGINES_EOD: return sizeof(u.enginesEod);
|
||||
case ROSE_INSTR_SUFFIXES_EOD: return sizeof(u.suffixesEod);
|
||||
case ROSE_INSTR_END: return sizeof(u.end);
|
||||
}
|
||||
assert(0);
|
||||
@ -317,6 +319,7 @@ public:
|
||||
ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin;
|
||||
ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext;
|
||||
ROSE_STRUCT_ENGINES_EOD enginesEod;
|
||||
ROSE_STRUCT_SUFFIXES_EOD suffixesEod;
|
||||
ROSE_STRUCT_END end;
|
||||
} u;
|
||||
|
||||
@ -3985,6 +3988,19 @@ vector<RoseInstruction> makeEodAnchorProgram(RoseBuildImpl &build,
|
||||
return program;
|
||||
}
|
||||
|
||||
static
|
||||
bool hasEodAnchoredSuffix(const RoseBuildImpl &build) {
|
||||
const RoseGraph &g = build.g;
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (g[v].suffix && build.isInETable(v)) {
|
||||
DEBUG_PRINTF("vertex %zu is in eod table and has a suffix\n",
|
||||
g[v].idx);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the pair (program offset, sparse iter offset).
|
||||
*/
|
||||
@ -4028,13 +4044,24 @@ u32 writeEodAnchorProgram(RoseBuildImpl &build, build_context &bc) {
|
||||
}
|
||||
}
|
||||
|
||||
if (predProgramLists.empty()) {
|
||||
DEBUG_PRINTF("no eod anchored roles\n");
|
||||
vector<RoseInstruction> program;
|
||||
if (!predProgramLists.empty()) {
|
||||
addPredBlocks(bc, predProgramLists, program, false);
|
||||
}
|
||||
|
||||
if (hasEodAnchoredSuffix(build)) {
|
||||
if (!program.empty()) {
|
||||
assert(program.back().code() == ROSE_INSTR_END);
|
||||
program.pop_back();
|
||||
}
|
||||
program.emplace_back(ROSE_INSTR_SUFFIXES_EOD);
|
||||
}
|
||||
|
||||
if (program.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
vector<RoseInstruction> program;
|
||||
addPredBlocks(bc, predProgramLists, program, false);
|
||||
program = flattenProgram({program});
|
||||
|
||||
assert(program.size() > 1);
|
||||
applyFinalSpecialisation(program);
|
||||
|
@ -481,6 +481,9 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(SUFFIXES_EOD) {}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(END) { return; }
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
|
@ -100,6 +100,10 @@ enum RoseInstructionCode {
|
||||
/** \brief Check outfixes and suffixes for EOD and fire reports if so. */
|
||||
ROSE_INSTR_ENGINES_EOD,
|
||||
|
||||
/** \brief Catch up and check active suffixes for EOD and fire reports if
|
||||
* so. */
|
||||
ROSE_INSTR_SUFFIXES_EOD,
|
||||
|
||||
ROSE_INSTR_END //!< End of program.
|
||||
};
|
||||
|
||||
@ -361,6 +365,10 @@ struct ROSE_STRUCT_ENGINES_EOD {
|
||||
u32 iter_offset; //!< Offset of mmbit_sparse_iter structure.
|
||||
};
|
||||
|
||||
/** \brief Layout of the ROSE_INSTR_SUFFIXES_EOD instruction: the instruction
 * code is its only field, so it carries no operands. */
struct ROSE_STRUCT_SUFFIXES_EOD {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
};
|
||||
|
||||
/** \brief Layout of the ROSE_INSTR_END (end of program) instruction: the
 * instruction code is its only field, so it carries no operands. */
struct ROSE_STRUCT_END {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user