mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
eod: move engine checks into ENGINES_EOD instr
This commit is contained in:
parent
7e3d56579b
commit
7a6a476723
@ -122,65 +122,6 @@ int roseEodRunIterator(const struct RoseEngine *t, u64a offset,
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Check for (and deliver) reports from active output-exposed (suffix
|
||||
* or outfix) NFAs.
|
||||
*
|
||||
* \return MO_HALT_MATCHING if the user instructs us to stop.
|
||||
*/
|
||||
static rose_inline
|
||||
int roseCheckNfaEod(const struct RoseEngine *t, struct hs_scratch *scratch,
|
||||
u64a offset, const char is_streaming) {
|
||||
if (!t->eodNfaIterOffset) {
|
||||
DEBUG_PRINTF("no engines that report at EOD\n");
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
/* data, len is used for state decompress, should be full available data */
|
||||
u8 key = 0;
|
||||
if (is_streaming) {
|
||||
const u8 *eod_data = scratch->core_info.hbuf;
|
||||
size_t eod_len = scratch->core_info.hlen;
|
||||
key = eod_len ? eod_data[eod_len - 1] : 0;
|
||||
}
|
||||
|
||||
const u8 *aa = getActiveLeafArray(t, scratch->core_info.state);
|
||||
const u32 aaCount = t->activeArrayCount;
|
||||
|
||||
const struct mmbit_sparse_iter *it = getByOffset(t, t->eodNfaIterOffset);
|
||||
assert(ISALIGNED(it));
|
||||
|
||||
u32 idx = 0;
|
||||
struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
|
||||
|
||||
for (u32 qi = mmbit_sparse_iter_begin(aa, aaCount, &idx, it, si_state);
|
||||
qi != MMB_INVALID;
|
||||
qi = mmbit_sparse_iter_next(aa, aaCount, qi, &idx, it, si_state)) {
|
||||
const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
|
||||
const struct NFA *nfa = getNfaByInfo(t, info);
|
||||
|
||||
DEBUG_PRINTF("checking nfa %u\n", qi);
|
||||
assert(nfaAcceptsEod(nfa));
|
||||
|
||||
char *fstate = scratch->fullState + info->fullStateOffset;
|
||||
const char *sstate = scratch->core_info.state + info->stateOffset;
|
||||
|
||||
if (is_streaming) {
|
||||
// Decompress stream state.
|
||||
nfaExpandState(nfa, fstate, sstate, offset, key);
|
||||
}
|
||||
|
||||
if (nfaCheckFinalState(nfa, fstate, sstate, offset, roseReportAdaptor,
|
||||
roseReportSomAdaptor,
|
||||
scratch) == MO_HALT_MATCHING) {
|
||||
DEBUG_PRINTF("user instructed us to stop\n");
|
||||
return MO_HALT_MATCHING;
|
||||
}
|
||||
}
|
||||
|
||||
return MO_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
void cleanupAfterEodMatcher(const struct RoseEngine *t, u64a offset,
|
||||
struct hs_scratch *scratch) {
|
||||
@ -269,10 +210,6 @@ void roseEodExec_i(const struct RoseEngine *t, u64a offset,
|
||||
return;
|
||||
}
|
||||
|
||||
if (roseCheckNfaEod(t, scratch, offset, is_streaming) == MO_HALT_MATCHING) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!t->eodIterProgramOffset && !t->ematcherOffset) {
|
||||
DEBUG_PRINTF("no eod accepts\n");
|
||||
return;
|
||||
|
@ -800,6 +800,57 @@ char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) {
|
||||
return end >= min_bound && end <= max_bound;
|
||||
}
|
||||
|
||||
static rose_inline
|
||||
hwlmcb_rv_t roseEnginesEod(const struct RoseEngine *rose,
|
||||
struct hs_scratch *scratch, u64a offset,
|
||||
u32 iter_offset) {
|
||||
const char is_streaming = rose->mode != HS_MODE_BLOCK;
|
||||
|
||||
/* data, len is used for state decompress, should be full available data */
|
||||
u8 key = 0;
|
||||
if (is_streaming) {
|
||||
const u8 *eod_data = scratch->core_info.hbuf;
|
||||
size_t eod_len = scratch->core_info.hlen;
|
||||
key = eod_len ? eod_data[eod_len - 1] : 0;
|
||||
}
|
||||
|
||||
const u8 *aa = getActiveLeafArray(rose, scratch->core_info.state);
|
||||
const u32 aaCount = rose->activeArrayCount;
|
||||
|
||||
const struct mmbit_sparse_iter *it = getByOffset(rose, iter_offset);
|
||||
assert(ISALIGNED(it));
|
||||
|
||||
u32 idx = 0;
|
||||
struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
|
||||
|
||||
for (u32 qi = mmbit_sparse_iter_begin(aa, aaCount, &idx, it, si_state);
|
||||
qi != MMB_INVALID;
|
||||
qi = mmbit_sparse_iter_next(aa, aaCount, qi, &idx, it, si_state)) {
|
||||
const struct NfaInfo *info = getNfaInfoByQueue(rose, qi);
|
||||
const struct NFA *nfa = getNfaByInfo(rose, info);
|
||||
|
||||
DEBUG_PRINTF("checking nfa %u\n", qi);
|
||||
assert(nfaAcceptsEod(nfa));
|
||||
|
||||
char *fstate = scratch->fullState + info->fullStateOffset;
|
||||
const char *sstate = scratch->core_info.state + info->stateOffset;
|
||||
|
||||
if (is_streaming) {
|
||||
// Decompress stream state.
|
||||
nfaExpandState(nfa, fstate, sstate, offset, key);
|
||||
}
|
||||
|
||||
if (nfaCheckFinalState(nfa, fstate, sstate, offset, roseReportAdaptor,
|
||||
roseReportSomAdaptor,
|
||||
scratch) == MO_HALT_MATCHING) {
|
||||
DEBUG_PRINTF("user instructed us to stop\n");
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
}
|
||||
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
}
|
||||
|
||||
static
|
||||
void updateSeqPoint(struct RoseContext *tctxt, u64a offset,
|
||||
const char from_mpv) {
|
||||
@ -1301,6 +1352,14 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(ENGINES_EOD) {
|
||||
if (roseEnginesEod(t, scratch, end, ri->iter_offset) ==
|
||||
HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATE_MATCHING;
|
||||
}
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(END) {
|
||||
DEBUG_PRINTF("finished\n");
|
||||
return HWLM_CONTINUE_MATCHING;
|
||||
|
@ -223,6 +223,7 @@ public:
|
||||
case ROSE_INSTR_CHECK_STATE: return &u.checkState;
|
||||
case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin;
|
||||
case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext;
|
||||
case ROSE_INSTR_ENGINES_EOD: return &u.enginesEod;
|
||||
case ROSE_INSTR_END: return &u.end;
|
||||
}
|
||||
assert(0);
|
||||
@ -269,6 +270,7 @@ public:
|
||||
case ROSE_INSTR_CHECK_STATE: return sizeof(u.checkState);
|
||||
case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin);
|
||||
case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext);
|
||||
case ROSE_INSTR_ENGINES_EOD: return sizeof(u.enginesEod);
|
||||
case ROSE_INSTR_END: return sizeof(u.end);
|
||||
}
|
||||
assert(0);
|
||||
@ -314,6 +316,7 @@ public:
|
||||
ROSE_STRUCT_CHECK_STATE checkState;
|
||||
ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin;
|
||||
ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext;
|
||||
ROSE_STRUCT_ENGINES_EOD enginesEod;
|
||||
ROSE_STRUCT_END end;
|
||||
} u;
|
||||
|
||||
@ -3532,7 +3535,7 @@ u32 addPredBlocks(build_context &bc,
|
||||
* Returns the pair (program offset, sparse iter offset).
|
||||
*/
|
||||
static
|
||||
pair<u32, u32> makeSparseIterProgram(build_context &bc,
|
||||
vector<RoseInstruction> makeSparseIterProgram(build_context &bc,
|
||||
map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
|
||||
const vector<RoseInstruction> &root_program,
|
||||
const vector<RoseInstruction> &pre_program) {
|
||||
@ -3548,7 +3551,7 @@ pair<u32, u32> makeSparseIterProgram(build_context &bc,
|
||||
// Add blocks to deal with non-root edges (triggered by sparse iterator or
|
||||
// mmbit_isset checks). This operation will flatten the program up to this
|
||||
// point.
|
||||
u32 iter_offset = addPredBlocks(bc, predProgramLists, program, false);
|
||||
addPredBlocks(bc, predProgramLists, program, false);
|
||||
|
||||
// If we have a root program, replace the END instruction with it. Note
|
||||
// that the root program has already been flattened.
|
||||
@ -3559,8 +3562,7 @@ pair<u32, u32> makeSparseIterProgram(build_context &bc,
|
||||
program.insert(end(program), begin(root_program), end(root_program));
|
||||
}
|
||||
|
||||
applyFinalSpecialisation(program);
|
||||
return {writeProgram(bc, program), iter_offset};
|
||||
return program;
|
||||
}
|
||||
|
||||
static
|
||||
@ -3778,8 +3780,9 @@ vector<RoseInstruction> buildLitInitialProgram(RoseBuildImpl &build,
|
||||
}
|
||||
|
||||
static
|
||||
u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id,
|
||||
const vector<RoseEdge> &lit_edges) {
|
||||
vector<RoseInstruction> buildLiteralProgram(RoseBuildImpl &build,
|
||||
build_context &bc, u32 final_id,
|
||||
const vector<RoseEdge> &lit_edges) {
|
||||
const auto &g = build.g;
|
||||
|
||||
DEBUG_PRINTF("final id %u, %zu lit edges\n", final_id, lit_edges.size());
|
||||
@ -3831,7 +3834,19 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id,
|
||||
|
||||
// Put it all together.
|
||||
return makeSparseIterProgram(bc, predProgramLists, root_program,
|
||||
pre_program).first;
|
||||
pre_program);
|
||||
}
|
||||
|
||||
static
|
||||
u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id,
|
||||
const vector<RoseEdge> &lit_edges) {
|
||||
auto program = buildLiteralProgram(build, bc, final_id, lit_edges);
|
||||
if (program.empty()) {
|
||||
return 0;
|
||||
}
|
||||
// Note: already flattened.
|
||||
applyFinalSpecialisation(program);
|
||||
return writeProgram(bc, program);
|
||||
}
|
||||
|
||||
static
|
||||
@ -3904,7 +3919,7 @@ pair<u32, u32> buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
|
||||
const auto &lit_edges = lit_edge_map[finalId];
|
||||
|
||||
litPrograms[finalId] =
|
||||
buildLiteralProgram(build, bc, finalId, lit_edges);
|
||||
writeLiteralProgram(build, bc, finalId, lit_edges);
|
||||
delayRebuildPrograms[finalId] =
|
||||
buildDelayRebuildProgram(build, bc, finalId);
|
||||
}
|
||||
@ -4020,33 +4035,53 @@ pair<u32, u32> buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) {
|
||||
}
|
||||
|
||||
static
|
||||
u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) {
|
||||
if (build.eod_event_literal_id == MO_INVALID_IDX) {
|
||||
u32 writeEodProgram(RoseBuildImpl &build, build_context &bc,
|
||||
u32 eodNfaIterOffset) {
|
||||
vector<RoseInstruction> program;
|
||||
|
||||
if (build.eod_event_literal_id != MO_INVALID_IDX) {
|
||||
const RoseGraph &g = build.g;
|
||||
const auto &lit_info =
|
||||
build.literal_info.at(build.eod_event_literal_id);
|
||||
assert(lit_info.delayed_ids.empty());
|
||||
assert(!lit_info.squash_group);
|
||||
assert(!lit_info.requires_benefits);
|
||||
|
||||
// Collect all edges leading into EOD event literal vertices.
|
||||
vector<RoseEdge> edge_list;
|
||||
for (const auto &v : lit_info.vertices) {
|
||||
for (const auto &e : in_edges_range(v, g)) {
|
||||
edge_list.push_back(e);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort edge list for determinism, prettiness.
|
||||
sort(begin(edge_list), end(edge_list),
|
||||
[&g](const RoseEdge &a, const RoseEdge &b) {
|
||||
return tie(g[source(a, g)].idx, g[target(a, g)].idx) <
|
||||
tie(g[source(b, g)].idx, g[target(b, g)].idx);
|
||||
});
|
||||
|
||||
program = buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list);
|
||||
}
|
||||
|
||||
if (eodNfaIterOffset) {
|
||||
auto ri = RoseInstruction(ROSE_INSTR_ENGINES_EOD);
|
||||
ri.u.enginesEod.iter_offset = eodNfaIterOffset;
|
||||
if (!program.empty()) {
|
||||
assert(program.back().code() == ROSE_INSTR_END);
|
||||
program.pop_back();
|
||||
}
|
||||
program.push_back(move(ri));
|
||||
program = flattenProgram({program});
|
||||
}
|
||||
|
||||
if (program.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const RoseGraph &g = build.g;
|
||||
const auto &lit_info = build.literal_info.at(build.eod_event_literal_id);
|
||||
assert(lit_info.delayed_ids.empty());
|
||||
assert(!lit_info.squash_group);
|
||||
assert(!lit_info.requires_benefits);
|
||||
|
||||
// Collect all edges leading into EOD event literal vertices.
|
||||
vector<RoseEdge> edge_list;
|
||||
for (const auto &v : lit_info.vertices) {
|
||||
for (const auto &e : in_edges_range(v, g)) {
|
||||
edge_list.push_back(e);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort edge list for determinism, prettiness.
|
||||
sort(begin(edge_list), end(edge_list),
|
||||
[&g](const RoseEdge &a, const RoseEdge &b) {
|
||||
return tie(g[source(a, g)].idx, g[target(a, g)].idx) <
|
||||
tie(g[source(b, g)].idx, g[target(b, g)].idx);
|
||||
});
|
||||
|
||||
return buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list);
|
||||
applyFinalSpecialisation(program);
|
||||
return writeProgram(bc, program);
|
||||
}
|
||||
|
||||
static
|
||||
@ -4210,7 +4245,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
||||
tie(litProgramOffset, litDelayRebuildProgramOffset) =
|
||||
buildLiteralPrograms(*this, bc);
|
||||
|
||||
u32 eodProgramOffset = writeEodProgram(*this, bc);
|
||||
u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset);
|
||||
u32 eodIterProgramOffset;
|
||||
u32 eodIterOffset;
|
||||
tie(eodIterProgramOffset, eodIterOffset) = buildEodAnchorProgram(*this, bc);
|
||||
@ -4412,7 +4447,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
||||
engine->eodProgramOffset = eodProgramOffset;
|
||||
engine->eodIterProgramOffset = eodIterProgramOffset;
|
||||
engine->eodIterOffset = eodIterOffset;
|
||||
engine->eodNfaIterOffset = eodNfaIterOffset;
|
||||
|
||||
engine->lastByteHistoryIterOffset = lastByteOffset;
|
||||
|
||||
|
@ -476,6 +476,11 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(ENGINES_EOD) {
|
||||
os << " iter_offset " << ri->iter_offset << endl;
|
||||
}
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
PROGRAM_CASE(END) { return; }
|
||||
PROGRAM_NEXT_INSTRUCTION
|
||||
|
||||
@ -1022,7 +1027,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
|
||||
DUMP_U32(t, eodProgramOffset);
|
||||
DUMP_U32(t, eodIterProgramOffset);
|
||||
DUMP_U32(t, eodIterOffset);
|
||||
DUMP_U32(t, eodNfaIterOffset);
|
||||
DUMP_U32(t, lastByteHistoryIterOffset);
|
||||
DUMP_U32(t, minWidth);
|
||||
DUMP_U32(t, minWidthExcludingBoundaries);
|
||||
|
@ -380,9 +380,6 @@ struct RoseEngine {
|
||||
u32 eodIterProgramOffset; // or 0 if no eod iterator program
|
||||
u32 eodIterOffset; // offset to EOD sparse iter or 0 if none
|
||||
|
||||
/** \brief Offset to sparse iter over outfix/suffix NFAs that accept EOD. */
|
||||
u32 eodNfaIterOffset;
|
||||
|
||||
u32 lastByteHistoryIterOffset; // if non-zero
|
||||
|
||||
/** \brief Minimum number of bytes required to match. */
|
||||
|
@ -96,6 +96,10 @@ enum RoseInstructionCode {
|
||||
ROSE_INSTR_CHECK_STATE, //!< Test a single bit in the state multibit.
|
||||
ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states.
|
||||
ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states.
|
||||
|
||||
/** \brief Check outfixes and suffixes for EOD and fire reports if so. */
|
||||
ROSE_INSTR_ENGINES_EOD,
|
||||
|
||||
ROSE_INSTR_END //!< End of program.
|
||||
};
|
||||
|
||||
@ -352,6 +356,11 @@ struct ROSE_STRUCT_SPARSE_ITER_NEXT {
|
||||
u32 fail_jump; //!< Jump forward this many bytes on failure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_ENGINES_EOD {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
u32 iter_offset; //!< Offset of mmbit_sparse_iter structure.
|
||||
};
|
||||
|
||||
struct ROSE_STRUCT_END {
|
||||
u8 code; //!< From enum RoseInstructionCode.
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user