eod: move engine checks into ENGINES_EOD instr

This commit is contained in:
Justin Viiret 2016-06-09 14:41:15 +10:00 committed by Matthew Barr
parent 7e3d56579b
commit 7a6a476723
6 changed files with 141 additions and 101 deletions

View File

@ -122,65 +122,6 @@ int roseEodRunIterator(const struct RoseEngine *t, u64a offset,
return MO_CONTINUE_MATCHING;
}
/**
 * \brief Walk the active leaf (suffix or outfix) NFAs that can report at end
 * of data and deliver whatever reports their final state raises.
 *
 * In streaming mode each engine's stream state is expanded into its full
 * state before the final-state check is made.
 *
 * \return MO_HALT_MATCHING as soon as a report callback asks us to stop,
 * MO_CONTINUE_MATCHING otherwise (including when there is no EOD iterator).
 */
static rose_inline
int roseCheckNfaEod(const struct RoseEngine *t, struct hs_scratch *scratch,
                    u64a offset, const char is_streaming) {
    const u32 iter_offset = t->eodNfaIterOffset;
    if (iter_offset == 0) {
        DEBUG_PRINTF("no engines that report at EOD\n");
        return MO_CONTINUE_MATCHING;
    }

    /* The key handed to state expansion is the last byte of the full
     * available history; it is only consumed on the streaming path below. */
    u8 key = 0;
    if (is_streaming) {
        const u8 *history = scratch->core_info.hbuf;
        const size_t history_len = scratch->core_info.hlen;
        if (history_len) {
            key = history[history_len - 1];
        }
    }

    const u8 *leaf_array = getActiveLeafArray(t, scratch->core_info.state);
    const u32 leaf_count = t->activeArrayCount;

    const struct mmbit_sparse_iter *eod_iter = getByOffset(t, iter_offset);
    assert(ISALIGNED(eod_iter));

    u32 idx = 0;
    struct mmbit_sparse_state iter_state[MAX_SPARSE_ITER_STATES];

    u32 qi = mmbit_sparse_iter_begin(leaf_array, leaf_count, &idx, eod_iter,
                                     iter_state);
    while (qi != MMB_INVALID) {
        const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
        const struct NFA *nfa = getNfaByInfo(t, info);

        DEBUG_PRINTF("checking nfa %u\n", qi);
        assert(nfaAcceptsEod(nfa));

        char *full_state = scratch->fullState + info->fullStateOffset;
        const char *stream_state =
            scratch->core_info.state + info->stateOffset;

        if (is_streaming) {
            // Decompress stream state.
            nfaExpandState(nfa, full_state, stream_state, offset, key);
        }

        if (nfaCheckFinalState(nfa, full_state, stream_state, offset,
                               roseReportAdaptor, roseReportSomAdaptor,
                               scratch) == MO_HALT_MATCHING) {
            DEBUG_PRINTF("user instructed us to stop\n");
            return MO_HALT_MATCHING;
        }

        qi = mmbit_sparse_iter_next(leaf_array, leaf_count, qi, &idx,
                                    eod_iter, iter_state);
    }

    return MO_CONTINUE_MATCHING;
}
static rose_inline
void cleanupAfterEodMatcher(const struct RoseEngine *t, u64a offset,
struct hs_scratch *scratch) {
@ -269,10 +210,6 @@ void roseEodExec_i(const struct RoseEngine *t, u64a offset,
return;
}
if (roseCheckNfaEod(t, scratch, offset, is_streaming) == MO_HALT_MATCHING) {
return;
}
if (!t->eodIterProgramOffset && !t->ematcherOffset) {
DEBUG_PRINTF("no eod accepts\n");
return;

View File

@ -800,6 +800,57 @@ char roseCheckBounds(u64a end, u64a min_bound, u64a max_bound) {
return end >= min_bound && end <= max_bound;
}
/**
 * \brief Implementation of the ENGINES_EOD instruction: visit every active
 * leaf NFA selected by the sparse iterator at \p iter_offset and run its
 * final-state (EOD) check, firing reports through the Rose report adaptors.
 *
 * When the engine is not in block mode, each NFA's stream state is expanded
 * into its full state before the check.
 *
 * \param rose Rose engine bytecode.
 * \param scratch Scratch space; also passed to the report adaptors as context.
 * \param offset Offset handed to state expansion and the final-state check.
 * \param iter_offset Offset (within \p rose) of the mmbit_sparse_iter to walk.
 *
 * \return HWLM_TERMINATE_MATCHING as soon as a report callback asks us to
 * stop, HWLM_CONTINUE_MATCHING otherwise.
 */
static rose_inline
hwlmcb_rv_t roseEnginesEod(const struct RoseEngine *rose,
                           struct hs_scratch *scratch, u64a offset,
                           u32 iter_offset) {
    const char is_streaming = rose->mode != HS_MODE_BLOCK;

    /* The key handed to state expansion is the last byte of the full
     * available history; it is only consumed on the streaming path below. */
    u8 key = 0;
    if (is_streaming) {
        const u8 *history = scratch->core_info.hbuf;
        const size_t history_len = scratch->core_info.hlen;
        if (history_len) {
            key = history[history_len - 1];
        }
    }

    const u8 *leaf_array = getActiveLeafArray(rose, scratch->core_info.state);
    const u32 leaf_count = rose->activeArrayCount;

    const struct mmbit_sparse_iter *eod_iter = getByOffset(rose, iter_offset);
    assert(ISALIGNED(eod_iter));

    u32 idx = 0;
    struct mmbit_sparse_state iter_state[MAX_SPARSE_ITER_STATES];

    u32 qi = mmbit_sparse_iter_begin(leaf_array, leaf_count, &idx, eod_iter,
                                     iter_state);
    while (qi != MMB_INVALID) {
        const struct NfaInfo *info = getNfaInfoByQueue(rose, qi);
        const struct NFA *nfa = getNfaByInfo(rose, info);

        DEBUG_PRINTF("checking nfa %u\n", qi);
        assert(nfaAcceptsEod(nfa));

        char *full_state = scratch->fullState + info->fullStateOffset;
        const char *stream_state =
            scratch->core_info.state + info->stateOffset;

        if (is_streaming) {
            // Decompress stream state.
            nfaExpandState(nfa, full_state, stream_state, offset, key);
        }

        if (nfaCheckFinalState(nfa, full_state, stream_state, offset,
                               roseReportAdaptor, roseReportSomAdaptor,
                               scratch) == MO_HALT_MATCHING) {
            DEBUG_PRINTF("user instructed us to stop\n");
            return HWLM_TERMINATE_MATCHING;
        }

        qi = mmbit_sparse_iter_next(leaf_array, leaf_count, qi, &idx,
                                    eod_iter, iter_state);
    }

    return HWLM_CONTINUE_MATCHING;
}
static
void updateSeqPoint(struct RoseContext *tctxt, u64a offset,
const char from_mpv) {
@ -1301,6 +1352,14 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t,
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(ENGINES_EOD) {
if (roseEnginesEod(t, scratch, end, ri->iter_offset) ==
HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(END) {
DEBUG_PRINTF("finished\n");
return HWLM_CONTINUE_MATCHING;

View File

@ -223,6 +223,7 @@ public:
case ROSE_INSTR_CHECK_STATE: return &u.checkState;
case ROSE_INSTR_SPARSE_ITER_BEGIN: return &u.sparseIterBegin;
case ROSE_INSTR_SPARSE_ITER_NEXT: return &u.sparseIterNext;
case ROSE_INSTR_ENGINES_EOD: return &u.enginesEod;
case ROSE_INSTR_END: return &u.end;
}
assert(0);
@ -269,6 +270,7 @@ public:
case ROSE_INSTR_CHECK_STATE: return sizeof(u.checkState);
case ROSE_INSTR_SPARSE_ITER_BEGIN: return sizeof(u.sparseIterBegin);
case ROSE_INSTR_SPARSE_ITER_NEXT: return sizeof(u.sparseIterNext);
case ROSE_INSTR_ENGINES_EOD: return sizeof(u.enginesEod);
case ROSE_INSTR_END: return sizeof(u.end);
}
assert(0);
@ -314,6 +316,7 @@ public:
ROSE_STRUCT_CHECK_STATE checkState;
ROSE_STRUCT_SPARSE_ITER_BEGIN sparseIterBegin;
ROSE_STRUCT_SPARSE_ITER_NEXT sparseIterNext;
ROSE_STRUCT_ENGINES_EOD enginesEod;
ROSE_STRUCT_END end;
} u;
@ -3532,7 +3535,7 @@ u32 addPredBlocks(build_context &bc,
* Returns the assembled program; the caller is responsible for writing it.
*/
static
pair<u32, u32> makeSparseIterProgram(build_context &bc,
vector<RoseInstruction> makeSparseIterProgram(build_context &bc,
map<u32, vector<vector<RoseInstruction>>> &predProgramLists,
const vector<RoseInstruction> &root_program,
const vector<RoseInstruction> &pre_program) {
@ -3548,7 +3551,7 @@ pair<u32, u32> makeSparseIterProgram(build_context &bc,
// Add blocks to deal with non-root edges (triggered by sparse iterator or
// mmbit_isset checks). This operation will flatten the program up to this
// point.
u32 iter_offset = addPredBlocks(bc, predProgramLists, program, false);
addPredBlocks(bc, predProgramLists, program, false);
// If we have a root program, replace the END instruction with it. Note
// that the root program has already been flattened.
@ -3559,8 +3562,7 @@ pair<u32, u32> makeSparseIterProgram(build_context &bc,
program.insert(end(program), begin(root_program), end(root_program));
}
applyFinalSpecialisation(program);
return {writeProgram(bc, program), iter_offset};
return program;
}
static
@ -3778,8 +3780,9 @@ vector<RoseInstruction> buildLitInitialProgram(RoseBuildImpl &build,
}
static
u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id,
const vector<RoseEdge> &lit_edges) {
vector<RoseInstruction> buildLiteralProgram(RoseBuildImpl &build,
build_context &bc, u32 final_id,
const vector<RoseEdge> &lit_edges) {
const auto &g = build.g;
DEBUG_PRINTF("final id %u, %zu lit edges\n", final_id, lit_edges.size());
@ -3831,7 +3834,19 @@ u32 buildLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id,
// Put it all together.
return makeSparseIterProgram(bc, predProgramLists, root_program,
pre_program).first;
pre_program);
}
/**
 * Builds the program for the given literal and, when it is non-empty, applies
 * final specialisation and writes it out.
 *
 * Returns whatever writeProgram() returns for the finished program, or 0 if
 * buildLiteralProgram() produced no instructions.
 */
static
u32 writeLiteralProgram(RoseBuildImpl &build, build_context &bc, u32 final_id,
                        const vector<RoseEdge> &lit_edges) {
    auto prog = buildLiteralProgram(build, bc, final_id, lit_edges);
    if (!prog.empty()) {
        // Note: already flattened.
        applyFinalSpecialisation(prog);
        return writeProgram(bc, prog);
    }

    // Nothing to write for this literal.
    return 0;
}
static
@ -3904,7 +3919,7 @@ pair<u32, u32> buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
const auto &lit_edges = lit_edge_map[finalId];
litPrograms[finalId] =
buildLiteralProgram(build, bc, finalId, lit_edges);
writeLiteralProgram(build, bc, finalId, lit_edges);
delayRebuildPrograms[finalId] =
buildDelayRebuildProgram(build, bc, finalId);
}
@ -4020,33 +4035,53 @@ pair<u32, u32> buildEodAnchorProgram(RoseBuildImpl &build, build_context &bc) {
}
/**
 * Builds and writes the EOD program.
 *
 * The three-argument version first builds a literal program from the edges
 * leading into the EOD event literal's vertices (when such a literal exists),
 * then, if eodNfaIterOffset is non-zero, appends an ENGINES_EOD instruction
 * carrying that iterator offset.
 *
 * Returns 0 when the resulting program is empty; otherwise the value returned
 * by writeProgram().
 *
 * NOTE(review): this hunk appears to interleave the removed two-argument
 * version with its replacement -- confirm against the applied file.
 */
static
u32 writeEodProgram(RoseBuildImpl &build, build_context &bc) {
if (build.eod_event_literal_id == MO_INVALID_IDX) {
u32 writeEodProgram(RoseBuildImpl &build, build_context &bc,
u32 eodNfaIterOffset) {
vector<RoseInstruction> program;
// Part one: the program for the EOD event literal, if the build has one.
if (build.eod_event_literal_id != MO_INVALID_IDX) {
const RoseGraph &g = build.g;
const auto &lit_info =
build.literal_info.at(build.eod_event_literal_id);
assert(lit_info.delayed_ids.empty());
assert(!lit_info.squash_group);
assert(!lit_info.requires_benefits);
// Collect all edges leading into EOD event literal vertices.
vector<RoseEdge> edge_list;
for (const auto &v : lit_info.vertices) {
for (const auto &e : in_edges_range(v, g)) {
edge_list.push_back(e);
}
}
// Sort edge list for determinism, prettiness.
sort(begin(edge_list), end(edge_list),
[&g](const RoseEdge &a, const RoseEdge &b) {
return tie(g[source(a, g)].idx, g[target(a, g)].idx) <
tie(g[source(b, g)].idx, g[target(b, g)].idx);
});
program = buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list);
}
// Part two: append the engine EOD check when an iterator over EOD-accepting
// engines was supplied (a zero offset means there is none).
if (eodNfaIterOffset) {
auto ri = RoseInstruction(ROSE_INSTR_ENGINES_EOD);
ri.u.enginesEod.iter_offset = eodNfaIterOffset;
if (!program.empty()) {
// Drop the trailing END so ENGINES_EOD becomes the last real instruction.
assert(program.back().code() == ROSE_INSTR_END);
program.pop_back();
}
program.push_back(move(ri));
// NOTE(review): presumably flattenProgram re-terminates the program with
// END -- confirm.
program = flattenProgram({program});
}
if (program.empty()) {
return 0;
}
const RoseGraph &g = build.g;
const auto &lit_info = build.literal_info.at(build.eod_event_literal_id);
assert(lit_info.delayed_ids.empty());
assert(!lit_info.squash_group);
assert(!lit_info.requires_benefits);
// Collect all edges leading into EOD event literal vertices.
vector<RoseEdge> edge_list;
for (const auto &v : lit_info.vertices) {
for (const auto &e : in_edges_range(v, g)) {
edge_list.push_back(e);
}
}
// Sort edge list for determinism, prettiness.
sort(begin(edge_list), end(edge_list),
[&g](const RoseEdge &a, const RoseEdge &b) {
return tie(g[source(a, g)].idx, g[target(a, g)].idx) <
tie(g[source(b, g)].idx, g[target(b, g)].idx);
});
return buildLiteralProgram(build, bc, MO_INVALID_IDX, edge_list);
applyFinalSpecialisation(program);
return writeProgram(bc, program);
}
static
@ -4210,7 +4245,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
tie(litProgramOffset, litDelayRebuildProgramOffset) =
buildLiteralPrograms(*this, bc);
u32 eodProgramOffset = writeEodProgram(*this, bc);
u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset);
u32 eodIterProgramOffset;
u32 eodIterOffset;
tie(eodIterProgramOffset, eodIterOffset) = buildEodAnchorProgram(*this, bc);
@ -4412,7 +4447,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
engine->eodProgramOffset = eodProgramOffset;
engine->eodIterProgramOffset = eodIterProgramOffset;
engine->eodIterOffset = eodIterOffset;
engine->eodNfaIterOffset = eodNfaIterOffset;
engine->lastByteHistoryIterOffset = lastByteOffset;

View File

@ -476,6 +476,11 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(ENGINES_EOD) {
os << " iter_offset " << ri->iter_offset << endl;
}
PROGRAM_NEXT_INSTRUCTION
PROGRAM_CASE(END) { return; }
PROGRAM_NEXT_INSTRUCTION
@ -1022,7 +1027,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
DUMP_U32(t, eodProgramOffset);
DUMP_U32(t, eodIterProgramOffset);
DUMP_U32(t, eodIterOffset);
DUMP_U32(t, eodNfaIterOffset);
DUMP_U32(t, lastByteHistoryIterOffset);
DUMP_U32(t, minWidth);
DUMP_U32(t, minWidthExcludingBoundaries);

View File

@ -380,9 +380,6 @@ struct RoseEngine {
u32 eodIterProgramOffset; // or 0 if no eod iterator program
u32 eodIterOffset; // offset to EOD sparse iter or 0 if none
/** \brief Offset to sparse iter over outfix/suffix NFAs that accept EOD. */
u32 eodNfaIterOffset;
u32 lastByteHistoryIterOffset; // if non-zero
/** \brief Minimum number of bytes required to match. */

View File

@ -96,6 +96,10 @@ enum RoseInstructionCode {
ROSE_INSTR_CHECK_STATE, //!< Test a single bit in the state multibit.
ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states.
ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states.
/** \brief Check outfixes and suffixes for EOD and fire reports if so. */
ROSE_INSTR_ENGINES_EOD,
ROSE_INSTR_END //!< End of program.
};
@ -352,6 +356,11 @@ struct ROSE_STRUCT_SPARSE_ITER_NEXT {
u32 fail_jump; //!< Jump forward this many bytes on failure.
};
/**
 * \brief Operands for ROSE_INSTR_ENGINES_EOD, which checks outfixes and
 * suffixes for EOD and fires reports if so.
 */
struct ROSE_STRUCT_ENGINES_EOD {
u8 code; //!< From enum RoseInstructionCode.
u32 iter_offset; //!< Offset of mmbit_sparse_iter structure.
};
/** \brief Operands for ROSE_INSTR_END (end of program); carries only the code. */
struct ROSE_STRUCT_END {
u8 code; //!< From enum RoseInstructionCode.
};