mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
roseCheckNfaEod: use sparse iterator for EOD
Rather than checking all active outfix/suffix engines, use a sparse iterator to check only those engines that accept at EOD.
This commit is contained in:
parent
04dfed2602
commit
b2a76e6e2b
@ -125,33 +125,44 @@ int roseEodRunIterator(const struct RoseEngine *t, u64a offset,
|
|||||||
return MO_CONTINUE_MATCHING;
|
return MO_CONTINUE_MATCHING;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Check for (and deliver) reports from active output-exposed (suffix
|
||||||
|
* or outfix) NFAs.
|
||||||
|
*/
|
||||||
static rose_inline
|
static rose_inline
|
||||||
void roseCheckNfaEod(const struct RoseEngine *t, u8 *state,
|
void roseCheckNfaEod(const struct RoseEngine *t, u8 *state,
|
||||||
struct hs_scratch *scratch, u64a offset,
|
struct hs_scratch *scratch, u64a offset,
|
||||||
const char is_streaming) {
|
const char is_streaming) {
|
||||||
|
if (!t->eodNfaIterOffset) {
|
||||||
|
DEBUG_PRINTF("no engines that report at EOD\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/* data, len is used for state decompress, should be full available data */
|
/* data, len is used for state decompress, should be full available data */
|
||||||
const u8 *aa = getActiveLeafArray(t, state);
|
|
||||||
const u32 aaCount = t->activeArrayCount;
|
|
||||||
|
|
||||||
u8 key = 0;
|
u8 key = 0;
|
||||||
|
|
||||||
if (is_streaming) {
|
if (is_streaming) {
|
||||||
const u8 *eod_data = scratch->core_info.hbuf;
|
const u8 *eod_data = scratch->core_info.hbuf;
|
||||||
size_t eod_len = scratch->core_info.hlen;
|
size_t eod_len = scratch->core_info.hlen;
|
||||||
key = eod_len ? eod_data[eod_len - 1] : 0;
|
key = eod_len ? eod_data[eod_len - 1] : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID;
|
const u8 *aa = getActiveLeafArray(t, state);
|
||||||
qi = mmbit_iterate(aa, aaCount, qi)) {
|
const u32 aaCount = t->activeArrayCount;
|
||||||
|
|
||||||
|
const struct mmbit_sparse_iter *it = getByOffset(t, t->eodNfaIterOffset);
|
||||||
|
assert(ISALIGNED(it));
|
||||||
|
|
||||||
|
u32 idx = 0;
|
||||||
|
struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
|
||||||
|
|
||||||
|
for (u32 qi = mmbit_sparse_iter_begin(aa, aaCount, &idx, it, si_state);
|
||||||
|
qi != MMB_INVALID;
|
||||||
|
qi = mmbit_sparse_iter_next(aa, aaCount, qi, &idx, it, si_state)) {
|
||||||
const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
|
const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
|
||||||
const struct NFA *nfa = getNfaByInfo(t, info);
|
const struct NFA *nfa = getNfaByInfo(t, info);
|
||||||
|
|
||||||
if (!nfaAcceptsEod(nfa)) {
|
|
||||||
DEBUG_PRINTF("nfa %u does not accept eod\n", qi);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUG_PRINTF("checking nfa %u\n", qi);
|
DEBUG_PRINTF("checking nfa %u\n", qi);
|
||||||
|
assert(nfaAcceptsEod(nfa));
|
||||||
|
|
||||||
char *fstate = scratch->fullState + info->fullStateOffset;
|
char *fstate = scratch->fullState + info->fullStateOffset;
|
||||||
const char *sstate = (const char *)state + info->stateOffset;
|
const char *sstate = (const char *)state + info->stateOffset;
|
||||||
|
@ -2294,6 +2294,29 @@ void buildSuffixEkeyLists(const RoseBuildImpl &tbi, build_context &bc,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns sparse iter offset in engine blob. */
|
||||||
|
static
|
||||||
|
u32 buildEodNfaIterator(build_context &bc, const u32 activeQueueCount) {
|
||||||
|
vector<u32> keys;
|
||||||
|
for (u32 qi = 0; qi < activeQueueCount; ++qi) {
|
||||||
|
const NFA *n = get_nfa_from_blob(bc, qi);
|
||||||
|
if (nfaAcceptsEod(n)) {
|
||||||
|
DEBUG_PRINTF("nfa qi=%u accepts eod\n", qi);
|
||||||
|
keys.push_back(qi);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (keys.empty()) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUG_PRINTF("building iter for %zu nfas\n", keys.size());
|
||||||
|
|
||||||
|
vector<mmbit_sparse_iter> iter;
|
||||||
|
mmbBuildSparseIterator(iter, keys, activeQueueCount);
|
||||||
|
return addIteratorToTable(bc, iter);
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
bool hasMpvTrigger(const set<u32> &reports, const ReportManager &rm) {
|
bool hasMpvTrigger(const set<u32> &reports, const ReportManager &rm) {
|
||||||
for (u32 r : reports) {
|
for (u32 r : reports) {
|
||||||
@ -3802,6 +3825,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
|||||||
&leftfixBeginQueue)) {
|
&leftfixBeginQueue)) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
u32 eodNfaIterOffset = buildEodNfaIterator(bc, leftfixBeginQueue);
|
||||||
buildCountingMiracles(*this, bc);
|
buildCountingMiracles(*this, bc);
|
||||||
|
|
||||||
u32 queue_count = qif.allocated_count(); /* excludes anchored matcher q;
|
u32 queue_count = qif.allocated_count(); /* excludes anchored matcher q;
|
||||||
@ -4054,6 +4078,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
|||||||
engine->eodProgramOffset = eodProgramOffset;
|
engine->eodProgramOffset = eodProgramOffset;
|
||||||
engine->eodIterProgramOffset = eodIterProgramOffset;
|
engine->eodIterProgramOffset = eodIterProgramOffset;
|
||||||
engine->eodIterOffset = eodIterOffset;
|
engine->eodIterOffset = eodIterOffset;
|
||||||
|
engine->eodNfaIterOffset = eodNfaIterOffset;
|
||||||
|
|
||||||
engine->lastByteHistoryIterOffset = lastByteOffset;
|
engine->lastByteHistoryIterOffset = lastByteOffset;
|
||||||
|
|
||||||
|
@ -861,6 +861,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
|
|||||||
DUMP_U32(t, eodProgramOffset);
|
DUMP_U32(t, eodProgramOffset);
|
||||||
DUMP_U32(t, eodIterProgramOffset);
|
DUMP_U32(t, eodIterProgramOffset);
|
||||||
DUMP_U32(t, eodIterOffset);
|
DUMP_U32(t, eodIterOffset);
|
||||||
|
DUMP_U32(t, eodNfaIterOffset);
|
||||||
DUMP_U32(t, lastByteHistoryIterOffset);
|
DUMP_U32(t, lastByteHistoryIterOffset);
|
||||||
DUMP_U32(t, minWidth);
|
DUMP_U32(t, minWidth);
|
||||||
DUMP_U32(t, minWidthExcludingBoundaries);
|
DUMP_U32(t, minWidthExcludingBoundaries);
|
||||||
|
@ -428,6 +428,9 @@ struct RoseEngine {
|
|||||||
u32 eodIterProgramOffset; // or 0 if no eod iterator program
|
u32 eodIterProgramOffset; // or 0 if no eod iterator program
|
||||||
u32 eodIterOffset; // offset to EOD sparse iter or 0 if none
|
u32 eodIterOffset; // offset to EOD sparse iter or 0 if none
|
||||||
|
|
||||||
|
/** \brief Offset to sparse iter over outfix/suffix NFAs that accept EOD. */
|
||||||
|
u32 eodNfaIterOffset;
|
||||||
|
|
||||||
u32 lastByteHistoryIterOffset; // if non-zero
|
u32 lastByteHistoryIterOffset; // if non-zero
|
||||||
|
|
||||||
/** \brief Minimum number of bytes required to match. */
|
/** \brief Minimum number of bytes required to match. */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user