mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose: build a separate delay rebuild matcher
This commit is contained in:
parent
5706acf5c0
commit
ac858cd47c
@ -5535,6 +5535,17 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
||||
bc.resources.has_floating = true;
|
||||
}
|
||||
|
||||
// Build delay rebuild HWLM matcher.
|
||||
size_t drsize = 0;
|
||||
auto drtable = buildDelayRebuildMatcher(*this, bc.longLitLengthThreshold,
|
||||
final_to_frag_map, &drsize);
|
||||
u32 drmatcherOffset = 0;
|
||||
if (drtable) {
|
||||
currOffset = ROUNDUP_CL(currOffset);
|
||||
drmatcherOffset = currOffset;
|
||||
currOffset += verify_u32(drsize);
|
||||
}
|
||||
|
||||
// Build EOD-anchored HWLM matcher.
|
||||
size_t esize = 0;
|
||||
auto etable = buildEodAnchoredMatcher(*this, final_to_frag_map, &esize);
|
||||
@ -5632,6 +5643,10 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
||||
assert(fmatcherOffset);
|
||||
memcpy(ptr + fmatcherOffset, ftable.get(), fsize);
|
||||
}
|
||||
if (drtable) {
|
||||
assert(drmatcherOffset);
|
||||
memcpy(ptr + drmatcherOffset, drtable.get(), drsize);
|
||||
}
|
||||
if (etable) {
|
||||
assert(ematcherOffset);
|
||||
memcpy(ptr + ematcherOffset, etable.get(), esize);
|
||||
@ -5724,6 +5739,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
||||
engine->ematcherOffset = ematcherOffset;
|
||||
engine->sbmatcherOffset = sbmatcherOffset;
|
||||
engine->fmatcherOffset = fmatcherOffset;
|
||||
engine->drmatcherOffset = drmatcherOffset;
|
||||
engine->longLitTableOffset = longLitTableOffset;
|
||||
engine->amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED);
|
||||
engine->fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING);
|
||||
|
@ -507,23 +507,27 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) {
|
||||
|
||||
const auto final_to_frag_map = groupByFragment(build);
|
||||
|
||||
auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_ANCHORED,
|
||||
auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_ANCHORED, false,
|
||||
longLitLengthThreshold);
|
||||
dumpTestLiterals(base + "rose_anchored_test_literals.txt", mp.lits);
|
||||
|
||||
mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING,
|
||||
mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false,
|
||||
longLitLengthThreshold);
|
||||
dumpTestLiterals(base + "rose_float_test_literals.txt", mp.lits);
|
||||
|
||||
mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED,
|
||||
mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, true,
|
||||
longLitLengthThreshold);
|
||||
dumpTestLiterals(base + "rose_delay_rebuild_test_literals.txt", mp.lits);
|
||||
|
||||
mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED, false,
|
||||
build.ematcher_region_size);
|
||||
dumpTestLiterals(base + "rose_eod_test_literals.txt", mp.lits);
|
||||
|
||||
if (!build.cc.streaming) {
|
||||
mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING,
|
||||
mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false,
|
||||
ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
|
||||
auto mp2 = makeMatcherProto(build, final_to_frag_map,
|
||||
ROSE_ANCHORED_SMALL_BLOCK,
|
||||
ROSE_ANCHORED_SMALL_BLOCK, false,
|
||||
ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
|
||||
mp.lits.insert(end(mp.lits), begin(mp2.lits), end(mp2.lits));
|
||||
dumpTestLiterals(base + "rose_smallblock_test_literals.txt", mp.lits);
|
||||
|
@ -666,8 +666,8 @@ void trim_to_suffix(Container &c, size_t len) {
|
||||
|
||||
MatcherProto makeMatcherProto(const RoseBuildImpl &build,
|
||||
const map<u32, u32> &final_to_frag_map,
|
||||
rose_literal_table table, size_t max_len,
|
||||
u32 max_offset) {
|
||||
rose_literal_table table, bool delay_rebuild,
|
||||
size_t max_len, u32 max_offset) {
|
||||
MatcherProto mp;
|
||||
|
||||
for (const auto &e : build.literals.right) {
|
||||
@ -694,6 +694,13 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build,
|
||||
DEBUG_PRINTF("lit='%s' (len %zu)\n", escapeString(lit).c_str(),
|
||||
lit.length());
|
||||
|
||||
// When building the delay rebuild table, we only want to include
|
||||
// literals that have delayed variants.
|
||||
if (delay_rebuild && info.delayed_ids.empty()) {
|
||||
DEBUG_PRINTF("not needed for delay rebuild\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (max_offset != ROSE_BOUND_INF) {
|
||||
u64a min_report = literalMinReportOffset(build, e.second, info);
|
||||
if (min_report > max_offset) {
|
||||
@ -802,7 +809,7 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,
|
||||
*fsize = 0;
|
||||
*fgroups = 0;
|
||||
|
||||
auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING,
|
||||
auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false,
|
||||
longLitLengthThreshold);
|
||||
if (mp.lits.empty()) {
|
||||
DEBUG_PRINTF("empty floating matcher\n");
|
||||
@ -832,6 +839,36 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,
|
||||
return hwlm;
|
||||
}
|
||||
|
||||
/**
 * \brief Build the HWLM literal matcher used for delay rebuild.
 *
 * The matcher is built from the ROSE_FLOATING literal table with the
 * delay_rebuild flag set, which restricts makeMatcherProto to literals that
 * have delayed variants.
 *
 * \param build Rose build state; only consulted, not modified.
 * \param longLitLengthThreshold Passed to makeMatcherProto as its max_len.
 * \param final_to_frag_map Passed through to makeMatcherProto.
 * \param drsize Output: size of the built matcher as reported by hwlmSize(),
 *        or zero when no matcher is built.
 * \return The built matcher, or nullptr when not in streaming mode or when
 *         there are no literals to put in the table.
 * \throws CompileError if hwlmBuild fails to produce a matcher.
 */
aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(
|
||||
const RoseBuildImpl &build, size_t longLitLengthThreshold,
|
||||
const map<u32, u32> &final_to_frag_map, size_t *drsize) {
|
||||
// Default the reported size to zero so every early return leaves it sane.
*drsize = 0;
|
||||
|
||||
// A delay rebuild table is only built for streaming mode.
if (!build.cc.streaming) {
|
||||
DEBUG_PRINTF("not streaming\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// delay_rebuild=true: keep only floating literals with delayed variants.
auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, true,
|
||||
longLitLengthThreshold);
|
||||
if (mp.lits.empty()) {
|
||||
DEBUG_PRINTF("empty delay rebuild matcher\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// NOTE(review): meaning of the `false` argument is defined by hwlmBuild,
// which is not visible here -- confirm against its declaration.
auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups());
|
||||
if (!hwlm) {
|
||||
throw CompileError("Unable to generate bytecode.");
|
||||
}
|
||||
|
||||
buildAccel(build, mp, *hwlm);
|
||||
|
||||
// Report the matcher's size to the caller; a built matcher is never empty.
*drsize = hwlmSize(hwlm.get());
|
||||
assert(*drsize);
|
||||
DEBUG_PRINTF("built delay rebuild table size %zu bytes\n", *drsize);
|
||||
return hwlm;
|
||||
}
|
||||
|
||||
aligned_unique_ptr<HWLM>
|
||||
buildSmallBlockMatcher(const RoseBuildImpl &build,
|
||||
const map<u32, u32> &final_to_frag_map, size_t *sbsize) {
|
||||
@ -849,7 +886,7 @@ buildSmallBlockMatcher(const RoseBuildImpl &build,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING,
|
||||
auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false,
|
||||
ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
|
||||
if (mp.lits.empty()) {
|
||||
DEBUG_PRINTF("no floating table\n");
|
||||
@ -861,7 +898,7 @@ buildSmallBlockMatcher(const RoseBuildImpl &build,
|
||||
|
||||
auto mp_anchored =
|
||||
makeMatcherProto(build, final_to_frag_map, ROSE_ANCHORED_SMALL_BLOCK,
|
||||
ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
|
||||
false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
|
||||
if (mp_anchored.lits.empty()) {
|
||||
DEBUG_PRINTF("no small-block anchored literals\n");
|
||||
return nullptr;
|
||||
@ -898,7 +935,7 @@ buildEodAnchoredMatcher(const RoseBuildImpl &build,
|
||||
*esize = 0;
|
||||
|
||||
auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED,
|
||||
build.ematcher_region_size);
|
||||
false, build.ematcher_region_size);
|
||||
|
||||
if (mp.lits.empty()) {
|
||||
DEBUG_PRINTF("no eod anchored literals\n");
|
||||
|
@ -69,8 +69,8 @@ struct MatcherProto {
|
||||
*/
|
||||
MatcherProto makeMatcherProto(const RoseBuildImpl &build,
|
||||
const std::map<u32, u32> &final_to_frag_map,
|
||||
rose_literal_table table, size_t max_len,
|
||||
u32 max_offset = ROSE_BOUND_INF);
|
||||
rose_literal_table table, bool delay_rebuild,
|
||||
size_t max_len, u32 max_offset = ROSE_BOUND_INF);
|
||||
|
||||
aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
|
||||
size_t longLitLengthThreshold,
|
||||
@ -79,6 +79,10 @@ aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
|
||||
size_t *fsize,
|
||||
size_t *historyRequired);
|
||||
|
||||
/**
 * \brief Build the delay rebuild HWLM matcher.
 *
 * On return, \p drsize holds the size of the built matcher, or zero if none
 * was built. Returns nullptr when no matcher is built (for example, when not
 * compiling for streaming mode or when there are no literals for the table).
 */
aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(
|
||||
const RoseBuildImpl &build, size_t longLitLengthThreshold,
|
||||
const std::map<u32, u32> &final_to_frag_map, size_t *drsize);
|
||||
|
||||
aligned_unique_ptr<HWLM>
|
||||
buildSmallBlockMatcher(const RoseBuildImpl &build,
|
||||
const std::map<u32, u32> &final_to_frag_map,
|
||||
|
@ -108,6 +108,11 @@ const HWLM *getFloatingMatcher(const RoseEngine *t) {
|
||||
return (const HWLM *)loadFromByteCodeOffset(t, t->fmatcherOffset);
|
||||
}
|
||||
|
||||
/**
 * \brief Fetch the delay rebuild matcher from the engine bytecode using
 * t->drmatcherOffset.
 *
 * Callers in this file null-check the result before use.
 * NOTE(review): presumably loadFromByteCodeOffset yields null for a zero
 * offset -- confirm against its definition.
 */
static
|
||||
const HWLM *getDelayRebuildMatcher(const RoseEngine *t) {
|
||||
return (const HWLM *)loadFromByteCodeOffset(t, t->drmatcherOffset);
|
||||
}
|
||||
|
||||
static
|
||||
const HWLM *getEodMatcher(const RoseEngine *t) {
|
||||
return (const HWLM *)loadFromByteCodeOffset(t, t->ematcherOffset);
|
||||
@ -1158,6 +1163,7 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
|
||||
|
||||
const void *atable = getAnchoredMatcher(t);
|
||||
const HWLM *ftable = getFloatingMatcher(t);
|
||||
const HWLM *drtable = getDelayRebuildMatcher(t);
|
||||
const HWLM *etable = getEodMatcher(t);
|
||||
const HWLM *sbtable = getSmallBlockMatcher(t);
|
||||
|
||||
@ -1212,6 +1218,8 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
|
||||
} else {
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
fprintf(f, " - delay-rb matcher : %zu bytes\n",
|
||||
drtable ? hwlmSize(drtable) : 0);
|
||||
fprintf(f, " - eod-anch matcher : %zu bytes over last %u bytes\n",
|
||||
etable ? hwlmSize(etable) : 0, t->ematcherRegionSize);
|
||||
fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n",
|
||||
@ -1274,6 +1282,11 @@ void roseDumpText(const RoseEngine *t, FILE *f) {
|
||||
hwlmPrintStats(ftable, f);
|
||||
}
|
||||
|
||||
if (drtable) {
|
||||
fprintf(f, "\nDelay Rebuild literal matcher stats:\n\n");
|
||||
hwlmPrintStats(drtable, f);
|
||||
}
|
||||
|
||||
if (etable) {
|
||||
fprintf(f, "\nEOD-anchored literal matcher stats:\n\n");
|
||||
hwlmPrintStats(etable, f);
|
||||
@ -1322,6 +1335,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
|
||||
DUMP_U32(t, amatcherOffset);
|
||||
DUMP_U32(t, ematcherOffset);
|
||||
DUMP_U32(t, fmatcherOffset);
|
||||
DUMP_U32(t, drmatcherOffset);
|
||||
DUMP_U32(t, sbmatcherOffset);
|
||||
DUMP_U32(t, longLitTableOffset);
|
||||
DUMP_U32(t, amatcherMinWidth);
|
||||
|
@ -326,6 +326,7 @@ struct RoseEngine {
|
||||
u32 amatcherOffset; // offset of the anchored literal matcher (bytes)
|
||||
u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes)
|
||||
u32 fmatcherOffset; // offset of the floating literal matcher (bytes)
|
||||
u32 drmatcherOffset; // offset of the delayed rebuild table (bytes)
|
||||
u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes)
|
||||
u32 longLitTableOffset; // offset of the long literal table
|
||||
u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern
|
||||
|
@ -412,16 +412,22 @@ void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state,
|
||||
}
|
||||
|
||||
/*
 * Re-scan the tail of the history buffer with the delay rebuild literal
 * matcher found at t->drmatcherOffset, reporting matches through
 * roseDelayRebuildCallback. Returns without scanning when the engine has no
 * delay rebuild table (drmatcherOffset is zero).
 */
static really_inline
|
||||
void do_rebuild(const struct RoseEngine *t, const struct HWLM *ftable,
|
||||
struct hs_scratch *scratch) {
|
||||
void do_rebuild(const struct RoseEngine *t, struct hs_scratch *scratch) {
|
||||
assert(!can_stop_matching(scratch));
|
||||
|
||||
/* A zero offset means no delay rebuild table was built for this engine. */
if (!t->drmatcherOffset) {
|
||||
DEBUG_PRINTF("no delayed rebuild table\n");
|
||||
return;
|
||||
}
|
||||
|
||||
const struct HWLM *hwlm = getByOffset(t, t->drmatcherOffset);
|
||||
/* Scan at most delayRebuildLength bytes, taken from the end of history. */
size_t len = MIN(scratch->core_info.hlen, t->delayRebuildLength);
|
||||
const u8 *buf = scratch->core_info.hbuf + scratch->core_info.hlen - len;
|
||||
DEBUG_PRINTF("BEGIN FLOATING REBUILD over %zu bytes\n", len);
|
||||
|
||||
/* Clear the delay-dirty status bit before running the rebuild scan. */
scratch->core_info.status &= ~STATUS_DELAY_DIRTY;
|
||||
|
||||
hwlmExec(ftable, buf, len, 0, roseDelayRebuildCallback, scratch,
|
||||
hwlmExec(hwlm, buf, len, 0, roseDelayRebuildCallback, scratch,
|
||||
scratch->tctxt.groups);
|
||||
assert(!can_stop_matching(scratch));
|
||||
}
|
||||
@ -637,13 +643,13 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
|
||||
|
||||
if (!flen) {
|
||||
if (rebuild) { /* rebuild floating delayed match stuff */
|
||||
do_rebuild(t, ftable, scratch);
|
||||
do_rebuild(t, scratch);
|
||||
}
|
||||
goto flush_delay_and_exit;
|
||||
}
|
||||
|
||||
if (rebuild) { /* rebuild floating delayed match stuff */
|
||||
do_rebuild(t, ftable, scratch);
|
||||
do_rebuild(t, scratch);
|
||||
}
|
||||
|
||||
if (flen + offset <= t->floatingMinDistance) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user