diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 736e0d35..321d1ca2 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -5535,6 +5535,17 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { bc.resources.has_floating = true; } + // Build delay rebuild HWLM matcher. + size_t drsize = 0; + auto drtable = buildDelayRebuildMatcher(*this, bc.longLitLengthThreshold, + final_to_frag_map, &drsize); + u32 drmatcherOffset = 0; + if (drtable) { + currOffset = ROUNDUP_CL(currOffset); + drmatcherOffset = currOffset; + currOffset += verify_u32(drsize); + } + // Build EOD-anchored HWLM matcher. size_t esize = 0; auto etable = buildEodAnchoredMatcher(*this, final_to_frag_map, &esize); @@ -5632,6 +5643,10 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { assert(fmatcherOffset); memcpy(ptr + fmatcherOffset, ftable.get(), fsize); } + if (drtable) { + assert(drmatcherOffset); + memcpy(ptr + drmatcherOffset, drtable.get(), drsize); + } if (etable) { assert(ematcherOffset); memcpy(ptr + ematcherOffset, etable.get(), esize); @@ -5724,6 +5739,7 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { engine->ematcherOffset = ematcherOffset; engine->sbmatcherOffset = sbmatcherOffset; engine->fmatcherOffset = fmatcherOffset; + engine->drmatcherOffset = drmatcherOffset; engine->longLitTableOffset = longLitTableOffset; engine->amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED); engine->fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING); diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 495d6f36..abd34629 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -507,23 +507,27 @@ void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) { const auto final_to_frag_map = groupByFragment(build); - auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_ANCHORED, + auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_ANCHORED, false, longLitLengthThreshold); dumpTestLiterals(base + "rose_anchored_test_literals.txt", mp.lits); - mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, + mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false, longLitLengthThreshold); dumpTestLiterals(base + "rose_float_test_literals.txt", mp.lits); - mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED, + mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, true, + longLitLengthThreshold); + dumpTestLiterals(base + "rose_delay_rebuild_test_literals.txt", mp.lits); + + mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED, false, build.ematcher_region_size); dumpTestLiterals(base + "rose_eod_test_literals.txt", mp.lits); if (!build.cc.streaming) { - mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, + mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); auto mp2 = makeMatcherProto(build, final_to_frag_map, - ROSE_ANCHORED_SMALL_BLOCK, + ROSE_ANCHORED_SMALL_BLOCK, false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); mp.lits.insert(end(mp.lits), begin(mp2.lits), end(mp2.lits)); dumpTestLiterals(base + "rose_smallblock_test_literals.txt", mp.lits); diff --git a/src/rose/rose_build_matchers.cpp b/src/rose/rose_build_matchers.cpp index e16a0ac7..9f770973 100644 --- a/src/rose/rose_build_matchers.cpp +++ b/src/rose/rose_build_matchers.cpp @@ -666,8 +666,8 @@ void trim_to_suffix(Container &c, size_t len) { MatcherProto makeMatcherProto(const RoseBuildImpl &build, const map &final_to_frag_map, - rose_literal_table table, size_t max_len, - u32 max_offset) { + rose_literal_table table, bool delay_rebuild, + size_t max_len, u32 max_offset) { MatcherProto mp; for (const auto &e : build.literals.right) { @@ -694,6 +694,13 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build, DEBUG_PRINTF("lit='%s' (len %zu)\n", escapeString(lit).c_str(), lit.length()); + // When building the delay rebuild table, we only want to include + // literals that have delayed variants. + if (delay_rebuild && info.delayed_ids.empty()) { + DEBUG_PRINTF("not needed for delay rebuild\n"); + continue; + } + if (max_offset != ROSE_BOUND_INF) { u64a min_report = literalMinReportOffset(build, e.second, info); if (min_report > max_offset) { @@ -802,7 +809,7 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, *fsize = 0; *fgroups = 0; - auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, + auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false, longLitLengthThreshold); if (mp.lits.empty()) { DEBUG_PRINTF("empty floating matcher\n"); @@ -832,6 +839,36 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, return hwlm; } +aligned_unique_ptr buildDelayRebuildMatcher( + const RoseBuildImpl &build, size_t longLitLengthThreshold, + const map &final_to_frag_map, size_t *drsize) { + *drsize = 0; + + if (!build.cc.streaming) { + DEBUG_PRINTF("not streaming\n"); + return nullptr; + } + + auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, true, + longLitLengthThreshold); + if (mp.lits.empty()) { + DEBUG_PRINTF("empty delay rebuild matcher\n"); + return nullptr; + } + + auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups()); + if (!hwlm) { + throw CompileError("Unable to generate bytecode."); + } + + buildAccel(build, mp, *hwlm); + + *drsize = hwlmSize(hwlm.get()); + assert(*drsize); + DEBUG_PRINTF("built delay rebuild table size %zu bytes\n", *drsize); + return hwlm; +} + aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, const map &final_to_frag_map, size_t *sbsize) { @@ -849,7 +886,7 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, return nullptr; } - auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, + auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_FLOATING, false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); if (mp.lits.empty()) { DEBUG_PRINTF("no floating table\n"); @@ -861,7 +898,7 @@ buildSmallBlockMatcher(const RoseBuildImpl &build, auto mp_anchored = makeMatcherProto(build, final_to_frag_map, ROSE_ANCHORED_SMALL_BLOCK, - ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); + false, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); if (mp_anchored.lits.empty()) { DEBUG_PRINTF("no small-block anchored literals\n"); return nullptr; @@ -898,7 +935,7 @@ buildEodAnchoredMatcher(const RoseBuildImpl &build, *esize = 0; auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED, - build.ematcher_region_size); + false, build.ematcher_region_size); if (mp.lits.empty()) { DEBUG_PRINTF("no eod anchored literals\n"); diff --git a/src/rose/rose_build_matchers.h b/src/rose/rose_build_matchers.h index a1817307..b06d460f 100644 --- a/src/rose/rose_build_matchers.h +++ b/src/rose/rose_build_matchers.h @@ -69,8 +69,8 @@ struct MatcherProto { */ MatcherProto makeMatcherProto(const RoseBuildImpl &build, const std::map &final_to_frag_map, - rose_literal_table table, size_t max_len, - u32 max_offset = ROSE_BOUND_INF); + rose_literal_table table, bool delay_rebuild, + size_t max_len, u32 max_offset = ROSE_BOUND_INF); aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold, @@ -79,6 +79,10 @@ aligned_unique_ptr buildFloatingMatcher(const RoseBuildImpl &build, size_t *fsize, size_t *historyRequired); +aligned_unique_ptr buildDelayRebuildMatcher( + const RoseBuildImpl &build, size_t longLitLengthThreshold, + const std::map &final_to_frag_map, size_t *drsize); + aligned_unique_ptr buildSmallBlockMatcher(const RoseBuildImpl &build, const std::map &final_to_frag_map, diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp index 96f49688..d83f8f9e 100644 --- a/src/rose/rose_dump.cpp +++ b/src/rose/rose_dump.cpp @@ -108,6 +108,11 @@ const HWLM *getFloatingMatcher(const RoseEngine *t) { return (const HWLM *)loadFromByteCodeOffset(t, t->fmatcherOffset); } +static +const HWLM *getDelayRebuildMatcher(const RoseEngine *t) { + return (const HWLM *)loadFromByteCodeOffset(t, t->drmatcherOffset); +} + static const HWLM *getEodMatcher(const RoseEngine *t) { return (const HWLM *)loadFromByteCodeOffset(t, t->ematcherOffset); @@ -1158,6 +1163,7 @@ void roseDumpText(const RoseEngine *t, FILE *f) { const void *atable = getAnchoredMatcher(t); const HWLM *ftable = getFloatingMatcher(t); + const HWLM *drtable = getDelayRebuildMatcher(t); const HWLM *etable = getEodMatcher(t); const HWLM *sbtable = getSmallBlockMatcher(t); @@ -1212,6 +1218,8 @@ void roseDumpText(const RoseEngine *t, FILE *f) { } else { fprintf(f, "\n"); } + fprintf(f, " - delay-rb matcher : %zu bytes\n", + drtable ? hwlmSize(drtable) : 0); fprintf(f, " - eod-anch matcher : %zu bytes over last %u bytes\n", etable ? hwlmSize(etable) : 0, t->ematcherRegionSize); fprintf(f, " - small-blk matcher : %zu bytes over %u bytes\n", @@ -1274,6 +1282,11 @@ void roseDumpText(const RoseEngine *t, FILE *f) { hwlmPrintStats(ftable, f); } + if (drtable) { + fprintf(f, "\nDelay Rebuild literal matcher stats:\n\n"); + hwlmPrintStats(drtable, f); + } + if (etable) { fprintf(f, "\nEOD-anchored literal matcher stats:\n\n"); hwlmPrintStats(etable, f); @@ -1322,6 +1335,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) { DUMP_U32(t, amatcherOffset); DUMP_U32(t, ematcherOffset); DUMP_U32(t, fmatcherOffset); + DUMP_U32(t, drmatcherOffset); DUMP_U32(t, sbmatcherOffset); DUMP_U32(t, longLitTableOffset); DUMP_U32(t, amatcherMinWidth); diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index bf6e9a86..8e55a37d 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -326,6 +326,7 @@ struct RoseEngine { u32 amatcherOffset; // offset of the anchored literal matcher (bytes) u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes) u32 fmatcherOffset; // offset of the floating literal matcher (bytes) + u32 drmatcherOffset; // offset of the delayed rebuild table (bytes) u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes) u32 longLitTableOffset; // offset of the long literal table u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern diff --git a/src/rose/stream.c b/src/rose/stream.c index 17139b25..31224276 100644 --- a/src/rose/stream.c +++ b/src/rose/stream.c @@ -412,16 +412,22 @@ void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state, } static really_inline -void do_rebuild(const struct RoseEngine *t, const struct HWLM *ftable, - struct hs_scratch *scratch) { +void do_rebuild(const struct RoseEngine *t, struct hs_scratch *scratch) { assert(!can_stop_matching(scratch)); + + if (!t->drmatcherOffset) { + DEBUG_PRINTF("no delayed rebuild table\n"); + return; + } + + const struct HWLM *hwlm = getByOffset(t, t->drmatcherOffset); size_t len = MIN(scratch->core_info.hlen, t->delayRebuildLength); const u8 *buf = scratch->core_info.hbuf + scratch->core_info.hlen - len; DEBUG_PRINTF("BEGIN FLOATING REBUILD over %zu bytes\n", len); scratch->core_info.status &= ~STATUS_DELAY_DIRTY; - hwlmExec(ftable, buf, len, 0, roseDelayRebuildCallback, scratch, + hwlmExec(hwlm, buf, len, 0, roseDelayRebuildCallback, scratch, scratch->tctxt.groups); assert(!can_stop_matching(scratch)); } @@ -637,13 +643,13 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { if (!flen) { if (rebuild) { /* rebuild floating delayed match stuff */ - do_rebuild(t, ftable, scratch); + do_rebuild(t, scratch); } goto flush_delay_and_exit; } if (rebuild) { /* rebuild floating delayed match stuff */ - do_rebuild(t, ftable, scratch); + do_rebuild(t, scratch); } if (flen + offset <= t->floatingMinDistance) {