diff --git a/src/rose/match.c b/src/rose/match.c index 95cb141e..b641e39d 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -220,10 +220,9 @@ int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) { tctxt->lastEndOffset = real_end; } - const u32 *programs = getByOffset(t, t->litProgramOffset); - assert(id < t->literalCount); + // Note that the "id" we have been handed is the program offset. const u8 flags = ROSE_PROG_FLAG_IN_ANCHORED; - if (roseRunProgram(t, scratch, programs[id], start, real_end, match_len, + if (roseRunProgram(t, scratch, id, start, real_end, match_len, flags) == HWLM_TERMINATE_MATCHING) { assert(can_stop_matching(scratch)); DEBUG_PRINTF("caller requested termination\n"); diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 286cc7ae..befd0bad 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -204,6 +204,28 @@ void remapAnchoredReports(RoseBuildImpl &tbi) { } } +static +void remapIds(flat_set &reports, const vector &litPrograms) { + flat_set new_reports; + for (auto id : reports) { + assert(id < litPrograms.size()); + new_reports.insert(litPrograms.at(id)); + } + reports = move(new_reports); +} + +/** + * \brief Replace the reports (which are literal final_ids) in the given + * raw_dfa with program offsets. + */ +static +void remapIdsToPrograms(raw_dfa &rdfa, const vector &litPrograms) { + for (dstate &ds : rdfa.states) { + remapIds(ds.reports, litPrograms); + remapIds(ds.reports_eod, litPrograms); + } +} + static void populate_holder(const simple_anchored_info &sai, const set &exit_ids, NGHolder *h_in) { @@ -826,7 +848,7 @@ vector buildAnchoredDfas(RoseBuildImpl &build) { aligned_unique_ptr buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, - size_t *asize) { + const vector &litPrograms, size_t *asize) { const CompileContext &cc = build.cc; if (dfas.empty()) { @@ -835,6 +857,10 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, return nullptr; } + for (auto &rdfa : dfas) { + remapIdsToPrograms(rdfa, litPrograms); + } + vector> nfas; vector start_offset; // start offset for each dfa (dots removed) size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc, build.rm); diff --git a/src/rose/rose_build_anchored.h b/src/rose/rose_build_anchored.h index a5317f89..579b26d7 100644 --- a/src/rose/rose_build_anchored.h +++ b/src/rose/rose_build_anchored.h @@ -56,10 +56,13 @@ std::vector buildAnchoredDfas(RoseBuildImpl &build); /** * \brief Construct an anchored_matcher_info runtime structure from the given * set of DFAs. + * + * Remap the literal final_ids used for raw_dfa reports to the program offsets + * given in litPrograms. */ aligned_unique_ptr buildAnchoredMatcher(RoseBuildImpl &build, std::vector &dfas, - size_t *asize); + const std::vector &litPrograms, size_t *asize); u32 anchoredStateSize(const anchored_matcher_info &atable); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 5cd8161b..f451b8ea 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -417,6 +417,10 @@ struct build_context : boost::noncopyable { * that have already been pushed into the engine_blob. */ ue2::unordered_map engineOffsets; + /** \brief Literal programs, indexed by final_id, after they have been + * written to the engine_blob. */ + vector litPrograms; + /** \brief Minimum offset of a match from the floating table. */ u32 floatingMinLiteralMatchOffset = 0; @@ -4736,20 +4740,20 @@ pair buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) { const u32 num_literals = build.final_id_to_literal.size(); auto lit_edge_map = findEdgesByLiteral(build); - vector litPrograms(num_literals); + bc.litPrograms.resize(num_literals); vector delayRebuildPrograms(num_literals); for (u32 finalId = 0; finalId != num_literals; ++finalId) { const auto &lit_edges = lit_edge_map[finalId]; - litPrograms[finalId] = + bc.litPrograms[finalId] = writeLiteralProgram(build, bc, finalId, lit_edges); delayRebuildPrograms[finalId] = buildDelayRebuildProgram(build, bc, finalId); } u32 litProgramsOffset = - add_to_engine_blob(bc, begin(litPrograms), end(litPrograms)); + add_to_engine_blob(bc, begin(bc.litPrograms), end(bc.litPrograms)); u32 delayRebuildProgramsOffset = add_to_engine_blob( bc, begin(delayRebuildPrograms), end(delayRebuildPrograms)); @@ -5206,7 +5210,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { // Build anchored matcher. size_t asize = 0; u32 amatcherOffset = 0; - auto atable = buildAnchoredMatcher(*this, anchored_dfas, &asize); + auto atable = buildAnchoredMatcher(*this, anchored_dfas, bc.litPrograms, + &asize); if (atable) { currOffset = ROUNDUP_CL(currOffset); amatcherOffset = currOffset;