rose: use program offset, not final_id, in atable

This removes the need to look up the program offset in a table when
handling an anchored literal match.
This commit is contained in:
Justin Viiret 2016-07-14 11:40:49 +10:00 committed by Matthew Barr
parent 4dbbc4eaa5
commit 8754cbbd24
4 changed files with 42 additions and 9 deletions

View File

@ -220,10 +220,9 @@ int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) {
tctxt->lastEndOffset = real_end; tctxt->lastEndOffset = real_end;
} }
const u32 *programs = getByOffset(t, t->litProgramOffset); // Note that the "id" we have been handed is the program offset.
assert(id < t->literalCount);
const u8 flags = ROSE_PROG_FLAG_IN_ANCHORED; const u8 flags = ROSE_PROG_FLAG_IN_ANCHORED;
if (roseRunProgram(t, scratch, programs[id], start, real_end, match_len, if (roseRunProgram(t, scratch, id, start, real_end, match_len,
flags) == HWLM_TERMINATE_MATCHING) { flags) == HWLM_TERMINATE_MATCHING) {
assert(can_stop_matching(scratch)); assert(can_stop_matching(scratch));
DEBUG_PRINTF("caller requested termination\n"); DEBUG_PRINTF("caller requested termination\n");

View File

@ -204,6 +204,28 @@ void remapAnchoredReports(RoseBuildImpl &tbi) {
} }
} }
/**
 * \brief Replace each id in \p reports with the value stored at that index in
 * \p litPrograms.
 *
 * Every id must be a valid index into \p litPrograms. The set is rebuilt from
 * scratch and then assigned back over \p reports.
 */
static
void remapIds(flat_set<ReportID> &reports, const vector<u32> &litPrograms) {
    flat_set<ReportID> remapped;
    for (const auto &old_id : reports) {
        // Each incoming id is used directly as an index into the table.
        assert(old_id < litPrograms.size());
        remapped.insert(litPrograms.at(old_id));
    }
    reports = move(remapped);
}
/**
 * \brief Rewrite the reports of every state in the given raw_dfa, turning
 * literal final_ids into the corresponding program offsets.
 *
 * Both the ordinary report set and the EOD report set of each state are
 * remapped through \p litPrograms.
 */
static
void remapIdsToPrograms(raw_dfa &rdfa, const vector<u32> &litPrograms) {
    for (auto &state : rdfa.states) {
        remapIds(state.reports, litPrograms);
        remapIds(state.reports_eod, litPrograms);
    }
}
static static
void populate_holder(const simple_anchored_info &sai, const set<u32> &exit_ids, void populate_holder(const simple_anchored_info &sai, const set<u32> &exit_ids,
NGHolder *h_in) { NGHolder *h_in) {
@ -826,7 +848,7 @@ vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build) {
aligned_unique_ptr<anchored_matcher_info> aligned_unique_ptr<anchored_matcher_info>
buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas, buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas,
size_t *asize) { const vector<u32> &litPrograms, size_t *asize) {
const CompileContext &cc = build.cc; const CompileContext &cc = build.cc;
if (dfas.empty()) { if (dfas.empty()) {
@ -835,6 +857,10 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas,
return nullptr; return nullptr;
} }
for (auto &rdfa : dfas) {
remapIdsToPrograms(rdfa, litPrograms);
}
vector<aligned_unique_ptr<NFA>> nfas; vector<aligned_unique_ptr<NFA>> nfas;
vector<u32> start_offset; // start offset for each dfa (dots removed) vector<u32> start_offset; // start offset for each dfa (dots removed)
size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc, build.rm); size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc, build.rm);

View File

@ -56,10 +56,13 @@ std::vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build);
/** /**
* \brief Construct an anchored_matcher_info runtime structure from the given * \brief Construct an anchored_matcher_info runtime structure from the given
* set of DFAs. * set of DFAs.
*
* Remap the literal final_ids used for raw_dfa reports to the program offsets
* given in litPrograms.
*/ */
aligned_unique_ptr<anchored_matcher_info> aligned_unique_ptr<anchored_matcher_info>
buildAnchoredMatcher(RoseBuildImpl &build, std::vector<raw_dfa> &dfas, buildAnchoredMatcher(RoseBuildImpl &build, std::vector<raw_dfa> &dfas,
size_t *asize); const std::vector<u32> &litPrograms, size_t *asize);
u32 anchoredStateSize(const anchored_matcher_info &atable); u32 anchoredStateSize(const anchored_matcher_info &atable);

View File

@ -417,6 +417,10 @@ struct build_context : boost::noncopyable {
* that have already been pushed into the engine_blob. */ * that have already been pushed into the engine_blob. */
ue2::unordered_map<u32, u32> engineOffsets; ue2::unordered_map<u32, u32> engineOffsets;
/** \brief Literal programs, indexed by final_id, after they have been
* written to the engine_blob. */
vector<u32> litPrograms;
/** \brief Minimum offset of a match from the floating table. */ /** \brief Minimum offset of a match from the floating table. */
u32 floatingMinLiteralMatchOffset = 0; u32 floatingMinLiteralMatchOffset = 0;
@ -4736,20 +4740,20 @@ pair<u32, u32> buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
const u32 num_literals = build.final_id_to_literal.size(); const u32 num_literals = build.final_id_to_literal.size();
auto lit_edge_map = findEdgesByLiteral(build); auto lit_edge_map = findEdgesByLiteral(build);
vector<u32> litPrograms(num_literals); bc.litPrograms.resize(num_literals);
vector<u32> delayRebuildPrograms(num_literals); vector<u32> delayRebuildPrograms(num_literals);
for (u32 finalId = 0; finalId != num_literals; ++finalId) { for (u32 finalId = 0; finalId != num_literals; ++finalId) {
const auto &lit_edges = lit_edge_map[finalId]; const auto &lit_edges = lit_edge_map[finalId];
litPrograms[finalId] = bc.litPrograms[finalId] =
writeLiteralProgram(build, bc, finalId, lit_edges); writeLiteralProgram(build, bc, finalId, lit_edges);
delayRebuildPrograms[finalId] = delayRebuildPrograms[finalId] =
buildDelayRebuildProgram(build, bc, finalId); buildDelayRebuildProgram(build, bc, finalId);
} }
u32 litProgramsOffset = u32 litProgramsOffset =
add_to_engine_blob(bc, begin(litPrograms), end(litPrograms)); add_to_engine_blob(bc, begin(bc.litPrograms), end(bc.litPrograms));
u32 delayRebuildProgramsOffset = add_to_engine_blob( u32 delayRebuildProgramsOffset = add_to_engine_blob(
bc, begin(delayRebuildPrograms), end(delayRebuildPrograms)); bc, begin(delayRebuildPrograms), end(delayRebuildPrograms));
@ -5206,7 +5210,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
// Build anchored matcher. // Build anchored matcher.
size_t asize = 0; size_t asize = 0;
u32 amatcherOffset = 0; u32 amatcherOffset = 0;
auto atable = buildAnchoredMatcher(*this, anchored_dfas, &asize); auto atable = buildAnchoredMatcher(*this, anchored_dfas, bc.litPrograms,
&asize);
if (atable) { if (atable) {
currOffset = ROUNDUP_CL(currOffset); currOffset = ROUNDUP_CL(currOffset);
amatcherOffset = currOffset; amatcherOffset = currOffset;