mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
rose: use program offset, not final_id, in atable
This removes the need to look up the program offset in a table when handling an anchored literal match.
This commit is contained in:
parent
4dbbc4eaa5
commit
8754cbbd24
@ -220,10 +220,9 @@ int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) {
|
|||||||
tctxt->lastEndOffset = real_end;
|
tctxt->lastEndOffset = real_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 *programs = getByOffset(t, t->litProgramOffset);
|
// Note that the "id" we have been handed is the program offset.
|
||||||
assert(id < t->literalCount);
|
|
||||||
const u8 flags = ROSE_PROG_FLAG_IN_ANCHORED;
|
const u8 flags = ROSE_PROG_FLAG_IN_ANCHORED;
|
||||||
if (roseRunProgram(t, scratch, programs[id], start, real_end, match_len,
|
if (roseRunProgram(t, scratch, id, start, real_end, match_len,
|
||||||
flags) == HWLM_TERMINATE_MATCHING) {
|
flags) == HWLM_TERMINATE_MATCHING) {
|
||||||
assert(can_stop_matching(scratch));
|
assert(can_stop_matching(scratch));
|
||||||
DEBUG_PRINTF("caller requested termination\n");
|
DEBUG_PRINTF("caller requested termination\n");
|
||||||
|
@ -204,6 +204,28 @@ void remapAnchoredReports(RoseBuildImpl &tbi) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void remapIds(flat_set<ReportID> &reports, const vector<u32> &litPrograms) {
|
||||||
|
flat_set<ReportID> new_reports;
|
||||||
|
for (auto id : reports) {
|
||||||
|
assert(id < litPrograms.size());
|
||||||
|
new_reports.insert(litPrograms.at(id));
|
||||||
|
}
|
||||||
|
reports = move(new_reports);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Replace the reports (which are literal final_ids) in the given
|
||||||
|
* raw_dfa with program offsets.
|
||||||
|
*/
|
||||||
|
static
|
||||||
|
void remapIdsToPrograms(raw_dfa &rdfa, const vector<u32> &litPrograms) {
|
||||||
|
for (dstate &ds : rdfa.states) {
|
||||||
|
remapIds(ds.reports, litPrograms);
|
||||||
|
remapIds(ds.reports_eod, litPrograms);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void populate_holder(const simple_anchored_info &sai, const set<u32> &exit_ids,
|
void populate_holder(const simple_anchored_info &sai, const set<u32> &exit_ids,
|
||||||
NGHolder *h_in) {
|
NGHolder *h_in) {
|
||||||
@ -826,7 +848,7 @@ vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build) {
|
|||||||
|
|
||||||
aligned_unique_ptr<anchored_matcher_info>
|
aligned_unique_ptr<anchored_matcher_info>
|
||||||
buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas,
|
buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas,
|
||||||
size_t *asize) {
|
const vector<u32> &litPrograms, size_t *asize) {
|
||||||
const CompileContext &cc = build.cc;
|
const CompileContext &cc = build.cc;
|
||||||
|
|
||||||
if (dfas.empty()) {
|
if (dfas.empty()) {
|
||||||
@ -835,6 +857,10 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas,
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (auto &rdfa : dfas) {
|
||||||
|
remapIdsToPrograms(rdfa, litPrograms);
|
||||||
|
}
|
||||||
|
|
||||||
vector<aligned_unique_ptr<NFA>> nfas;
|
vector<aligned_unique_ptr<NFA>> nfas;
|
||||||
vector<u32> start_offset; // start offset for each dfa (dots removed)
|
vector<u32> start_offset; // start offset for each dfa (dots removed)
|
||||||
size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc, build.rm);
|
size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc, build.rm);
|
||||||
|
@ -56,10 +56,13 @@ std::vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build);
|
|||||||
/**
|
/**
|
||||||
* \brief Construct an anchored_matcher_info runtime structure from the given
|
* \brief Construct an anchored_matcher_info runtime structure from the given
|
||||||
* set of DFAs.
|
* set of DFAs.
|
||||||
|
*
|
||||||
|
* Remap the literal final_ids used for raw_dfa reports to the program offsets
|
||||||
|
* given in litPrograms.
|
||||||
*/
|
*/
|
||||||
aligned_unique_ptr<anchored_matcher_info>
|
aligned_unique_ptr<anchored_matcher_info>
|
||||||
buildAnchoredMatcher(RoseBuildImpl &build, std::vector<raw_dfa> &dfas,
|
buildAnchoredMatcher(RoseBuildImpl &build, std::vector<raw_dfa> &dfas,
|
||||||
size_t *asize);
|
const std::vector<u32> &litPrograms, size_t *asize);
|
||||||
|
|
||||||
u32 anchoredStateSize(const anchored_matcher_info &atable);
|
u32 anchoredStateSize(const anchored_matcher_info &atable);
|
||||||
|
|
||||||
|
@ -417,6 +417,10 @@ struct build_context : boost::noncopyable {
|
|||||||
* that have already been pushed into the engine_blob. */
|
* that have already been pushed into the engine_blob. */
|
||||||
ue2::unordered_map<u32, u32> engineOffsets;
|
ue2::unordered_map<u32, u32> engineOffsets;
|
||||||
|
|
||||||
|
/** \brief Literal programs, indexed by final_id, after they have been
|
||||||
|
* written to the engine_blob. */
|
||||||
|
vector<u32> litPrograms;
|
||||||
|
|
||||||
/** \brief Minimum offset of a match from the floating table. */
|
/** \brief Minimum offset of a match from the floating table. */
|
||||||
u32 floatingMinLiteralMatchOffset = 0;
|
u32 floatingMinLiteralMatchOffset = 0;
|
||||||
|
|
||||||
@ -4736,20 +4740,20 @@ pair<u32, u32> buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
|
|||||||
const u32 num_literals = build.final_id_to_literal.size();
|
const u32 num_literals = build.final_id_to_literal.size();
|
||||||
auto lit_edge_map = findEdgesByLiteral(build);
|
auto lit_edge_map = findEdgesByLiteral(build);
|
||||||
|
|
||||||
vector<u32> litPrograms(num_literals);
|
bc.litPrograms.resize(num_literals);
|
||||||
vector<u32> delayRebuildPrograms(num_literals);
|
vector<u32> delayRebuildPrograms(num_literals);
|
||||||
|
|
||||||
for (u32 finalId = 0; finalId != num_literals; ++finalId) {
|
for (u32 finalId = 0; finalId != num_literals; ++finalId) {
|
||||||
const auto &lit_edges = lit_edge_map[finalId];
|
const auto &lit_edges = lit_edge_map[finalId];
|
||||||
|
|
||||||
litPrograms[finalId] =
|
bc.litPrograms[finalId] =
|
||||||
writeLiteralProgram(build, bc, finalId, lit_edges);
|
writeLiteralProgram(build, bc, finalId, lit_edges);
|
||||||
delayRebuildPrograms[finalId] =
|
delayRebuildPrograms[finalId] =
|
||||||
buildDelayRebuildProgram(build, bc, finalId);
|
buildDelayRebuildProgram(build, bc, finalId);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 litProgramsOffset =
|
u32 litProgramsOffset =
|
||||||
add_to_engine_blob(bc, begin(litPrograms), end(litPrograms));
|
add_to_engine_blob(bc, begin(bc.litPrograms), end(bc.litPrograms));
|
||||||
u32 delayRebuildProgramsOffset = add_to_engine_blob(
|
u32 delayRebuildProgramsOffset = add_to_engine_blob(
|
||||||
bc, begin(delayRebuildPrograms), end(delayRebuildPrograms));
|
bc, begin(delayRebuildPrograms), end(delayRebuildPrograms));
|
||||||
|
|
||||||
@ -5206,7 +5210,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
|||||||
// Build anchored matcher.
|
// Build anchored matcher.
|
||||||
size_t asize = 0;
|
size_t asize = 0;
|
||||||
u32 amatcherOffset = 0;
|
u32 amatcherOffset = 0;
|
||||||
auto atable = buildAnchoredMatcher(*this, anchored_dfas, &asize);
|
auto atable = buildAnchoredMatcher(*this, anchored_dfas, bc.litPrograms,
|
||||||
|
&asize);
|
||||||
if (atable) {
|
if (atable) {
|
||||||
currOffset = ROUNDUP_CL(currOffset);
|
currOffset = ROUNDUP_CL(currOffset);
|
||||||
amatcherOffset = currOffset;
|
amatcherOffset = currOffset;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user