mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-07-12 13:34:45 +03:00
rose: use fragment ids earlier for anchored dfas
This commit is contained in:
parent
8b25d83415
commit
79512bd5c3
@ -183,7 +183,7 @@ void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) {
|
||||
flat_set<ReportID> new_reports;
|
||||
for (auto id : ds.reports) {
|
||||
assert(id < build.literal_info.size());
|
||||
new_reports.insert(build.literal_info.at(id).final_id);
|
||||
new_reports.insert(build.literal_info.at(id).fragment_id);
|
||||
}
|
||||
ds.reports = move(new_reports);
|
||||
}
|
||||
@ -191,7 +191,7 @@ void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) {
|
||||
|
||||
/**
|
||||
* \brief Replaces the report ids currently in the dfas (rose graph literal
|
||||
* ids) with the final id for each literal.
|
||||
* ids) with the fragment id for each literal.
|
||||
*/
|
||||
static
|
||||
void remapAnchoredReports(RoseBuildImpl &build) {
|
||||
@ -208,8 +208,7 @@ void remapAnchoredReports(RoseBuildImpl &build) {
|
||||
* raw_dfa with program offsets.
|
||||
*/
|
||||
static
|
||||
void remapIdsToPrograms(raw_dfa &rdfa,
|
||||
const map<u32, LitFragment> &final_to_frag_map) {
|
||||
void remapIdsToPrograms(const RoseBuildImpl &build, raw_dfa &rdfa) {
|
||||
for (dstate &ds : rdfa.states) {
|
||||
assert(ds.reports_eod.empty()); // Not used in anchored matcher.
|
||||
if (ds.reports.empty()) {
|
||||
@ -217,9 +216,8 @@ void remapIdsToPrograms(raw_dfa &rdfa,
|
||||
}
|
||||
|
||||
flat_set<ReportID> new_reports;
|
||||
for (auto final_id : ds.reports) {
|
||||
assert(contains(final_to_frag_map, final_id));
|
||||
auto &frag = final_to_frag_map.at(final_id);
|
||||
for (auto fragment_id : ds.reports) {
|
||||
auto &frag = build.fragments.at(fragment_id);
|
||||
new_reports.insert(frag.lit_program_offset);
|
||||
}
|
||||
ds.reports = move(new_reports);
|
||||
@ -227,16 +225,18 @@ void remapIdsToPrograms(raw_dfa &rdfa,
|
||||
}
|
||||
|
||||
static
|
||||
void populate_holder(const simple_anchored_info &sai, const set<u32> &exit_ids,
|
||||
NGHolder *h_in) {
|
||||
unique_ptr<NGHolder> populate_holder(const simple_anchored_info &sai,
|
||||
const flat_set<u32> &exit_ids) {
|
||||
DEBUG_PRINTF("populating holder for ^.{%u,%u}%s\n", sai.min_bound,
|
||||
sai.max_bound, dumpString(sai.literal).c_str());
|
||||
NGHolder &h = *h_in;
|
||||
set<NFAVertex> ends = addDotsToGraph(h, h.start, sai.min_bound,
|
||||
sai.max_bound, CharReach::dot());
|
||||
auto h_ptr = make_unique<NGHolder>();
|
||||
NGHolder &h = *h_ptr;
|
||||
auto ends = addDotsToGraph(h, h.start, sai.min_bound, sai.max_bound,
|
||||
CharReach::dot());
|
||||
NFAVertex v = addToGraph(h, ends, sai.literal);
|
||||
add_edge(v, h.accept, h);
|
||||
h[v].reports.insert(exit_ids.begin(), exit_ids.end());
|
||||
return h_ptr;
|
||||
}
|
||||
|
||||
u32 anchoredStateSize(const anchored_matcher_info &atable) {
|
||||
@ -735,15 +735,15 @@ void buildSimpleDfas(const RoseBuildImpl &build,
|
||||
vector<unique_ptr<raw_dfa>> *anchored_dfas) {
|
||||
/* we should have determinised all of these before so there should be no
|
||||
* chance of failure. */
|
||||
flat_set<u32> exit_ids;
|
||||
for (const auto &simple : build.anchored_simple) {
|
||||
set<u32> exit_ids;
|
||||
exit_ids.clear();
|
||||
for (auto lit_id : simple.second) {
|
||||
exit_ids.insert(build.literal_info[lit_id].final_id);
|
||||
exit_ids.insert(build.literal_info[lit_id].fragment_id);
|
||||
}
|
||||
NGHolder h;
|
||||
populate_holder(simple.first, exit_ids, &h);
|
||||
Automaton_Holder autom(h);
|
||||
unique_ptr<raw_dfa> rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
|
||||
auto h = populate_holder(simple.first, exit_ids);
|
||||
Automaton_Holder autom(*h);
|
||||
auto rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
|
||||
UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES);
|
||||
assert(!rv);
|
||||
rdfa->start_anchored = INIT_STATE;
|
||||
@ -858,7 +858,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas,
|
||||
}
|
||||
|
||||
for (auto &rdfa : dfas) {
|
||||
remapIdsToPrograms(rdfa, build.final_to_frag_map);
|
||||
remapIdsToPrograms(build, rdfa);
|
||||
}
|
||||
|
||||
vector<aligned_unique_ptr<NFA>> nfas;
|
||||
|
@ -4646,10 +4646,8 @@ rose_group getGroups(const RoseBuildImpl &build, const flat_set<u32> &lit_ids) {
|
||||
}
|
||||
|
||||
static
|
||||
map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build,
|
||||
const build_context &bc) {
|
||||
void groupByFragment(RoseBuildImpl &build, const build_context &bc) {
|
||||
u32 frag_id = 0;
|
||||
map<u32, LitFragment> final_to_frag;
|
||||
|
||||
struct FragmentInfo {
|
||||
vector<u32> final_ids;
|
||||
@ -4658,6 +4656,9 @@ map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build,
|
||||
|
||||
map<rose_literal_id, FragmentInfo> frag_info;
|
||||
|
||||
auto &final_to_frag = build.final_to_frag_map;
|
||||
auto &fragments = build.fragments;
|
||||
|
||||
for (const auto &m : bc.final_id_to_literal) {
|
||||
u32 final_id = m.first;
|
||||
const auto &lit_ids = m.second;
|
||||
@ -4666,21 +4667,27 @@ map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build,
|
||||
auto groups = getGroups(build, lit_ids);
|
||||
|
||||
if (lit_ids.size() > 1) {
|
||||
final_to_frag.emplace(final_id, LitFragment(frag_id++, groups));
|
||||
final_to_frag.emplace(final_id, frag_id);
|
||||
fragments.emplace_back(frag_id, groups);
|
||||
frag_id++;
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto lit_id = *lit_ids.begin();
|
||||
const auto &lit = build.literals.right.at(lit_id);
|
||||
if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) {
|
||||
final_to_frag.emplace(final_id, LitFragment(frag_id++, groups));
|
||||
final_to_frag.emplace(final_id, frag_id);
|
||||
fragments.emplace_back(frag_id, groups);
|
||||
frag_id++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Combining fragments that squash their groups is unsafe.
|
||||
const auto &info = build.literal_info[lit_id];
|
||||
if (info.squash_group) {
|
||||
final_to_frag.emplace(final_id, LitFragment(frag_id++, groups));
|
||||
final_to_frag.emplace(final_id, frag_id);
|
||||
fragments.emplace_back(frag_id, groups);
|
||||
frag_id++;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -4695,14 +4702,13 @@ map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build,
|
||||
const auto &fi = m.second;
|
||||
DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(),
|
||||
as_string_list(fi.final_ids).c_str());
|
||||
fragments.emplace_back(frag_id, fi.groups);
|
||||
for (const auto final_id : fi.final_ids) {
|
||||
assert(!contains(final_to_frag, final_id));
|
||||
final_to_frag.emplace(final_id, LitFragment(frag_id, fi.groups));
|
||||
final_to_frag.emplace(final_id, frag_id);
|
||||
}
|
||||
frag_id++;
|
||||
}
|
||||
|
||||
return final_to_frag;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -4713,7 +4719,7 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
|
||||
// Build a reverse mapping from fragment -> final_id.
|
||||
map<u32, flat_set<u32>> frag_to_final_map;
|
||||
for (const auto &m : build.final_to_frag_map) {
|
||||
frag_to_final_map[m.second.fragment_id].insert(m.first);
|
||||
frag_to_final_map[m.second].insert(m.first);
|
||||
}
|
||||
|
||||
const u32 num_fragments = verify_u32(frag_to_final_map.size());
|
||||
@ -4736,7 +4742,8 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
|
||||
}
|
||||
|
||||
// Update LitFragment entries.
|
||||
for (auto &frag : build.final_to_frag_map | map_values) {
|
||||
for (const auto &fragment_id : build.final_to_frag_map | map_values) {
|
||||
auto &frag = build.fragments.at(fragment_id);
|
||||
frag.lit_program_offset = litPrograms[frag.fragment_id];
|
||||
frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id];
|
||||
}
|
||||
@ -5407,7 +5414,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
||||
|
||||
build_context bc;
|
||||
allocateFinalLiteralId(*this, bc);
|
||||
final_to_frag_map = groupByFragment(*this, bc);
|
||||
groupByFragment(*this, bc);
|
||||
|
||||
// Write the fragment IDs into the literal_info structures.
|
||||
for (auto &info : literal_info) {
|
||||
@ -5415,7 +5422,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
||||
continue;
|
||||
}
|
||||
assert(contains(final_to_frag_map, info.final_id));
|
||||
info.fragment_id = final_to_frag_map.at(info.final_id).fragment_id;
|
||||
info.fragment_id = final_to_frag_map.at(info.final_id);
|
||||
}
|
||||
|
||||
auto anchored_dfas = buildAnchoredDfas(*this);
|
||||
|
@ -1153,7 +1153,7 @@ void dumpRoseLitPrograms(const RoseBuildImpl &build, const RoseEngine *t,
|
||||
programs.reserve(build.final_to_frag_map.size());
|
||||
|
||||
for (const auto &m : build.final_to_frag_map) {
|
||||
const auto &frag = m.second;
|
||||
const auto &frag = build.fragments.at(m.second);
|
||||
if (frag.lit_program_offset) {
|
||||
programs.push_back(frag.lit_program_offset);
|
||||
}
|
||||
|
@ -593,7 +593,8 @@ public:
|
||||
* overlap calculation in history assignment. */
|
||||
std::map<u32, rose_literal_id> anchoredLitSuffix;
|
||||
|
||||
std::map<u32, LitFragment> final_to_frag_map;
|
||||
std::map<u32, u32> final_to_frag_map;
|
||||
std::vector<LitFragment> fragments;
|
||||
|
||||
unordered_set<left_id> transient;
|
||||
unordered_map<left_id, rose_group> rose_squash_masks;
|
||||
|
@ -741,7 +741,8 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build,
|
||||
for (auto &lit : mp.lits) {
|
||||
u32 final_id = lit.id;
|
||||
assert(contains(build.final_to_frag_map, final_id));
|
||||
const auto &frag = build.final_to_frag_map.at(final_id);
|
||||
const auto &frag =
|
||||
build.fragments.at(build.final_to_frag_map.at(final_id));
|
||||
lit.id = delay_rebuild ? frag.delay_program_offset
|
||||
: frag.lit_program_offset;
|
||||
lit.groups = frag.groups;
|
||||
|
Loading…
x
Reference in New Issue
Block a user