mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-07-12 13:34:45 +03:00
rose: use fragment ids earlier for anchored dfas
This commit is contained in:
parent
8b25d83415
commit
79512bd5c3
@ -183,7 +183,7 @@ void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) {
|
|||||||
flat_set<ReportID> new_reports;
|
flat_set<ReportID> new_reports;
|
||||||
for (auto id : ds.reports) {
|
for (auto id : ds.reports) {
|
||||||
assert(id < build.literal_info.size());
|
assert(id < build.literal_info.size());
|
||||||
new_reports.insert(build.literal_info.at(id).final_id);
|
new_reports.insert(build.literal_info.at(id).fragment_id);
|
||||||
}
|
}
|
||||||
ds.reports = move(new_reports);
|
ds.reports = move(new_reports);
|
||||||
}
|
}
|
||||||
@ -191,7 +191,7 @@ void remapAnchoredReports(raw_dfa &rdfa, const RoseBuildImpl &build) {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Replaces the report ids currently in the dfas (rose graph literal
|
* \brief Replaces the report ids currently in the dfas (rose graph literal
|
||||||
* ids) with the final id for each literal.
|
* ids) with the fragment id for each literal.
|
||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
void remapAnchoredReports(RoseBuildImpl &build) {
|
void remapAnchoredReports(RoseBuildImpl &build) {
|
||||||
@ -208,8 +208,7 @@ void remapAnchoredReports(RoseBuildImpl &build) {
|
|||||||
* raw_dfa with program offsets.
|
* raw_dfa with program offsets.
|
||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
void remapIdsToPrograms(raw_dfa &rdfa,
|
void remapIdsToPrograms(const RoseBuildImpl &build, raw_dfa &rdfa) {
|
||||||
const map<u32, LitFragment> &final_to_frag_map) {
|
|
||||||
for (dstate &ds : rdfa.states) {
|
for (dstate &ds : rdfa.states) {
|
||||||
assert(ds.reports_eod.empty()); // Not used in anchored matcher.
|
assert(ds.reports_eod.empty()); // Not used in anchored matcher.
|
||||||
if (ds.reports.empty()) {
|
if (ds.reports.empty()) {
|
||||||
@ -217,9 +216,8 @@ void remapIdsToPrograms(raw_dfa &rdfa,
|
|||||||
}
|
}
|
||||||
|
|
||||||
flat_set<ReportID> new_reports;
|
flat_set<ReportID> new_reports;
|
||||||
for (auto final_id : ds.reports) {
|
for (auto fragment_id : ds.reports) {
|
||||||
assert(contains(final_to_frag_map, final_id));
|
auto &frag = build.fragments.at(fragment_id);
|
||||||
auto &frag = final_to_frag_map.at(final_id);
|
|
||||||
new_reports.insert(frag.lit_program_offset);
|
new_reports.insert(frag.lit_program_offset);
|
||||||
}
|
}
|
||||||
ds.reports = move(new_reports);
|
ds.reports = move(new_reports);
|
||||||
@ -227,16 +225,18 @@ void remapIdsToPrograms(raw_dfa &rdfa,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void populate_holder(const simple_anchored_info &sai, const set<u32> &exit_ids,
|
unique_ptr<NGHolder> populate_holder(const simple_anchored_info &sai,
|
||||||
NGHolder *h_in) {
|
const flat_set<u32> &exit_ids) {
|
||||||
DEBUG_PRINTF("populating holder for ^.{%u,%u}%s\n", sai.min_bound,
|
DEBUG_PRINTF("populating holder for ^.{%u,%u}%s\n", sai.min_bound,
|
||||||
sai.max_bound, dumpString(sai.literal).c_str());
|
sai.max_bound, dumpString(sai.literal).c_str());
|
||||||
NGHolder &h = *h_in;
|
auto h_ptr = make_unique<NGHolder>();
|
||||||
set<NFAVertex> ends = addDotsToGraph(h, h.start, sai.min_bound,
|
NGHolder &h = *h_ptr;
|
||||||
sai.max_bound, CharReach::dot());
|
auto ends = addDotsToGraph(h, h.start, sai.min_bound, sai.max_bound,
|
||||||
|
CharReach::dot());
|
||||||
NFAVertex v = addToGraph(h, ends, sai.literal);
|
NFAVertex v = addToGraph(h, ends, sai.literal);
|
||||||
add_edge(v, h.accept, h);
|
add_edge(v, h.accept, h);
|
||||||
h[v].reports.insert(exit_ids.begin(), exit_ids.end());
|
h[v].reports.insert(exit_ids.begin(), exit_ids.end());
|
||||||
|
return h_ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 anchoredStateSize(const anchored_matcher_info &atable) {
|
u32 anchoredStateSize(const anchored_matcher_info &atable) {
|
||||||
@ -735,15 +735,15 @@ void buildSimpleDfas(const RoseBuildImpl &build,
|
|||||||
vector<unique_ptr<raw_dfa>> *anchored_dfas) {
|
vector<unique_ptr<raw_dfa>> *anchored_dfas) {
|
||||||
/* we should have determinised all of these before so there should be no
|
/* we should have determinised all of these before so there should be no
|
||||||
* chance of failure. */
|
* chance of failure. */
|
||||||
for (const auto &simple : build.anchored_simple) {
|
flat_set<u32> exit_ids;
|
||||||
set<u32> exit_ids;
|
for (const auto &simple : build.anchored_simple) {
|
||||||
|
exit_ids.clear();
|
||||||
for (auto lit_id : simple.second) {
|
for (auto lit_id : simple.second) {
|
||||||
exit_ids.insert(build.literal_info[lit_id].final_id);
|
exit_ids.insert(build.literal_info[lit_id].fragment_id);
|
||||||
}
|
}
|
||||||
NGHolder h;
|
auto h = populate_holder(simple.first, exit_ids);
|
||||||
populate_holder(simple.first, exit_ids, &h);
|
Automaton_Holder autom(*h);
|
||||||
Automaton_Holder autom(h);
|
auto rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
|
||||||
unique_ptr<raw_dfa> rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
|
|
||||||
UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES);
|
UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES);
|
||||||
assert(!rv);
|
assert(!rv);
|
||||||
rdfa->start_anchored = INIT_STATE;
|
rdfa->start_anchored = INIT_STATE;
|
||||||
@ -858,7 +858,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (auto &rdfa : dfas) {
|
for (auto &rdfa : dfas) {
|
||||||
remapIdsToPrograms(rdfa, build.final_to_frag_map);
|
remapIdsToPrograms(build, rdfa);
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<aligned_unique_ptr<NFA>> nfas;
|
vector<aligned_unique_ptr<NFA>> nfas;
|
||||||
|
@ -4646,10 +4646,8 @@ rose_group getGroups(const RoseBuildImpl &build, const flat_set<u32> &lit_ids) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build,
|
void groupByFragment(RoseBuildImpl &build, const build_context &bc) {
|
||||||
const build_context &bc) {
|
|
||||||
u32 frag_id = 0;
|
u32 frag_id = 0;
|
||||||
map<u32, LitFragment> final_to_frag;
|
|
||||||
|
|
||||||
struct FragmentInfo {
|
struct FragmentInfo {
|
||||||
vector<u32> final_ids;
|
vector<u32> final_ids;
|
||||||
@ -4658,6 +4656,9 @@ map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build,
|
|||||||
|
|
||||||
map<rose_literal_id, FragmentInfo> frag_info;
|
map<rose_literal_id, FragmentInfo> frag_info;
|
||||||
|
|
||||||
|
auto &final_to_frag = build.final_to_frag_map;
|
||||||
|
auto &fragments = build.fragments;
|
||||||
|
|
||||||
for (const auto &m : bc.final_id_to_literal) {
|
for (const auto &m : bc.final_id_to_literal) {
|
||||||
u32 final_id = m.first;
|
u32 final_id = m.first;
|
||||||
const auto &lit_ids = m.second;
|
const auto &lit_ids = m.second;
|
||||||
@ -4666,21 +4667,27 @@ map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build,
|
|||||||
auto groups = getGroups(build, lit_ids);
|
auto groups = getGroups(build, lit_ids);
|
||||||
|
|
||||||
if (lit_ids.size() > 1) {
|
if (lit_ids.size() > 1) {
|
||||||
final_to_frag.emplace(final_id, LitFragment(frag_id++, groups));
|
final_to_frag.emplace(final_id, frag_id);
|
||||||
|
fragments.emplace_back(frag_id, groups);
|
||||||
|
frag_id++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto lit_id = *lit_ids.begin();
|
const auto lit_id = *lit_ids.begin();
|
||||||
const auto &lit = build.literals.right.at(lit_id);
|
const auto &lit = build.literals.right.at(lit_id);
|
||||||
if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) {
|
if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) {
|
||||||
final_to_frag.emplace(final_id, LitFragment(frag_id++, groups));
|
final_to_frag.emplace(final_id, frag_id);
|
||||||
|
fragments.emplace_back(frag_id, groups);
|
||||||
|
frag_id++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Combining fragments that squash their groups is unsafe.
|
// Combining fragments that squash their groups is unsafe.
|
||||||
const auto &info = build.literal_info[lit_id];
|
const auto &info = build.literal_info[lit_id];
|
||||||
if (info.squash_group) {
|
if (info.squash_group) {
|
||||||
final_to_frag.emplace(final_id, LitFragment(frag_id++, groups));
|
final_to_frag.emplace(final_id, frag_id);
|
||||||
|
fragments.emplace_back(frag_id, groups);
|
||||||
|
frag_id++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4695,14 +4702,13 @@ map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build,
|
|||||||
const auto &fi = m.second;
|
const auto &fi = m.second;
|
||||||
DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(),
|
DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(),
|
||||||
as_string_list(fi.final_ids).c_str());
|
as_string_list(fi.final_ids).c_str());
|
||||||
|
fragments.emplace_back(frag_id, fi.groups);
|
||||||
for (const auto final_id : fi.final_ids) {
|
for (const auto final_id : fi.final_ids) {
|
||||||
assert(!contains(final_to_frag, final_id));
|
assert(!contains(final_to_frag, final_id));
|
||||||
final_to_frag.emplace(final_id, LitFragment(frag_id, fi.groups));
|
final_to_frag.emplace(final_id, frag_id);
|
||||||
}
|
}
|
||||||
frag_id++;
|
frag_id++;
|
||||||
}
|
}
|
||||||
|
|
||||||
return final_to_frag;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -4713,7 +4719,7 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
|
|||||||
// Build a reverse mapping from fragment -> final_id.
|
// Build a reverse mapping from fragment -> final_id.
|
||||||
map<u32, flat_set<u32>> frag_to_final_map;
|
map<u32, flat_set<u32>> frag_to_final_map;
|
||||||
for (const auto &m : build.final_to_frag_map) {
|
for (const auto &m : build.final_to_frag_map) {
|
||||||
frag_to_final_map[m.second.fragment_id].insert(m.first);
|
frag_to_final_map[m.second].insert(m.first);
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 num_fragments = verify_u32(frag_to_final_map.size());
|
const u32 num_fragments = verify_u32(frag_to_final_map.size());
|
||||||
@ -4736,7 +4742,8 @@ void buildLiteralPrograms(RoseBuildImpl &build, build_context &bc) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Update LitFragment entries.
|
// Update LitFragment entries.
|
||||||
for (auto &frag : build.final_to_frag_map | map_values) {
|
for (const auto &fragment_id : build.final_to_frag_map | map_values) {
|
||||||
|
auto &frag = build.fragments.at(fragment_id);
|
||||||
frag.lit_program_offset = litPrograms[frag.fragment_id];
|
frag.lit_program_offset = litPrograms[frag.fragment_id];
|
||||||
frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id];
|
frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id];
|
||||||
}
|
}
|
||||||
@ -5407,7 +5414,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
|||||||
|
|
||||||
build_context bc;
|
build_context bc;
|
||||||
allocateFinalLiteralId(*this, bc);
|
allocateFinalLiteralId(*this, bc);
|
||||||
final_to_frag_map = groupByFragment(*this, bc);
|
groupByFragment(*this, bc);
|
||||||
|
|
||||||
// Write the fragment IDs into the literal_info structures.
|
// Write the fragment IDs into the literal_info structures.
|
||||||
for (auto &info : literal_info) {
|
for (auto &info : literal_info) {
|
||||||
@ -5415,7 +5422,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
assert(contains(final_to_frag_map, info.final_id));
|
assert(contains(final_to_frag_map, info.final_id));
|
||||||
info.fragment_id = final_to_frag_map.at(info.final_id).fragment_id;
|
info.fragment_id = final_to_frag_map.at(info.final_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto anchored_dfas = buildAnchoredDfas(*this);
|
auto anchored_dfas = buildAnchoredDfas(*this);
|
||||||
|
@ -1153,7 +1153,7 @@ void dumpRoseLitPrograms(const RoseBuildImpl &build, const RoseEngine *t,
|
|||||||
programs.reserve(build.final_to_frag_map.size());
|
programs.reserve(build.final_to_frag_map.size());
|
||||||
|
|
||||||
for (const auto &m : build.final_to_frag_map) {
|
for (const auto &m : build.final_to_frag_map) {
|
||||||
const auto &frag = m.second;
|
const auto &frag = build.fragments.at(m.second);
|
||||||
if (frag.lit_program_offset) {
|
if (frag.lit_program_offset) {
|
||||||
programs.push_back(frag.lit_program_offset);
|
programs.push_back(frag.lit_program_offset);
|
||||||
}
|
}
|
||||||
|
@ -593,7 +593,8 @@ public:
|
|||||||
* overlap calculation in history assignment. */
|
* overlap calculation in history assignment. */
|
||||||
std::map<u32, rose_literal_id> anchoredLitSuffix;
|
std::map<u32, rose_literal_id> anchoredLitSuffix;
|
||||||
|
|
||||||
std::map<u32, LitFragment> final_to_frag_map;
|
std::map<u32, u32> final_to_frag_map;
|
||||||
|
std::vector<LitFragment> fragments;
|
||||||
|
|
||||||
unordered_set<left_id> transient;
|
unordered_set<left_id> transient;
|
||||||
unordered_map<left_id, rose_group> rose_squash_masks;
|
unordered_map<left_id, rose_group> rose_squash_masks;
|
||||||
|
@ -741,7 +741,8 @@ MatcherProto makeMatcherProto(const RoseBuildImpl &build,
|
|||||||
for (auto &lit : mp.lits) {
|
for (auto &lit : mp.lits) {
|
||||||
u32 final_id = lit.id;
|
u32 final_id = lit.id;
|
||||||
assert(contains(build.final_to_frag_map, final_id));
|
assert(contains(build.final_to_frag_map, final_id));
|
||||||
const auto &frag = build.final_to_frag_map.at(final_id);
|
const auto &frag =
|
||||||
|
build.fragments.at(build.final_to_frag_map.at(final_id));
|
||||||
lit.id = delay_rebuild ? frag.delay_program_offset
|
lit.id = delay_rebuild ? frag.delay_program_offset
|
||||||
: frag.lit_program_offset;
|
: frag.lit_program_offset;
|
||||||
lit.groups = frag.groups;
|
lit.groups = frag.groups;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user