mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
smallwrite: use failure map to set dfa daddy state
This commit is contained in:
parent
beac58fcb4
commit
3b3f6b739c
@ -842,6 +842,8 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
|
|||||||
flat_set<dstate_id_t> hinted;
|
flat_set<dstate_id_t> hinted;
|
||||||
if (trust_daddy_states) {
|
if (trust_daddy_states) {
|
||||||
hinted.insert(currState.daddy);
|
hinted.insert(currState.daddy);
|
||||||
|
addIfEarlier(hinted, info.raw.start_floating, curr_id);
|
||||||
|
addIfEarlier(hinted, info.raw.start_anchored, curr_id);
|
||||||
} else {
|
} else {
|
||||||
hinted = find_daddy_candidates(info, curr_id);
|
hinted = find_daddy_candidates(info, curr_id);
|
||||||
}
|
}
|
||||||
@ -896,7 +898,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
|
|||||||
|
|
||||||
if (self_loop_width > MAX_SHERMAN_SELF_LOOP) {
|
if (self_loop_width > MAX_SHERMAN_SELF_LOOP) {
|
||||||
DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id,
|
DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id,
|
||||||
self_loop_width);
|
self_loop_width);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -443,11 +443,11 @@ bool isSaneTrie(const LitTrie &trie) {
|
|||||||
* edges and reports.
|
* edges and reports.
|
||||||
*/
|
*/
|
||||||
static
|
static
|
||||||
void buildAutomaton(LitTrie &trie) {
|
void buildAutomaton(LitTrie &trie,
|
||||||
|
map<LitTrieVertex, LitTrieVertex> &failure_map) {
|
||||||
assert(isSaneTrie(trie));
|
assert(isSaneTrie(trie));
|
||||||
|
|
||||||
// Find our failure transitions and reports.
|
// Find our failure transitions and reports.
|
||||||
map<LitTrieVertex, LitTrieVertex> failure_map;
|
|
||||||
vector<LitTrieVertex> ordering;
|
vector<LitTrieVertex> ordering;
|
||||||
ACVisitor ac_vis(trie, failure_map, ordering);
|
ACVisitor ac_vis(trie, failure_map, ordering);
|
||||||
boost::breadth_first_search(trie, trie.root, visitor(ac_vis));
|
boost::breadth_first_search(trie, trie.root, visitor(ac_vis));
|
||||||
@ -535,7 +535,8 @@ static
|
|||||||
unique_ptr<raw_dfa> buildDfa(LitTrie &trie, bool nocase) {
|
unique_ptr<raw_dfa> buildDfa(LitTrie &trie, bool nocase) {
|
||||||
DEBUG_PRINTF("trie has %zu states\n", num_vertices(trie));
|
DEBUG_PRINTF("trie has %zu states\n", num_vertices(trie));
|
||||||
|
|
||||||
buildAutomaton(trie);
|
map<LitTrieVertex, LitTrieVertex> failure_map;
|
||||||
|
buildAutomaton(trie, failure_map);
|
||||||
|
|
||||||
auto rdfa = make_unique<raw_dfa>(NFA_OUTFIX);
|
auto rdfa = make_unique<raw_dfa>(NFA_OUTFIX);
|
||||||
|
|
||||||
@ -559,13 +560,19 @@ unique_ptr<raw_dfa> buildDfa(LitTrie &trie, bool nocase) {
|
|||||||
DEBUG_PRINTF("state %zu\n", u_state);
|
DEBUG_PRINTF("state %zu\n", u_state);
|
||||||
assert(u_state < rdfa->states.size());
|
assert(u_state < rdfa->states.size());
|
||||||
auto &ds = rdfa->states[u_state];
|
auto &ds = rdfa->states[u_state];
|
||||||
ds.daddy = root_state;
|
|
||||||
ds.reports = trie[u].reports;
|
ds.reports = trie[u].reports;
|
||||||
|
|
||||||
if (!ds.reports.empty()) {
|
if (!ds.reports.empty()) {
|
||||||
DEBUG_PRINTF("reports: %s\n", as_string_list(ds.reports).c_str());
|
DEBUG_PRINTF("reports: %s\n", as_string_list(ds.reports).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set daddy state from failure map.
|
||||||
|
if (u == trie.root) {
|
||||||
|
ds.daddy = DEAD_STATE;
|
||||||
|
} else {
|
||||||
|
assert(contains(failure_map, u));
|
||||||
|
ds.daddy = trie[failure_map.at(u)].index + 1;
|
||||||
|
}
|
||||||
|
|
||||||
// By default, transition back to the root.
|
// By default, transition back to the root.
|
||||||
fill(ds.next.begin(), ds.next.end(), root_state);
|
fill(ds.next.begin(), ds.next.end(), root_state);
|
||||||
// TOP should be a self-loop.
|
// TOP should be a self-loop.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user