mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
mcclellan: Remove the use of state numbering
McClellan and Haig only need to know which states do not participate (i.e. the unused start and startDs states).
This commit is contained in:
parent
5dd4aa9c13
commit
839c90594c
@ -69,17 +69,15 @@ struct haig_too_wide {
|
|||||||
|
|
||||||
template<typename stateset>
|
template<typename stateset>
|
||||||
static
|
static
|
||||||
void populateInit(const NGHolder &g,
|
void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused,
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
|
||||||
stateset *init, stateset *initDS,
|
stateset *init, stateset *initDS,
|
||||||
vector<NFAVertex> *v_by_index) {
|
vector<NFAVertex> *v_by_index) {
|
||||||
DEBUG_PRINTF("graph kind: %u\n", (int)g.kind);
|
DEBUG_PRINTF("graph kind: %u\n", (int)g.kind);
|
||||||
for (auto v : vertices_range(g)) {
|
for (auto v : vertices_range(g)) {
|
||||||
u32 v_index = g[v].index;
|
if (contains(unused, v)) {
|
||||||
if (state_ids.at(v) == NO_STATE) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
u32 v_index = g[v].index;
|
||||||
if (is_any_start(v, g)) {
|
if (is_any_start(v, g)) {
|
||||||
init->set(v_index);
|
init->set(v_index);
|
||||||
if (hasSelfLoop(v, g) || is_triggered(g)) {
|
if (hasSelfLoop(v, g) || is_triggered(g)) {
|
||||||
@ -116,8 +114,8 @@ void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) {
|
|||||||
class Automaton_Base {
|
class Automaton_Base {
|
||||||
protected:
|
protected:
|
||||||
Automaton_Base(const NGHolder &graph_in,
|
Automaton_Base(const NGHolder &graph_in,
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids_in)
|
const flat_set<NFAVertex> &unused_in)
|
||||||
: graph(graph_in), state_ids(state_ids_in) {
|
: graph(graph_in), unused(unused_in) {
|
||||||
calculateAlphabet(graph, alpha, unalpha, &alphasize);
|
calculateAlphabet(graph, alpha, unalpha, &alphasize);
|
||||||
assert(alphasize <= ALPHABET_SIZE);
|
assert(alphasize <= ALPHABET_SIZE);
|
||||||
}
|
}
|
||||||
@ -126,7 +124,7 @@ public:
|
|||||||
static bool canPrune(const flat_set<ReportID> &) { return false; }
|
static bool canPrune(const flat_set<ReportID> &) { return false; }
|
||||||
|
|
||||||
const NGHolder &graph;
|
const NGHolder &graph;
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids;
|
const flat_set<NFAVertex> &unused;
|
||||||
|
|
||||||
array<u16, ALPHABET_SIZE> alpha;
|
array<u16, ALPHABET_SIZE> alpha;
|
||||||
array<u16, ALPHABET_SIZE> unalpha;
|
array<u16, ALPHABET_SIZE> unalpha;
|
||||||
@ -145,14 +143,13 @@ public:
|
|||||||
typedef map<StateSet, dstate_id_t> StateMap;
|
typedef map<StateSet, dstate_id_t> StateMap;
|
||||||
|
|
||||||
Automaton_Big(const NGHolder &graph_in,
|
Automaton_Big(const NGHolder &graph_in,
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids_in,
|
const flat_set<NFAVertex> &unused_in, som_type som,
|
||||||
som_type som, const vector<vector<CharReach>> &triggers,
|
const vector<vector<CharReach>> &triggers, bool unordered_som)
|
||||||
bool unordered_som)
|
: Automaton_Base(graph_in, unused_in), numStates(num_vertices(graph)),
|
||||||
: Automaton_Base(graph_in, state_ids_in), numStates(num_vertices(graph)),
|
|
||||||
init(numStates), initDS(numStates), squash(numStates),
|
init(numStates), initDS(numStates), squash(numStates),
|
||||||
accept(numStates), acceptEod(numStates), toppable(numStates),
|
accept(numStates), acceptEod(numStates), toppable(numStates),
|
||||||
dead(numStates) {
|
dead(numStates) {
|
||||||
populateInit(graph, state_ids, &init, &initDS, &v_by_index);
|
populateInit(graph, unused, &init, &initDS, &v_by_index);
|
||||||
populateAccepts(graph, &accept, &acceptEod);
|
populateAccepts(graph, &accept, &acceptEod);
|
||||||
|
|
||||||
start_anchored = DEAD_STATE + 1;
|
start_anchored = DEAD_STATE + 1;
|
||||||
@ -175,7 +172,7 @@ public:
|
|||||||
|
|
||||||
cr_by_index = populateCR(graph, v_by_index, alpha);
|
cr_by_index = populateCR(graph, v_by_index, alpha);
|
||||||
if (is_triggered(graph)) {
|
if (is_triggered(graph)) {
|
||||||
markToppableStarts(graph, state_ids, false, triggers, &toppable);
|
markToppableStarts(graph, unused, false, triggers, &toppable);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -244,11 +241,11 @@ public:
|
|||||||
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
|
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
|
||||||
|
|
||||||
Automaton_Graph(const NGHolder &graph_in,
|
Automaton_Graph(const NGHolder &graph_in,
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids_in,
|
const flat_set<NFAVertex> &unused_in,
|
||||||
som_type som, const vector<vector<CharReach>> &triggers,
|
som_type som, const vector<vector<CharReach>> &triggers,
|
||||||
bool unordered_som)
|
bool unordered_som)
|
||||||
: Automaton_Base(graph_in, state_ids_in) {
|
: Automaton_Base(graph_in, unused_in) {
|
||||||
populateInit(graph, state_ids, &init, &initDS, &v_by_index);
|
populateInit(graph, unused, &init, &initDS, &v_by_index);
|
||||||
populateAccepts(graph, &accept, &acceptEod);
|
populateAccepts(graph, &accept, &acceptEod);
|
||||||
|
|
||||||
start_anchored = DEAD_STATE + 1;
|
start_anchored = DEAD_STATE + 1;
|
||||||
@ -272,7 +269,7 @@ public:
|
|||||||
cr_by_index = populateCR(graph, v_by_index, alpha);
|
cr_by_index = populateCR(graph, v_by_index, alpha);
|
||||||
if (is_triggered(graph)) {
|
if (is_triggered(graph)) {
|
||||||
dynamic_bitset<> temp(NFA_STATE_LIMIT);
|
dynamic_bitset<> temp(NFA_STATE_LIMIT);
|
||||||
markToppableStarts(graph, state_ids, false, triggers, &temp);
|
markToppableStarts(graph, unused, false, triggers, &temp);
|
||||||
toppable = bitfield<NFA_STATE_LIMIT>(temp);
|
toppable = bitfield<NFA_STATE_LIMIT>(temp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -481,11 +478,10 @@ bool is_any_start_inc_virtual(NFAVertex v, const NGHolder &g) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
s32 getSlotID(const NGHolder &g,
|
s32 getSlotID(const NGHolder &g, UNUSED const flat_set<NFAVertex> &unused,
|
||||||
UNUSED const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
|
||||||
NFAVertex v) {
|
NFAVertex v) {
|
||||||
if (is_triggered(g) && v == g.start) {
|
if (is_triggered(g) && v == g.start) {
|
||||||
assert(state_ids.at(v) != NO_STATE);
|
assert(!contains(unused, v));
|
||||||
} else if (is_any_start_inc_virtual(v, g)) {
|
} else if (is_any_start_inc_virtual(v, g)) {
|
||||||
return CREATE_NEW_SOM;
|
return CREATE_NEW_SOM;
|
||||||
}
|
}
|
||||||
@ -516,8 +512,7 @@ void haig_do_preds(const NGHolder &g, const stateset &nfa_states,
|
|||||||
|
|
||||||
template<typename stateset>
|
template<typename stateset>
|
||||||
static
|
static
|
||||||
void haig_do_report(const NGHolder &g,
|
void haig_do_report(const NGHolder &g, const flat_set<NFAVertex> &unused,
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
|
||||||
NFAVertex accept_v, const stateset &source_nfa_states,
|
NFAVertex accept_v, const stateset &source_nfa_states,
|
||||||
const vector<NFAVertex> &state_mapping,
|
const vector<NFAVertex> &state_mapping,
|
||||||
set<som_report> &out) {
|
set<som_report> &out) {
|
||||||
@ -528,7 +523,7 @@ void haig_do_report(const NGHolder &g,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
for (ReportID report_id : g[v].reports) {
|
for (ReportID report_id : g[v].reports) {
|
||||||
out.insert(som_report(report_id, getSlotID(g, state_ids, v)));
|
out.insert(som_report(report_id, getSlotID(g, unused, v)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -565,14 +560,14 @@ void haig_note_starts(const NGHolder &g, map<u32, u32> *out) {
|
|||||||
template<class Auto>
|
template<class Auto>
|
||||||
static
|
static
|
||||||
bool doHaig(const NGHolder &g,
|
bool doHaig(const NGHolder &g,
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
const flat_set<NFAVertex> &unused,
|
||||||
som_type som, const vector<vector<CharReach>> &triggers,
|
som_type som, const vector<vector<CharReach>> &triggers,
|
||||||
bool unordered_som, raw_som_dfa *rdfa) {
|
bool unordered_som, raw_som_dfa *rdfa) {
|
||||||
u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
|
u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
|
||||||
a fight */
|
a fight */
|
||||||
typedef typename Auto::StateSet StateSet;
|
typedef typename Auto::StateSet StateSet;
|
||||||
vector<StateSet> nfa_state_map;
|
vector<StateSet> nfa_state_map;
|
||||||
Auto n(g, state_ids, som, triggers, unordered_som);
|
Auto n(g, unused, som, triggers, unordered_som);
|
||||||
try {
|
try {
|
||||||
if (determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
|
if (determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
|
||||||
DEBUG_PRINTF("state limit exceeded\n");
|
DEBUG_PRINTF("state limit exceeded\n");
|
||||||
@ -602,9 +597,9 @@ bool doHaig(const NGHolder &g,
|
|||||||
haig_do_preds(g, source_states, n.v_by_index,
|
haig_do_preds(g, source_states, n.v_by_index,
|
||||||
rdfa->state_som.back().preds);
|
rdfa->state_som.back().preds);
|
||||||
|
|
||||||
haig_do_report(g, state_ids, g.accept, source_states, n.v_by_index,
|
haig_do_report(g, unused, g.accept, source_states, n.v_by_index,
|
||||||
rdfa->state_som.back().reports);
|
rdfa->state_som.back().reports);
|
||||||
haig_do_report(g, state_ids, g.acceptEod, source_states, n.v_by_index,
|
haig_do_report(g, unused, g.acceptEod, source_states, n.v_by_index,
|
||||||
rdfa->state_som.back().reports_eod);
|
rdfa->state_som.back().reports_eod);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -613,10 +608,10 @@ bool doHaig(const NGHolder &g,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
unique_ptr<raw_som_dfa> attemptToBuildHaig(NGHolder &g, som_type som,
|
unique_ptr<raw_som_dfa>
|
||||||
u32 somPrecision,
|
attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision,
|
||||||
const vector<vector<CharReach> > &triggers,
|
const vector<vector<CharReach>> &triggers, const Grey &grey,
|
||||||
const Grey &grey, bool unordered_som) {
|
bool unordered_som) {
|
||||||
assert(is_triggered(g) != triggers.empty());
|
assert(is_triggered(g) != triggers.empty());
|
||||||
assert(!unordered_som || is_triggered(g));
|
assert(!unordered_som || is_triggered(g));
|
||||||
|
|
||||||
@ -625,13 +620,12 @@ unique_ptr<raw_som_dfa> attemptToBuildHaig(NGHolder &g, som_type som,
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto state_ids = numberStates(g);
|
|
||||||
dropUnusedStarts(g, state_ids);
|
|
||||||
|
|
||||||
DEBUG_PRINTF("attempting to build haig \n");
|
DEBUG_PRINTF("attempting to build haig \n");
|
||||||
assert(allMatchStatesHaveReports(g));
|
assert(allMatchStatesHaveReports(g));
|
||||||
assert(hasCorrectlyNumberedVertices(g));
|
assert(hasCorrectlyNumberedVertices(g));
|
||||||
|
|
||||||
|
auto unused = findUnusedStates(g);
|
||||||
|
|
||||||
u32 numStates = num_vertices(g);
|
u32 numStates = num_vertices(g);
|
||||||
if (numStates > HAIG_MAX_NFA_STATE) {
|
if (numStates > HAIG_MAX_NFA_STATE) {
|
||||||
DEBUG_PRINTF("giving up... looks too big\n");
|
DEBUG_PRINTF("giving up... looks too big\n");
|
||||||
@ -645,11 +639,11 @@ unique_ptr<raw_som_dfa> attemptToBuildHaig(NGHolder &g, som_type som,
|
|||||||
bool rv;
|
bool rv;
|
||||||
if (numStates <= NFA_STATE_LIMIT) {
|
if (numStates <= NFA_STATE_LIMIT) {
|
||||||
/* fast path */
|
/* fast path */
|
||||||
rv = doHaig<Automaton_Graph>(g, state_ids, som, triggers, unordered_som,
|
rv = doHaig<Automaton_Graph>(g, unused, som, triggers, unordered_som,
|
||||||
rdfa.get());
|
rdfa.get());
|
||||||
} else {
|
} else {
|
||||||
/* not the fast path */
|
/* not the fast path */
|
||||||
rv = doHaig<Automaton_Big>(g, state_ids, som, triggers, unordered_som,
|
rv = doHaig<Automaton_Big>(g, unused, som, triggers, unordered_som,
|
||||||
rdfa.get());
|
rdfa.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -54,9 +54,9 @@ struct raw_som_dfa;
|
|||||||
* between)
|
* between)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
std::unique_ptr<raw_som_dfa> attemptToBuildHaig(NGHolder &g, som_type som,
|
std::unique_ptr<raw_som_dfa>
|
||||||
u32 somPrecision,
|
attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision,
|
||||||
const std::vector<std::vector<CharReach> > &triggers,
|
const std::vector<std::vector<CharReach>> &triggers,
|
||||||
const Grey &grey, bool unordered_som_triggers = false);
|
const Grey &grey, bool unordered_som_triggers = false);
|
||||||
|
|
||||||
std::unique_ptr<raw_som_dfa>
|
std::unique_ptr<raw_som_dfa>
|
||||||
|
@ -152,12 +152,11 @@ void getFullTransitionFromState(const raw_dfa &n, dstate_id_t state,
|
|||||||
|
|
||||||
template<typename stateset>
|
template<typename stateset>
|
||||||
static
|
static
|
||||||
void populateInit(const NGHolder &g,
|
void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused,
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
|
||||||
stateset *init, stateset *init_deep,
|
stateset *init, stateset *init_deep,
|
||||||
vector<NFAVertex> *v_by_index) {
|
vector<NFAVertex> *v_by_index) {
|
||||||
for (auto v : vertices_range(g)) {
|
for (auto v : vertices_range(g)) {
|
||||||
if (state_ids.at(v) == NO_STATE) {
|
if (contains(unused, v)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -188,21 +187,22 @@ void populateInit(const NGHolder &g,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename StateSet>
|
template<typename StateSet>
|
||||||
void populateAccepts(const NGHolder &g,
|
void populateAccepts(const NGHolder &g, const flat_set<NFAVertex> &unused,
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
|
||||||
StateSet *accept, StateSet *acceptEod) {
|
StateSet *accept, StateSet *acceptEod) {
|
||||||
for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
|
for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
|
||||||
if (state_ids.at(v) != NO_STATE) {
|
if (contains(unused, v)) {
|
||||||
accept->set(g[v].index);
|
continue;
|
||||||
}
|
}
|
||||||
|
accept->set(g[v].index);
|
||||||
}
|
}
|
||||||
for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
|
for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
|
||||||
if (v == g.accept) {
|
if (v == g.accept) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (state_ids.at(v) != NO_STATE) {
|
if (contains(unused, v)) {
|
||||||
acceptEod->set(g[v].index);
|
continue;
|
||||||
}
|
}
|
||||||
|
acceptEod->set(g[v].index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -315,8 +315,7 @@ bool triggerAllowed(const NGHolder &g, const NFAVertex v,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void markToppableStarts(const NGHolder &g,
|
void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused,
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
|
||||||
bool single_trigger,
|
bool single_trigger,
|
||||||
const vector<vector<CharReach>> &triggers,
|
const vector<vector<CharReach>> &triggers,
|
||||||
dynamic_bitset<> *out) {
|
dynamic_bitset<> *out) {
|
||||||
@ -325,14 +324,13 @@ void markToppableStarts(const NGHolder &g,
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (auto v : vertices_range(g)) {
|
for (auto v : vertices_range(g)) {
|
||||||
if (state_ids.at(v) == NO_STATE) {
|
if (contains(unused, v)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
u32 vert_id = g[v].index;
|
|
||||||
for (const auto &trigger : triggers) {
|
for (const auto &trigger : triggers) {
|
||||||
if (triggerAllowed(g, v, triggers, trigger)) {
|
if (triggerAllowed(g, v, triggers, trigger)) {
|
||||||
DEBUG_PRINTF("idx %u is valid location for top\n", vert_id);
|
DEBUG_PRINTF("idx %u is valid location for top\n", g[v].index);
|
||||||
out->set(vert_id);
|
out->set(g[v].index);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -349,15 +347,14 @@ public:
|
|||||||
typedef map<StateSet, dstate_id_t> StateMap;
|
typedef map<StateSet, dstate_id_t> StateMap;
|
||||||
|
|
||||||
Automaton_Big(const ReportManager *rm_in, const NGHolder &graph_in,
|
Automaton_Big(const ReportManager *rm_in, const NGHolder &graph_in,
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids_in,
|
const flat_set<NFAVertex> &unused_in, bool single_trigger,
|
||||||
bool single_trigger,
|
|
||||||
const vector<vector<CharReach>> &triggers, bool prunable_in)
|
const vector<vector<CharReach>> &triggers, bool prunable_in)
|
||||||
: rm(rm_in), graph(graph_in), state_ids(state_ids_in),
|
: rm(rm_in), graph(graph_in), numStates(num_vertices(graph)),
|
||||||
numStates(num_vertices(graph)), init(numStates), initDS(numStates),
|
unused(unused_in), init(numStates), initDS(numStates),
|
||||||
squash(numStates), accept(numStates), acceptEod(numStates),
|
squash(numStates), accept(numStates), acceptEod(numStates),
|
||||||
toppable(numStates), prunable(prunable_in), dead(numStates) {
|
toppable(numStates), prunable(prunable_in), dead(numStates) {
|
||||||
populateInit(graph, state_ids, &init, &initDS, &v_by_index);
|
populateInit(graph, unused, &init, &initDS, &v_by_index);
|
||||||
populateAccepts(graph, state_ids, &accept, &acceptEod);
|
populateAccepts(graph, unused, &accept, &acceptEod);
|
||||||
|
|
||||||
start_anchored = DEAD_STATE + 1;
|
start_anchored = DEAD_STATE + 1;
|
||||||
if (initDS == init) {
|
if (initDS == init) {
|
||||||
@ -379,7 +376,7 @@ public:
|
|||||||
|
|
||||||
cr_by_index = populateCR(graph, v_by_index, alpha);
|
cr_by_index = populateCR(graph, v_by_index, alpha);
|
||||||
if (is_triggered(graph)) {
|
if (is_triggered(graph)) {
|
||||||
markToppableStarts(graph, state_ids, single_trigger, triggers,
|
markToppableStarts(graph, unused, single_trigger, triggers,
|
||||||
&toppable);
|
&toppable);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -438,8 +435,8 @@ private:
|
|||||||
const ReportManager *rm;
|
const ReportManager *rm;
|
||||||
public:
|
public:
|
||||||
const NGHolder &graph;
|
const NGHolder &graph;
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids;
|
|
||||||
u32 numStates;
|
u32 numStates;
|
||||||
|
const flat_set<NFAVertex> &unused;
|
||||||
vector<NFAVertex> v_by_index;
|
vector<NFAVertex> v_by_index;
|
||||||
vector<CharReach> cr_by_index; /* pre alpha'ed */
|
vector<CharReach> cr_by_index; /* pre alpha'ed */
|
||||||
StateSet init;
|
StateSet init;
|
||||||
@ -466,13 +463,11 @@ public:
|
|||||||
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
|
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
|
||||||
|
|
||||||
Automaton_Graph(const ReportManager *rm_in, const NGHolder &graph_in,
|
Automaton_Graph(const ReportManager *rm_in, const NGHolder &graph_in,
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids_in,
|
const flat_set<NFAVertex> &unused_in, bool single_trigger,
|
||||||
bool single_trigger,
|
|
||||||
const vector<vector<CharReach>> &triggers, bool prunable_in)
|
const vector<vector<CharReach>> &triggers, bool prunable_in)
|
||||||
: rm(rm_in), graph(graph_in), state_ids(state_ids_in),
|
: rm(rm_in), graph(graph_in), unused(unused_in), prunable(prunable_in) {
|
||||||
prunable(prunable_in) {
|
populateInit(graph, unused, &init, &initDS, &v_by_index);
|
||||||
populateInit(graph, state_ids, &init, &initDS, &v_by_index);
|
populateAccepts(graph, unused, &accept, &acceptEod);
|
||||||
populateAccepts(graph, state_ids, &accept, &acceptEod);
|
|
||||||
|
|
||||||
start_anchored = DEAD_STATE + 1;
|
start_anchored = DEAD_STATE + 1;
|
||||||
if (initDS == init) {
|
if (initDS == init) {
|
||||||
@ -496,8 +491,7 @@ public:
|
|||||||
cr_by_index = populateCR(graph, v_by_index, alpha);
|
cr_by_index = populateCR(graph, v_by_index, alpha);
|
||||||
if (is_triggered(graph)) {
|
if (is_triggered(graph)) {
|
||||||
dynamic_bitset<> temp(NFA_STATE_LIMIT);
|
dynamic_bitset<> temp(NFA_STATE_LIMIT);
|
||||||
markToppableStarts(graph, state_ids, single_trigger, triggers,
|
markToppableStarts(graph, unused, single_trigger, triggers, &temp);
|
||||||
&temp);
|
|
||||||
toppable = bitfield<NFA_STATE_LIMIT>(temp);
|
toppable = bitfield<NFA_STATE_LIMIT>(temp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -557,7 +551,7 @@ private:
|
|||||||
const ReportManager *rm;
|
const ReportManager *rm;
|
||||||
public:
|
public:
|
||||||
const NGHolder &graph;
|
const NGHolder &graph;
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids;
|
const flat_set<NFAVertex> &unused;
|
||||||
vector<NFAVertex> v_by_index;
|
vector<NFAVertex> v_by_index;
|
||||||
vector<CharReach> cr_by_index; /* pre alpha'ed */
|
vector<CharReach> cr_by_index; /* pre alpha'ed */
|
||||||
StateSet init;
|
StateSet init;
|
||||||
@ -580,20 +574,15 @@ public:
|
|||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, const ReportManager *rm,
|
unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph,
|
||||||
bool single_trigger,
|
const ReportManager *rm, bool single_trigger,
|
||||||
const vector<vector<CharReach>> &triggers,
|
const vector<vector<CharReach>> &triggers,
|
||||||
const Grey &grey, bool finalChance) {
|
const Grey &grey, bool finalChance) {
|
||||||
if (!grey.allowMcClellan) {
|
if (!grey.allowMcClellan) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Construct a mutable copy of the graph so that we can drop unused starts.
|
auto unused = findUnusedStates(graph);
|
||||||
auto g_copy = cloneHolder(g);
|
|
||||||
NGHolder &graph = *g_copy;
|
|
||||||
|
|
||||||
auto state_ids = numberStates(graph);
|
|
||||||
dropUnusedStarts(graph, state_ids);
|
|
||||||
|
|
||||||
DEBUG_PRINTF("attempting to build ?%d? mcclellan\n", (int)graph.kind);
|
DEBUG_PRINTF("attempting to build ?%d? mcclellan\n", (int)graph.kind);
|
||||||
assert(allMatchStatesHaveReports(graph));
|
assert(allMatchStatesHaveReports(graph));
|
||||||
@ -620,7 +609,7 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, const ReportManager *rm,
|
|||||||
if (numStates <= NFA_STATE_LIMIT) {
|
if (numStates <= NFA_STATE_LIMIT) {
|
||||||
/* Fast path. Automaton_Graph uses a bitfield internally to represent
|
/* Fast path. Automaton_Graph uses a bitfield internally to represent
|
||||||
* states and is quicker than Automaton_Big. */
|
* states and is quicker than Automaton_Big. */
|
||||||
Automaton_Graph n(rm, graph, state_ids, single_trigger, triggers,
|
Automaton_Graph n(rm, graph, unused, single_trigger, triggers,
|
||||||
prunable);
|
prunable);
|
||||||
if (determinise(n, rdfa->states, state_limit)) {
|
if (determinise(n, rdfa->states, state_limit)) {
|
||||||
DEBUG_PRINTF("state limit exceeded\n");
|
DEBUG_PRINTF("state limit exceeded\n");
|
||||||
@ -633,8 +622,7 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, const ReportManager *rm,
|
|||||||
rdfa->alpha_remap = n.alpha;
|
rdfa->alpha_remap = n.alpha;
|
||||||
} else {
|
} else {
|
||||||
/* Slow path. Too many states to use Automaton_Graph. */
|
/* Slow path. Too many states to use Automaton_Graph. */
|
||||||
Automaton_Big n(rm, graph, state_ids, single_trigger, triggers,
|
Automaton_Big n(rm, graph, unused, single_trigger, triggers, prunable);
|
||||||
prunable);
|
|
||||||
if (determinise(n, rdfa->states, state_limit)) {
|
if (determinise(n, rdfa->states, state_limit)) {
|
||||||
DEBUG_PRINTF("state limit exceeded\n");
|
DEBUG_PRINTF("state limit exceeded\n");
|
||||||
return nullptr; /* over state limit */
|
return nullptr; /* over state limit */
|
||||||
|
@ -64,8 +64,7 @@ void getFullTransitionFromState(const raw_dfa &n, u16 state,
|
|||||||
u16 *out_table);
|
u16 *out_table);
|
||||||
|
|
||||||
/** produce a map of states on which it is valid to receive tops */
|
/** produce a map of states on which it is valid to receive tops */
|
||||||
void markToppableStarts(const NGHolder &g,
|
void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused,
|
||||||
const ue2::unordered_map<NFAVertex, u32> &state_ids,
|
|
||||||
bool single_trigger,
|
bool single_trigger,
|
||||||
const std::vector<std::vector<CharReach>> &triggers,
|
const std::vector<std::vector<CharReach>> &triggers,
|
||||||
boost::dynamic_bitset<> *out);
|
boost::dynamic_bitset<> *out);
|
||||||
@ -76,7 +75,7 @@ void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId,
|
|||||||
typename autom::StateSet *next) {
|
typename autom::StateSet *next) {
|
||||||
typedef typename autom::StateSet StateSet;
|
typedef typename autom::StateSet StateSet;
|
||||||
const NGHolder &graph = nfa.graph;
|
const NGHolder &graph = nfa.graph;
|
||||||
const auto &state_ids = nfa.state_ids;
|
const auto &unused = nfa.unused;
|
||||||
const auto &alpha = nfa.alpha;
|
const auto &alpha = nfa.alpha;
|
||||||
const StateSet &squash = nfa.squash;
|
const StateSet &squash = nfa.squash;
|
||||||
const std::map<u32, StateSet> &squash_mask = nfa.squash_mask;
|
const std::map<u32, StateSet> &squash_mask = nfa.squash_mask;
|
||||||
@ -94,7 +93,7 @@ void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId,
|
|||||||
NFAVertex u = vByStateId[i];
|
NFAVertex u = vByStateId[i];
|
||||||
|
|
||||||
for (const auto &v : adjacent_vertices_range(u, graph)) {
|
for (const auto &v : adjacent_vertices_range(u, graph)) {
|
||||||
if (state_ids.at(v) == NO_STATE) {
|
if (contains(unused, v)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
succ.set(graph[v].index);
|
succ.set(graph[v].index);
|
||||||
|
@ -281,6 +281,17 @@ void dropUnusedStarts(NGHolder &g, ue2::unordered_map<NFAVertex, u32> &states) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
flat_set<NFAVertex> findUnusedStates(const NGHolder &g) {
|
||||||
|
flat_set<NFAVertex> dead;
|
||||||
|
if (startIsRedundant(g)) {
|
||||||
|
dead.insert(g.start);
|
||||||
|
}
|
||||||
|
if (proper_out_degree(g.startDs, g) == 0) {
|
||||||
|
dead.insert(g.startDs);
|
||||||
|
}
|
||||||
|
return dead;
|
||||||
|
}
|
||||||
|
|
||||||
/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to
|
/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to
|
||||||
* accepts. */
|
* accepts. */
|
||||||
void reverseHolder(const NGHolder &g_in, NGHolder &g) {
|
void reverseHolder(const NGHolder &g_in, NGHolder &g) {
|
||||||
|
@ -81,6 +81,13 @@ u32 countStates(const NGHolder &g,
|
|||||||
/** Optimisation: drop unnecessary start states. */
|
/** Optimisation: drop unnecessary start states. */
|
||||||
void dropUnusedStarts(NGHolder &g, ue2::unordered_map<NFAVertex, u32> &states);
|
void dropUnusedStarts(NGHolder &g, ue2::unordered_map<NFAVertex, u32> &states);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Returns a set of vertices that will not participate in an
|
||||||
|
* implementation (NFA, DFA etc) of this graph. For example, starts with no
|
||||||
|
* successors.
|
||||||
|
*/
|
||||||
|
flat_set<NFAVertex> findUnusedStates(const NGHolder &g);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user