mcclellan: Remove the use of state numbering

McClellan and Haig only need to know which states do not participate in the
implementation (i.e. the start and startDs vertices, when they are unused).
This commit is contained in:
Justin Viiret 2016-03-23 11:47:54 +11:00 committed by Matthew Barr
parent 5dd4aa9c13
commit 839c90594c
6 changed files with 87 additions and 88 deletions

View File

@ -69,17 +69,15 @@ struct haig_too_wide {
template<typename stateset>
static
void populateInit(const NGHolder &g,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused,
stateset *init, stateset *initDS,
vector<NFAVertex> *v_by_index) {
DEBUG_PRINTF("graph kind: %u\n", (int)g.kind);
for (auto v : vertices_range(g)) {
u32 v_index = g[v].index;
if (state_ids.at(v) == NO_STATE) {
if (contains(unused, v)) {
continue;
}
u32 v_index = g[v].index;
if (is_any_start(v, g)) {
init->set(v_index);
if (hasSelfLoop(v, g) || is_triggered(g)) {
@ -116,8 +114,8 @@ void populateAccepts(const NGHolder &g, StateSet *accept, StateSet *acceptEod) {
class Automaton_Base {
protected:
Automaton_Base(const NGHolder &graph_in,
const ue2::unordered_map<NFAVertex, u32> &state_ids_in)
: graph(graph_in), state_ids(state_ids_in) {
const flat_set<NFAVertex> &unused_in)
: graph(graph_in), unused(unused_in) {
calculateAlphabet(graph, alpha, unalpha, &alphasize);
assert(alphasize <= ALPHABET_SIZE);
}
@ -126,7 +124,7 @@ public:
static bool canPrune(const flat_set<ReportID> &) { return false; }
const NGHolder &graph;
const ue2::unordered_map<NFAVertex, u32> &state_ids;
const flat_set<NFAVertex> &unused;
array<u16, ALPHABET_SIZE> alpha;
array<u16, ALPHABET_SIZE> unalpha;
@ -145,14 +143,13 @@ public:
typedef map<StateSet, dstate_id_t> StateMap;
Automaton_Big(const NGHolder &graph_in,
const ue2::unordered_map<NFAVertex, u32> &state_ids_in,
som_type som, const vector<vector<CharReach>> &triggers,
bool unordered_som)
: Automaton_Base(graph_in, state_ids_in), numStates(num_vertices(graph)),
const flat_set<NFAVertex> &unused_in, som_type som,
const vector<vector<CharReach>> &triggers, bool unordered_som)
: Automaton_Base(graph_in, unused_in), numStates(num_vertices(graph)),
init(numStates), initDS(numStates), squash(numStates),
accept(numStates), acceptEod(numStates), toppable(numStates),
dead(numStates) {
populateInit(graph, state_ids, &init, &initDS, &v_by_index);
populateInit(graph, unused, &init, &initDS, &v_by_index);
populateAccepts(graph, &accept, &acceptEod);
start_anchored = DEAD_STATE + 1;
@ -175,7 +172,7 @@ public:
cr_by_index = populateCR(graph, v_by_index, alpha);
if (is_triggered(graph)) {
markToppableStarts(graph, state_ids, false, triggers, &toppable);
markToppableStarts(graph, unused, false, triggers, &toppable);
}
}
@ -244,11 +241,11 @@ public:
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
Automaton_Graph(const NGHolder &graph_in,
const ue2::unordered_map<NFAVertex, u32> &state_ids_in,
const flat_set<NFAVertex> &unused_in,
som_type som, const vector<vector<CharReach>> &triggers,
bool unordered_som)
: Automaton_Base(graph_in, state_ids_in) {
populateInit(graph, state_ids, &init, &initDS, &v_by_index);
: Automaton_Base(graph_in, unused_in) {
populateInit(graph, unused, &init, &initDS, &v_by_index);
populateAccepts(graph, &accept, &acceptEod);
start_anchored = DEAD_STATE + 1;
@ -272,7 +269,7 @@ public:
cr_by_index = populateCR(graph, v_by_index, alpha);
if (is_triggered(graph)) {
dynamic_bitset<> temp(NFA_STATE_LIMIT);
markToppableStarts(graph, state_ids, false, triggers, &temp);
markToppableStarts(graph, unused, false, triggers, &temp);
toppable = bitfield<NFA_STATE_LIMIT>(temp);
}
}
@ -481,11 +478,10 @@ bool is_any_start_inc_virtual(NFAVertex v, const NGHolder &g) {
}
static
s32 getSlotID(const NGHolder &g,
UNUSED const ue2::unordered_map<NFAVertex, u32> &state_ids,
s32 getSlotID(const NGHolder &g, UNUSED const flat_set<NFAVertex> &unused,
NFAVertex v) {
if (is_triggered(g) && v == g.start) {
assert(state_ids.at(v) != NO_STATE);
assert(!contains(unused, v));
} else if (is_any_start_inc_virtual(v, g)) {
return CREATE_NEW_SOM;
}
@ -516,8 +512,7 @@ void haig_do_preds(const NGHolder &g, const stateset &nfa_states,
template<typename stateset>
static
void haig_do_report(const NGHolder &g,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
void haig_do_report(const NGHolder &g, const flat_set<NFAVertex> &unused,
NFAVertex accept_v, const stateset &source_nfa_states,
const vector<NFAVertex> &state_mapping,
set<som_report> &out) {
@ -528,7 +523,7 @@ void haig_do_report(const NGHolder &g,
continue;
}
for (ReportID report_id : g[v].reports) {
out.insert(som_report(report_id, getSlotID(g, state_ids, v)));
out.insert(som_report(report_id, getSlotID(g, unused, v)));
}
}
}
@ -565,14 +560,14 @@ void haig_note_starts(const NGHolder &g, map<u32, u32> *out) {
template<class Auto>
static
bool doHaig(const NGHolder &g,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
const flat_set<NFAVertex> &unused,
som_type som, const vector<vector<CharReach>> &triggers,
bool unordered_som, raw_som_dfa *rdfa) {
u32 state_limit = HAIG_FINAL_DFA_STATE_LIMIT; /* haig never backs down from
a fight */
typedef typename Auto::StateSet StateSet;
vector<StateSet> nfa_state_map;
Auto n(g, state_ids, som, triggers, unordered_som);
Auto n(g, unused, som, triggers, unordered_som);
try {
if (determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
DEBUG_PRINTF("state limit exceeded\n");
@ -602,9 +597,9 @@ bool doHaig(const NGHolder &g,
haig_do_preds(g, source_states, n.v_by_index,
rdfa->state_som.back().preds);
haig_do_report(g, state_ids, g.accept, source_states, n.v_by_index,
haig_do_report(g, unused, g.accept, source_states, n.v_by_index,
rdfa->state_som.back().reports);
haig_do_report(g, state_ids, g.acceptEod, source_states, n.v_by_index,
haig_do_report(g, unused, g.acceptEod, source_states, n.v_by_index,
rdfa->state_som.back().reports_eod);
}
@ -613,10 +608,10 @@ bool doHaig(const NGHolder &g,
return true;
}
unique_ptr<raw_som_dfa> attemptToBuildHaig(NGHolder &g, som_type som,
u32 somPrecision,
const vector<vector<CharReach> > &triggers,
const Grey &grey, bool unordered_som) {
unique_ptr<raw_som_dfa>
attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision,
const vector<vector<CharReach>> &triggers, const Grey &grey,
bool unordered_som) {
assert(is_triggered(g) != triggers.empty());
assert(!unordered_som || is_triggered(g));
@ -625,13 +620,12 @@ unique_ptr<raw_som_dfa> attemptToBuildHaig(NGHolder &g, som_type som,
return nullptr;
}
auto state_ids = numberStates(g);
dropUnusedStarts(g, state_ids);
DEBUG_PRINTF("attempting to build haig \n");
assert(allMatchStatesHaveReports(g));
assert(hasCorrectlyNumberedVertices(g));
auto unused = findUnusedStates(g);
u32 numStates = num_vertices(g);
if (numStates > HAIG_MAX_NFA_STATE) {
DEBUG_PRINTF("giving up... looks too big\n");
@ -645,11 +639,11 @@ unique_ptr<raw_som_dfa> attemptToBuildHaig(NGHolder &g, som_type som,
bool rv;
if (numStates <= NFA_STATE_LIMIT) {
/* fast path */
rv = doHaig<Automaton_Graph>(g, state_ids, som, triggers, unordered_som,
rv = doHaig<Automaton_Graph>(g, unused, som, triggers, unordered_som,
rdfa.get());
} else {
/* not the fast path */
rv = doHaig<Automaton_Big>(g, state_ids, som, triggers, unordered_som,
rv = doHaig<Automaton_Big>(g, unused, som, triggers, unordered_som,
rdfa.get());
}

View File

@ -54,10 +54,10 @@ struct raw_som_dfa;
* between)
*/
std::unique_ptr<raw_som_dfa> attemptToBuildHaig(NGHolder &g, som_type som,
u32 somPrecision,
const std::vector<std::vector<CharReach> > &triggers,
const Grey &grey, bool unordered_som_triggers = false);
std::unique_ptr<raw_som_dfa>
attemptToBuildHaig(const NGHolder &g, som_type som, u32 somPrecision,
const std::vector<std::vector<CharReach>> &triggers,
const Grey &grey, bool unordered_som_triggers = false);
std::unique_ptr<raw_som_dfa>
attemptToMergeHaig(const std::vector<const raw_som_dfa *> &dfas,

View File

@ -152,12 +152,11 @@ void getFullTransitionFromState(const raw_dfa &n, dstate_id_t state,
template<typename stateset>
static
void populateInit(const NGHolder &g,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
void populateInit(const NGHolder &g, const flat_set<NFAVertex> &unused,
stateset *init, stateset *init_deep,
vector<NFAVertex> *v_by_index) {
for (auto v : vertices_range(g)) {
if (state_ids.at(v) == NO_STATE) {
if (contains(unused, v)) {
continue;
}
@ -188,21 +187,22 @@ void populateInit(const NGHolder &g,
}
template<typename StateSet>
void populateAccepts(const NGHolder &g,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
void populateAccepts(const NGHolder &g, const flat_set<NFAVertex> &unused,
StateSet *accept, StateSet *acceptEod) {
for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
if (state_ids.at(v) != NO_STATE) {
accept->set(g[v].index);
if (contains(unused, v)) {
continue;
}
accept->set(g[v].index);
}
for (auto v : inv_adjacent_vertices_range(g.acceptEod, g)) {
if (v == g.accept) {
continue;
}
if (state_ids.at(v) != NO_STATE) {
acceptEod->set(g[v].index);
if (contains(unused, v)) {
continue;
}
acceptEod->set(g[v].index);
}
}
@ -315,8 +315,7 @@ bool triggerAllowed(const NGHolder &g, const NFAVertex v,
return true;
}
void markToppableStarts(const NGHolder &g,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused,
bool single_trigger,
const vector<vector<CharReach>> &triggers,
dynamic_bitset<> *out) {
@ -325,14 +324,13 @@ void markToppableStarts(const NGHolder &g,
}
for (auto v : vertices_range(g)) {
if (state_ids.at(v) == NO_STATE) {
if (contains(unused, v)) {
continue;
}
u32 vert_id = g[v].index;
for (const auto &trigger : triggers) {
if (triggerAllowed(g, v, triggers, trigger)) {
DEBUG_PRINTF("idx %u is valid location for top\n", vert_id);
out->set(vert_id);
DEBUG_PRINTF("idx %u is valid location for top\n", g[v].index);
out->set(g[v].index);
break;
}
}
@ -349,15 +347,14 @@ public:
typedef map<StateSet, dstate_id_t> StateMap;
Automaton_Big(const ReportManager *rm_in, const NGHolder &graph_in,
const ue2::unordered_map<NFAVertex, u32> &state_ids_in,
bool single_trigger,
const flat_set<NFAVertex> &unused_in, bool single_trigger,
const vector<vector<CharReach>> &triggers, bool prunable_in)
: rm(rm_in), graph(graph_in), state_ids(state_ids_in),
numStates(num_vertices(graph)), init(numStates), initDS(numStates),
: rm(rm_in), graph(graph_in), numStates(num_vertices(graph)),
unused(unused_in), init(numStates), initDS(numStates),
squash(numStates), accept(numStates), acceptEod(numStates),
toppable(numStates), prunable(prunable_in), dead(numStates) {
populateInit(graph, state_ids, &init, &initDS, &v_by_index);
populateAccepts(graph, state_ids, &accept, &acceptEod);
populateInit(graph, unused, &init, &initDS, &v_by_index);
populateAccepts(graph, unused, &accept, &acceptEod);
start_anchored = DEAD_STATE + 1;
if (initDS == init) {
@ -379,7 +376,7 @@ public:
cr_by_index = populateCR(graph, v_by_index, alpha);
if (is_triggered(graph)) {
markToppableStarts(graph, state_ids, single_trigger, triggers,
markToppableStarts(graph, unused, single_trigger, triggers,
&toppable);
}
}
@ -438,8 +435,8 @@ private:
const ReportManager *rm;
public:
const NGHolder &graph;
const ue2::unordered_map<NFAVertex, u32> &state_ids;
u32 numStates;
const flat_set<NFAVertex> &unused;
vector<NFAVertex> v_by_index;
vector<CharReach> cr_by_index; /* pre alpha'ed */
StateSet init;
@ -466,13 +463,11 @@ public:
typedef ue2::unordered_map<StateSet, dstate_id_t> StateMap;
Automaton_Graph(const ReportManager *rm_in, const NGHolder &graph_in,
const ue2::unordered_map<NFAVertex, u32> &state_ids_in,
bool single_trigger,
const flat_set<NFAVertex> &unused_in, bool single_trigger,
const vector<vector<CharReach>> &triggers, bool prunable_in)
: rm(rm_in), graph(graph_in), state_ids(state_ids_in),
prunable(prunable_in) {
populateInit(graph, state_ids, &init, &initDS, &v_by_index);
populateAccepts(graph, state_ids, &accept, &acceptEod);
: rm(rm_in), graph(graph_in), unused(unused_in), prunable(prunable_in) {
populateInit(graph, unused, &init, &initDS, &v_by_index);
populateAccepts(graph, unused, &accept, &acceptEod);
start_anchored = DEAD_STATE + 1;
if (initDS == init) {
@ -496,8 +491,7 @@ public:
cr_by_index = populateCR(graph, v_by_index, alpha);
if (is_triggered(graph)) {
dynamic_bitset<> temp(NFA_STATE_LIMIT);
markToppableStarts(graph, state_ids, single_trigger, triggers,
&temp);
markToppableStarts(graph, unused, single_trigger, triggers, &temp);
toppable = bitfield<NFA_STATE_LIMIT>(temp);
}
}
@ -557,7 +551,7 @@ private:
const ReportManager *rm;
public:
const NGHolder &graph;
const ue2::unordered_map<NFAVertex, u32> &state_ids;
const flat_set<NFAVertex> &unused;
vector<NFAVertex> v_by_index;
vector<CharReach> cr_by_index; /* pre alpha'ed */
StateSet init;
@ -580,20 +574,15 @@ public:
} // namespace
unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, const ReportManager *rm,
bool single_trigger,
unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph,
const ReportManager *rm, bool single_trigger,
const vector<vector<CharReach>> &triggers,
const Grey &grey, bool finalChance) {
if (!grey.allowMcClellan) {
return nullptr;
}
// Construct a mutable copy of the graph so that we can drop unused starts.
auto g_copy = cloneHolder(g);
NGHolder &graph = *g_copy;
auto state_ids = numberStates(graph);
dropUnusedStarts(graph, state_ids);
auto unused = findUnusedStates(graph);
DEBUG_PRINTF("attempting to build ?%d? mcclellan\n", (int)graph.kind);
assert(allMatchStatesHaveReports(graph));
@ -620,7 +609,7 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, const ReportManager *rm,
if (numStates <= NFA_STATE_LIMIT) {
/* Fast path. Automaton_Graph uses a bitfield internally to represent
* states and is quicker than Automaton_Big. */
Automaton_Graph n(rm, graph, state_ids, single_trigger, triggers,
Automaton_Graph n(rm, graph, unused, single_trigger, triggers,
prunable);
if (determinise(n, rdfa->states, state_limit)) {
DEBUG_PRINTF("state limit exceeded\n");
@ -633,8 +622,7 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &g, const ReportManager *rm,
rdfa->alpha_remap = n.alpha;
} else {
/* Slow path. Too many states to use Automaton_Graph. */
Automaton_Big n(rm, graph, state_ids, single_trigger, triggers,
prunable);
Automaton_Big n(rm, graph, unused, single_trigger, triggers, prunable);
if (determinise(n, rdfa->states, state_limit)) {
DEBUG_PRINTF("state limit exceeded\n");
return nullptr; /* over state limit */

View File

@ -64,8 +64,7 @@ void getFullTransitionFromState(const raw_dfa &n, u16 state,
u16 *out_table);
/** produce a map of states on which it is valid to receive tops */
void markToppableStarts(const NGHolder &g,
const ue2::unordered_map<NFAVertex, u32> &state_ids,
void markToppableStarts(const NGHolder &g, const flat_set<NFAVertex> &unused,
bool single_trigger,
const std::vector<std::vector<CharReach>> &triggers,
boost::dynamic_bitset<> *out);
@ -76,7 +75,7 @@ void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId,
typename autom::StateSet *next) {
typedef typename autom::StateSet StateSet;
const NGHolder &graph = nfa.graph;
const auto &state_ids = nfa.state_ids;
const auto &unused = nfa.unused;
const auto &alpha = nfa.alpha;
const StateSet &squash = nfa.squash;
const std::map<u32, StateSet> &squash_mask = nfa.squash_mask;
@ -94,7 +93,7 @@ void transition_graph(autom &nfa, const std::vector<NFAVertex> &vByStateId,
NFAVertex u = vByStateId[i];
for (const auto &v : adjacent_vertices_range(u, graph)) {
if (state_ids.at(v) == NO_STATE) {
if (contains(unused, v)) {
continue;
}
succ.set(graph[v].index);

View File

@ -281,6 +281,17 @@ void dropUnusedStarts(NGHolder &g, ue2::unordered_map<NFAVertex, u32> &states) {
}
}
/**
 * Builds the set of start vertices of \p g that should be treated as unused:
 *  - g.start is included when startIsRedundant(g) reports true;
 *  - g.startDs is included when proper_out_degree() reports zero for it.
 *
 * The graph itself is left untouched; only the returned set records which
 * vertices the caller should skip.
 */
flat_set<NFAVertex> findUnusedStates(const NGHolder &g) {
    flat_set<NFAVertex> unused;

    const bool start_redundant = startIsRedundant(g);
    if (start_redundant) {
        unused.insert(g.start);
    }

    // NOTE(review): presumably the "proper" degree ignores the self-loop on
    // startDs -- confirm against proper_out_degree()'s definition.
    const bool start_ds_degree_zero = (0 == proper_out_degree(g.startDs, g));
    if (start_ds_degree_zero) {
        unused.insert(g.startDs);
    }

    return unused;
}
/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to
* accepts. */
void reverseHolder(const NGHolder &g_in, NGHolder &g) {

View File

@ -81,6 +81,13 @@ u32 countStates(const NGHolder &g,
/** Optimisation: drop unnecessary start states. */
void dropUnusedStarts(NGHolder &g, ue2::unordered_map<NFAVertex, u32> &states);
/**
* \brief Returns a set of vertices that will not participate in an
* implementation (NFA, DFA etc) of this graph. For example, starts with no
* successors.
*/
flat_set<NFAVertex> findUnusedStates(const NGHolder &g);
} // namespace ue2
#endif