ng_squash: switch to using unordered_map

Also includes some cleanup and small performance improvements.
This commit is contained in:
Justin Viiret 2017-08-14 12:55:28 +10:00 committed by Matthew Barr
parent ace592e247
commit ea2e85ac87
5 changed files with 93 additions and 80 deletions

View File

@ -105,11 +105,13 @@ struct limex_accel_info {
}; };
static static
map<NFAVertex, NFAStateSet> unordered_map<NFAVertex, NFAStateSet>
reindexByStateId(const map<NFAVertex, NFAStateSet> &in, const NGHolder &g, reindexByStateId(const unordered_map<NFAVertex, NFAStateSet> &in,
const NGHolder &g,
const unordered_map<NFAVertex, u32> &state_ids, const unordered_map<NFAVertex, u32> &state_ids,
const u32 num_states) { const u32 num_states) {
map<NFAVertex, NFAStateSet> out; unordered_map<NFAVertex, NFAStateSet> out;
out.reserve(in.size());
vector<u32> indexToState(num_vertices(g), NO_STATE); vector<u32> indexToState(num_vertices(g), NO_STATE);
for (const auto &m : state_ids) { for (const auto &m : state_ids) {
@ -141,8 +143,8 @@ struct build_info {
build_info(NGHolder &hi, build_info(NGHolder &hi,
const unordered_map<NFAVertex, u32> &states_in, const unordered_map<NFAVertex, u32> &states_in,
const vector<BoundedRepeatData> &ri, const vector<BoundedRepeatData> &ri,
const map<NFAVertex, NFAStateSet> &rsmi, const unordered_map<NFAVertex, NFAStateSet> &rsmi,
const map<NFAVertex, NFAStateSet> &smi, const unordered_map<NFAVertex, NFAStateSet> &smi,
const map<u32, set<NFAVertex>> &ti, const set<NFAVertex> &zi, const map<u32, set<NFAVertex>> &ti, const set<NFAVertex> &zi,
bool dai, bool sci, const CompileContext &cci, u32 nsi) bool dai, bool sci, const CompileContext &cci, u32 nsi)
: h(hi), state_ids(states_in), repeats(ri), tops(ti), tugs(nsi), : h(hi), state_ids(states_in), repeats(ri), tops(ti), tugs(nsi),
@ -168,8 +170,8 @@ struct build_info {
const vector<BoundedRepeatData> &repeats; const vector<BoundedRepeatData> &repeats;
// Squash maps; state sets are indexed by state_id. // Squash maps; state sets are indexed by state_id.
map<NFAVertex, NFAStateSet> reportSquashMap; unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
map<NFAVertex, NFAStateSet> squashMap; unordered_map<NFAVertex, NFAStateSet> squashMap;
const map<u32, set<NFAVertex>> &tops; const map<u32, set<NFAVertex>> &tops;
NFAStateSet tugs; NFAStateSet tugs;
@ -2436,8 +2438,8 @@ u32 max_state(const unordered_map<NFAVertex, u32> &state_ids) {
bytecode_ptr<NFA> generate(NGHolder &h, bytecode_ptr<NFA> generate(NGHolder &h,
const unordered_map<NFAVertex, u32> &states, const unordered_map<NFAVertex, u32> &states,
const vector<BoundedRepeatData> &repeats, const vector<BoundedRepeatData> &repeats,
const map<NFAVertex, NFAStateSet> &reportSquashMap, const unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
const map<NFAVertex, NFAStateSet> &squashMap, const unordered_map<NFAVertex, NFAStateSet> &squashMap,
const map<u32, set<NFAVertex>> &tops, const map<u32, set<NFAVertex>> &tops,
const set<NFAVertex> &zombies, bool do_accel, const set<NFAVertex> &zombies, bool do_accel,
bool stateCompression, u32 hint, bool stateCompression, u32 hint,
@ -2506,8 +2508,8 @@ bytecode_ptr<NFA> generate(NGHolder &h,
u32 countAccelStates(NGHolder &h, u32 countAccelStates(NGHolder &h,
const unordered_map<NFAVertex, u32> &states, const unordered_map<NFAVertex, u32> &states,
const vector<BoundedRepeatData> &repeats, const vector<BoundedRepeatData> &repeats,
const map<NFAVertex, NFAStateSet> &reportSquashMap, const unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
const map<NFAVertex, NFAStateSet> &squashMap, const unordered_map<NFAVertex, NFAStateSet> &squashMap,
const map<u32, set<NFAVertex>> &tops, const map<u32, set<NFAVertex>> &tops,
const set<NFAVertex> &zombies, const set<NFAVertex> &zombies,
const CompileContext &cc) { const CompileContext &cc) {

View File

@ -72,8 +72,8 @@ struct CompileContext;
bytecode_ptr<NFA> generate(NGHolder &g, bytecode_ptr<NFA> generate(NGHolder &g,
const std::unordered_map<NFAVertex, u32> &states, const std::unordered_map<NFAVertex, u32> &states,
const std::vector<BoundedRepeatData> &repeats, const std::vector<BoundedRepeatData> &repeats,
const std::map<NFAVertex, NFAStateSet> &reportSquashMap, const std::unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
const std::map<NFAVertex, NFAStateSet> &squashMap, const std::unordered_map<NFAVertex, NFAStateSet> &squashMap,
const std::map<u32, std::set<NFAVertex>> &tops, const std::map<u32, std::set<NFAVertex>> &tops,
const std::set<NFAVertex> &zombies, const std::set<NFAVertex> &zombies,
bool do_accel, bool do_accel,
@ -90,8 +90,8 @@ bytecode_ptr<NFA> generate(NGHolder &g,
u32 countAccelStates(NGHolder &h, u32 countAccelStates(NGHolder &h,
const std::unordered_map<NFAVertex, u32> &states, const std::unordered_map<NFAVertex, u32> &states,
const std::vector<BoundedRepeatData> &repeats, const std::vector<BoundedRepeatData> &repeats,
const std::map<NFAVertex, NFAStateSet> &reportSquashMap, const std::unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
const std::map<NFAVertex, NFAStateSet> &squashMap, const std::unordered_map<NFAVertex, NFAStateSet> &squashMap,
const std::map<u32, std::set<NFAVertex>> &tops, const std::map<u32, std::set<NFAVertex>> &tops,
const std::set<NFAVertex> &zombies, const std::set<NFAVertex> &zombies,
const CompileContext &cc); const CompileContext &cc);

View File

@ -117,10 +117,9 @@ bool sanityCheckGraph(const NGHolder &g,
#endif #endif
static static
void findSquashStates(const NGHolder &g, unordered_map<NFAVertex, NFAStateSet> findSquashStates(const NGHolder &g,
const vector<BoundedRepeatData> &repeats, const vector<BoundedRepeatData> &repeats) {
map<NFAVertex, NFAStateSet> &squashMap) { auto squashMap = findSquashers(g);
squashMap = findSquashers(g);
filterSquashers(g, squashMap); filterSquashers(g, squashMap);
/* We also filter out the cyclic states representing bounded repeats, as /* We also filter out the cyclic states representing bounded repeats, as
@ -130,6 +129,8 @@ void findSquashStates(const NGHolder &g,
squashMap.erase(br.cyclic); squashMap.erase(br.cyclic);
} }
} }
return squashMap;
} }
/** /**
@ -659,12 +660,12 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm,
br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax); br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax);
} }
map<NFAVertex, NFAStateSet> reportSquashMap; unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
map<NFAVertex, NFAStateSet> squashMap; unordered_map<NFAVertex, NFAStateSet> squashMap;
// build map of squashed and squashers // build map of squashed and squashers
if (cc.grey.squashNFA) { if (cc.grey.squashNFA) {
findSquashStates(*h, repeats, squashMap); squashMap = findSquashStates(*h, repeats);
if (rm && cc.grey.highlanderSquash) { if (rm && cc.grey.highlanderSquash) {
reportSquashMap = findHighlanderSquashers(*h, *rm); reportSquashMap = findHighlanderSquashers(*h, *rm);
@ -736,8 +737,8 @@ bytecode_ptr<NFA> constructReversedNFA_i(const NGHolder &h_in, u32 hint,
map<u32, set<NFAVertex>> tops; /* only the standards tops for nfas */ map<u32, set<NFAVertex>> tops; /* only the standards tops for nfas */
set<NFAVertex> zombies; set<NFAVertex> zombies;
vector<BoundedRepeatData> repeats; vector<BoundedRepeatData> repeats;
map<NFAVertex, NFAStateSet> reportSquashMap; unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
map<NFAVertex, NFAStateSet> squashMap; unordered_map<NFAVertex, NFAStateSet> squashMap;
return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops, return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops,
zombies, false, false, hint, cc); zombies, false, false, hint, cc);
@ -850,8 +851,8 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
// Should have no bearing on accel calculation, so we leave these empty. // Should have no bearing on accel calculation, so we leave these empty.
const set<NFAVertex> zombies; const set<NFAVertex> zombies;
const map<NFAVertex, NFAStateSet> reportSquashMap; const unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
const map<NFAVertex, NFAStateSet> squashMap; const unordered_map<NFAVertex, NFAStateSet> squashMap;
return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap, return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap,
tops, zombies, cc); tops, zombies, cc);

View File

@ -121,10 +121,13 @@ using namespace std;
namespace ue2 { namespace ue2 {
typedef unordered_map<NFAVertex, unordered_set<NFAVertex>> PostDomTree; using PostDomTree = unordered_map<NFAVertex, unordered_set<NFAVertex>>;
static static
void buildPDomTree(const NGHolder &g, PostDomTree &tree) { PostDomTree buildPDomTree(const NGHolder &g) {
PostDomTree tree;
tree.reserve(num_vertices(g));
auto postdominators = findPostDominators(g); auto postdominators = findPostDominators(g);
for (auto v : vertices_range(g)) { for (auto v : vertices_range(g)) {
@ -137,6 +140,7 @@ void buildPDomTree(const NGHolder &g, PostDomTree &tree) {
tree[pdom].insert(v); tree[pdom].insert(v);
} }
} }
return tree;
} }
/** /**
@ -155,7 +159,7 @@ void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v,
vector<NFAVertex> q; vector<NFAVertex> q;
PostDomTree::const_iterator it = tree.find(v); auto it = tree.find(v);
if (it != tree.end()) { if (it != tree.end()) {
q.insert(q.end(), it->second.begin(), it->second.end()); q.insert(q.end(), it->second.begin(), it->second.end());
} }
@ -271,8 +275,8 @@ void buildPred(NFAStateSet &pred, const NGHolder &g, NFAVertex v) {
static static
void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex, void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex,
const PostDomTree &pdom_tree, const NFAStateSet &init, const PostDomTree &pdom_tree, const NFAStateSet &init,
map<NFAVertex, NFAStateSet> *squash, som_type som, unordered_map<NFAVertex, NFAStateSet> *squash,
const vector<DepthMinMax> &som_depths, som_type som, const vector<DepthMinMax> &som_depths,
const unordered_map<NFAVertex, u32> &region_map, const unordered_map<NFAVertex, u32> &region_map,
smgb_cache &cache) { smgb_cache &cache) {
deque<NFAVertex> remaining; deque<NFAVertex> remaining;
@ -315,37 +319,41 @@ void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex,
} }
} }
/* If there are redundant states in the graph, it may be possible for two sibling /* If there are redundant states in the graph, it may be possible for two
* .* states to try to squash each other -- which should be prevented * sibling .* states to try to squash each other -- which should be prevented.
* *
* Note: this situation should only happen if ng_equivalence has not been run. * Note: this situation should only happen if ng_equivalence has not been run.
*/ */
static static
void clearMutualSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex, void clearMutualSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex,
map<NFAVertex, NFAStateSet> &squash) { unordered_map<NFAVertex, NFAStateSet> &squash) {
for (auto it = squash.begin(); it != squash.end();) { for (auto it = squash.begin(); it != squash.end();) {
NFAVertex a = it->first; NFAVertex a = it->first;
u32 a_index = g[a].index; u32 a_index = g[a].index;
NFAStateSet a_squash = ~it->second; /* default is mask of survivors */ NFAStateSet a_squash = ~it->second; /* default is mask of survivors */
for (NFAStateSet::size_type b_index = a_squash.find_first(); for (auto b_index = a_squash.find_first(); b_index != a_squash.npos;
b_index != a_squash.npos; b_index = a_squash.find_next(b_index)) { b_index = a_squash.find_next(b_index)) {
assert(b_index != a_index); assert(b_index != a_index);
NFAVertex b = vByIndex[b_index]; NFAVertex b = vByIndex[b_index];
if (!contains(squash, b)) {
auto b_it = squash.find(b);
if (b_it == squash.end()) {
continue; continue;
} }
if (!squash[b].test(a_index)) { auto &b_squash = b_it->second;
if (!b_squash.test(a_index)) {
/* b and a squash each other, prevent this */ /* b and a squash each other, prevent this */
DEBUG_PRINTF("removing mutual squash %u %zu\n", DEBUG_PRINTF("removing mutual squash %u %zu\n",
a_index, b_index); a_index, b_index);
squash[b].set(a_index); b_squash.set(a_index);
it->second.set(b_index); it->second.set(b_index);
} }
} }
if (it->second.all()) { if (it->second.all()) {
DEBUG_PRINTF("%u is no longer an effictive squash state\n", a_index); DEBUG_PRINTF("%u is no longer an effective squash state\n",
a_index);
it = squash.erase(it); it = squash.erase(it);
} else { } else {
++it; ++it;
@ -353,16 +361,16 @@ void clearMutualSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex,
} }
} }
map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, som_type som) { unordered_map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g,
map<NFAVertex, NFAStateSet> squash; som_type som) {
unordered_map<NFAVertex, NFAStateSet> squash;
// Number of bits to use for all our masks. If we're a triggered graph, // Number of bits to use for all our masks. If we're a triggered graph,
// tops have already been assigned, so we don't have to account for them. // tops have already been assigned, so we don't have to account for them.
const u32 numStates = num_vertices(g); const u32 numStates = num_vertices(g);
// Build post-dominator tree. // Build post-dominator tree.
PostDomTree pdom_tree; auto pdom_tree = buildPDomTree(g);
buildPDomTree(g, pdom_tree);
// Build list of vertices by state ID and a set of init states. // Build list of vertices by state ID and a set of init states.
vector<NFAVertex> vByIndex(numStates, NGHolder::null_vertex()); vector<NFAVertex> vByIndex(numStates, NGHolder::null_vertex());
@ -507,9 +515,11 @@ map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, som_type som) {
* -# squash only a few acyclic states * -# squash only a few acyclic states
*/ */
void filterSquashers(const NGHolder &g, void filterSquashers(const NGHolder &g,
map<NFAVertex, NFAStateSet> &squash) { unordered_map<NFAVertex, NFAStateSet> &squash) {
assert(hasCorrectlyNumberedVertices(g));
DEBUG_PRINTF("filtering\n"); DEBUG_PRINTF("filtering\n");
map<u32, NFAVertex> rev; /* vertex_index -> vertex */ vector<NFAVertex> rev(num_vertices(g)); /* vertex_index -> vertex */
for (auto v : vertices_range(g)) { for (auto v : vertices_range(g)) {
rev[g[v].index] = v; rev[g[v].index] = v;
} }
@ -528,8 +538,8 @@ void filterSquashers(const NGHolder &g,
NFAStateSet squashed = squash[v]; NFAStateSet squashed = squash[v];
squashed.flip(); /* default sense for mask of survivors */ squashed.flip(); /* default sense for mask of survivors */
for (NFAStateSet::size_type sq = squashed.find_first(); for (auto sq = squashed.find_first(); sq != squashed.npos;
sq != squashed.npos; sq = squashed.find_next(sq)) { sq = squashed.find_next(sq)) {
NFAVertex u = rev[sq]; NFAVertex u = rev[sq];
if (hasSelfLoop(u, g)) { if (hasSelfLoop(u, g)) {
DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n", sq); DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n", sq);
@ -637,9 +647,9 @@ vector<NFAVertex> findUnreachable(const NGHolder &g) {
/** Populates squash masks for states that can be switched off by highlander /** Populates squash masks for states that can be switched off by highlander
* (single match) reporters. */ * (single match) reporters. */
map<NFAVertex, NFAStateSet> unordered_map<NFAVertex, NFAStateSet>
findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) { findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) {
map<NFAVertex, NFAStateSet> squash; unordered_map<NFAVertex, NFAStateSet> squash;
set<NFAVertex> verts; set<NFAVertex> verts;
getHighlanderReporters(g, g.accept, rm, verts); getHighlanderReporters(g, g.accept, rm, verts);

View File

@ -36,7 +36,7 @@
#include "som/som.h" #include "som/som.h"
#include "ue2common.h" #include "ue2common.h"
#include <map> #include <unordered_map>
#include <boost/dynamic_bitset.hpp> #include <boost/dynamic_bitset.hpp>
namespace ue2 { namespace ue2 {
@ -47,7 +47,7 @@ class ReportManager;
/** /**
* Dynamically-sized bitset, as an NFA can have an arbitrary number of states. * Dynamically-sized bitset, as an NFA can have an arbitrary number of states.
*/ */
typedef boost::dynamic_bitset<> NFAStateSet; using NFAStateSet = boost::dynamic_bitset<>;
/** /**
* Populates the squash mask for each vertex (i.e. the set of states to be left * Populates the squash mask for each vertex (i.e. the set of states to be left
@ -55,16 +55,16 @@ typedef boost::dynamic_bitset<> NFAStateSet;
* *
* The NFAStateSet in the output map is indexed by vertex_index. * The NFAStateSet in the output map is indexed by vertex_index.
*/ */
std::map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, std::unordered_map<NFAVertex, NFAStateSet>
som_type som = SOM_NONE); findSquashers(const NGHolder &g, som_type som = SOM_NONE);
/** Filters out squash states intended only for use in DFA construction. */ /** Filters out squash states intended only for use in DFA construction. */
void filterSquashers(const NGHolder &g, void filterSquashers(const NGHolder &g,
std::map<NFAVertex, NFAStateSet> &squash); std::unordered_map<NFAVertex, NFAStateSet> &squash);
/** Populates squash masks for states that can be switched off by highlander /** Populates squash masks for states that can be switched off by highlander
* (single match) reporters. */ * (single match) reporters. */
std::map<NFAVertex, NFAStateSet> std::unordered_map<NFAVertex, NFAStateSet>
findHighlanderSquashers(const NGHolder &g, const ReportManager &rm); findHighlanderSquashers(const NGHolder &g, const ReportManager &rm);
} // namespace ue2 } // namespace ue2