mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
ng_squash: switch to using unordered_map
Also some cleaning up, small performance improvements.
This commit is contained in:
parent
ace592e247
commit
ea2e85ac87
@ -105,11 +105,13 @@ struct limex_accel_info {
|
||||
};
|
||||
|
||||
static
|
||||
map<NFAVertex, NFAStateSet>
|
||||
reindexByStateId(const map<NFAVertex, NFAStateSet> &in, const NGHolder &g,
|
||||
unordered_map<NFAVertex, NFAStateSet>
|
||||
reindexByStateId(const unordered_map<NFAVertex, NFAStateSet> &in,
|
||||
const NGHolder &g,
|
||||
const unordered_map<NFAVertex, u32> &state_ids,
|
||||
const u32 num_states) {
|
||||
map<NFAVertex, NFAStateSet> out;
|
||||
unordered_map<NFAVertex, NFAStateSet> out;
|
||||
out.reserve(in.size());
|
||||
|
||||
vector<u32> indexToState(num_vertices(g), NO_STATE);
|
||||
for (const auto &m : state_ids) {
|
||||
@ -141,8 +143,8 @@ struct build_info {
|
||||
build_info(NGHolder &hi,
|
||||
const unordered_map<NFAVertex, u32> &states_in,
|
||||
const vector<BoundedRepeatData> &ri,
|
||||
const map<NFAVertex, NFAStateSet> &rsmi,
|
||||
const map<NFAVertex, NFAStateSet> &smi,
|
||||
const unordered_map<NFAVertex, NFAStateSet> &rsmi,
|
||||
const unordered_map<NFAVertex, NFAStateSet> &smi,
|
||||
const map<u32, set<NFAVertex>> &ti, const set<NFAVertex> &zi,
|
||||
bool dai, bool sci, const CompileContext &cci, u32 nsi)
|
||||
: h(hi), state_ids(states_in), repeats(ri), tops(ti), tugs(nsi),
|
||||
@ -168,8 +170,8 @@ struct build_info {
|
||||
const vector<BoundedRepeatData> &repeats;
|
||||
|
||||
// Squash maps; state sets are indexed by state_id.
|
||||
map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
map<NFAVertex, NFAStateSet> squashMap;
|
||||
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
unordered_map<NFAVertex, NFAStateSet> squashMap;
|
||||
|
||||
const map<u32, set<NFAVertex>> &tops;
|
||||
NFAStateSet tugs;
|
||||
@ -2434,14 +2436,14 @@ u32 max_state(const unordered_map<NFAVertex, u32> &state_ids) {
|
||||
}
|
||||
|
||||
bytecode_ptr<NFA> generate(NGHolder &h,
|
||||
const unordered_map<NFAVertex, u32> &states,
|
||||
const vector<BoundedRepeatData> &repeats,
|
||||
const map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||
const map<NFAVertex, NFAStateSet> &squashMap,
|
||||
const map<u32, set<NFAVertex>> &tops,
|
||||
const set<NFAVertex> &zombies, bool do_accel,
|
||||
bool stateCompression, u32 hint,
|
||||
const CompileContext &cc) {
|
||||
const unordered_map<NFAVertex, u32> &states,
|
||||
const vector<BoundedRepeatData> &repeats,
|
||||
const unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||
const unordered_map<NFAVertex, NFAStateSet> &squashMap,
|
||||
const map<u32, set<NFAVertex>> &tops,
|
||||
const set<NFAVertex> &zombies, bool do_accel,
|
||||
bool stateCompression, u32 hint,
|
||||
const CompileContext &cc) {
|
||||
const u32 num_states = max_state(states) + 1;
|
||||
DEBUG_PRINTF("total states: %u\n", num_states);
|
||||
|
||||
@ -2504,13 +2506,13 @@ bytecode_ptr<NFA> generate(NGHolder &h,
|
||||
}
|
||||
|
||||
u32 countAccelStates(NGHolder &h,
|
||||
const unordered_map<NFAVertex, u32> &states,
|
||||
const vector<BoundedRepeatData> &repeats,
|
||||
const map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||
const map<NFAVertex, NFAStateSet> &squashMap,
|
||||
const map<u32, set<NFAVertex>> &tops,
|
||||
const set<NFAVertex> &zombies,
|
||||
const CompileContext &cc) {
|
||||
const unordered_map<NFAVertex, u32> &states,
|
||||
const vector<BoundedRepeatData> &repeats,
|
||||
const unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||
const unordered_map<NFAVertex, NFAStateSet> &squashMap,
|
||||
const map<u32, set<NFAVertex>> &tops,
|
||||
const set<NFAVertex> &zombies,
|
||||
const CompileContext &cc) {
|
||||
const u32 num_states = max_state(states) + 1;
|
||||
DEBUG_PRINTF("total states: %u\n", num_states);
|
||||
|
||||
|
@ -70,16 +70,16 @@ struct CompileContext;
|
||||
* graph.
|
||||
*/
|
||||
bytecode_ptr<NFA> generate(NGHolder &g,
|
||||
const std::unordered_map<NFAVertex, u32> &states,
|
||||
const std::vector<BoundedRepeatData> &repeats,
|
||||
const std::map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||
const std::map<NFAVertex, NFAStateSet> &squashMap,
|
||||
const std::map<u32, std::set<NFAVertex>> &tops,
|
||||
const std::set<NFAVertex> &zombies,
|
||||
bool do_accel,
|
||||
bool stateCompression,
|
||||
u32 hint,
|
||||
const CompileContext &cc);
|
||||
const std::unordered_map<NFAVertex, u32> &states,
|
||||
const std::vector<BoundedRepeatData> &repeats,
|
||||
const std::unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||
const std::unordered_map<NFAVertex, NFAStateSet> &squashMap,
|
||||
const std::map<u32, std::set<NFAVertex>> &tops,
|
||||
const std::set<NFAVertex> &zombies,
|
||||
bool do_accel,
|
||||
bool stateCompression,
|
||||
u32 hint,
|
||||
const CompileContext &cc);
|
||||
|
||||
/**
|
||||
* \brief For a given graph, count the number of accelerable states it has.
|
||||
@ -88,13 +88,13 @@ bytecode_ptr<NFA> generate(NGHolder &g,
|
||||
* implementable.
|
||||
*/
|
||||
u32 countAccelStates(NGHolder &h,
|
||||
const std::unordered_map<NFAVertex, u32> &states,
|
||||
const std::vector<BoundedRepeatData> &repeats,
|
||||
const std::map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||
const std::map<NFAVertex, NFAStateSet> &squashMap,
|
||||
const std::map<u32, std::set<NFAVertex>> &tops,
|
||||
const std::set<NFAVertex> &zombies,
|
||||
const CompileContext &cc);
|
||||
const std::unordered_map<NFAVertex, u32> &states,
|
||||
const std::vector<BoundedRepeatData> &repeats,
|
||||
const std::unordered_map<NFAVertex, NFAStateSet> &reportSquashMap,
|
||||
const std::unordered_map<NFAVertex, NFAStateSet> &squashMap,
|
||||
const std::map<u32, std::set<NFAVertex>> &tops,
|
||||
const std::set<NFAVertex> &zombies,
|
||||
const CompileContext &cc);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
|
@ -117,10 +117,9 @@ bool sanityCheckGraph(const NGHolder &g,
|
||||
#endif
|
||||
|
||||
static
|
||||
void findSquashStates(const NGHolder &g,
|
||||
const vector<BoundedRepeatData> &repeats,
|
||||
map<NFAVertex, NFAStateSet> &squashMap) {
|
||||
squashMap = findSquashers(g);
|
||||
unordered_map<NFAVertex, NFAStateSet> findSquashStates(const NGHolder &g,
|
||||
const vector<BoundedRepeatData> &repeats) {
|
||||
auto squashMap = findSquashers(g);
|
||||
filterSquashers(g, squashMap);
|
||||
|
||||
/* We also filter out the cyclic states representing bounded repeats, as
|
||||
@ -130,6 +129,8 @@ void findSquashStates(const NGHolder &g,
|
||||
squashMap.erase(br.cyclic);
|
||||
}
|
||||
}
|
||||
|
||||
return squashMap;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -659,12 +660,12 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm,
|
||||
br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax);
|
||||
}
|
||||
|
||||
map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
map<NFAVertex, NFAStateSet> squashMap;
|
||||
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
unordered_map<NFAVertex, NFAStateSet> squashMap;
|
||||
|
||||
// build map of squashed and squashers
|
||||
if (cc.grey.squashNFA) {
|
||||
findSquashStates(*h, repeats, squashMap);
|
||||
squashMap = findSquashStates(*h, repeats);
|
||||
|
||||
if (rm && cc.grey.highlanderSquash) {
|
||||
reportSquashMap = findHighlanderSquashers(*h, *rm);
|
||||
@ -736,8 +737,8 @@ bytecode_ptr<NFA> constructReversedNFA_i(const NGHolder &h_in, u32 hint,
|
||||
map<u32, set<NFAVertex>> tops; /* only the standards tops for nfas */
|
||||
set<NFAVertex> zombies;
|
||||
vector<BoundedRepeatData> repeats;
|
||||
map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
map<NFAVertex, NFAStateSet> squashMap;
|
||||
unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
unordered_map<NFAVertex, NFAStateSet> squashMap;
|
||||
|
||||
return generate(h, state_ids, repeats, reportSquashMap, squashMap, tops,
|
||||
zombies, false, false, hint, cc);
|
||||
@ -850,8 +851,8 @@ u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
|
||||
|
||||
// Should have no bearing on accel calculation, so we leave these empty.
|
||||
const set<NFAVertex> zombies;
|
||||
const map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
const map<NFAVertex, NFAStateSet> squashMap;
|
||||
const unordered_map<NFAVertex, NFAStateSet> reportSquashMap;
|
||||
const unordered_map<NFAVertex, NFAStateSet> squashMap;
|
||||
|
||||
return countAccelStates(*h, state_ids, repeats, reportSquashMap, squashMap,
|
||||
tops, zombies, cc);
|
||||
|
@ -121,10 +121,13 @@ using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
typedef unordered_map<NFAVertex, unordered_set<NFAVertex>> PostDomTree;
|
||||
using PostDomTree = unordered_map<NFAVertex, unordered_set<NFAVertex>>;
|
||||
|
||||
static
|
||||
void buildPDomTree(const NGHolder &g, PostDomTree &tree) {
|
||||
PostDomTree buildPDomTree(const NGHolder &g) {
|
||||
PostDomTree tree;
|
||||
tree.reserve(num_vertices(g));
|
||||
|
||||
auto postdominators = findPostDominators(g);
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
@ -137,6 +140,7 @@ void buildPDomTree(const NGHolder &g, PostDomTree &tree) {
|
||||
tree[pdom].insert(v);
|
||||
}
|
||||
}
|
||||
return tree;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -155,7 +159,7 @@ void buildSquashMask(NFAStateSet &mask, const NGHolder &g, NFAVertex v,
|
||||
|
||||
vector<NFAVertex> q;
|
||||
|
||||
PostDomTree::const_iterator it = tree.find(v);
|
||||
auto it = tree.find(v);
|
||||
if (it != tree.end()) {
|
||||
q.insert(q.end(), it->second.begin(), it->second.end());
|
||||
}
|
||||
@ -271,8 +275,8 @@ void buildPred(NFAStateSet &pred, const NGHolder &g, NFAVertex v) {
|
||||
static
|
||||
void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex,
|
||||
const PostDomTree &pdom_tree, const NFAStateSet &init,
|
||||
map<NFAVertex, NFAStateSet> *squash, som_type som,
|
||||
const vector<DepthMinMax> &som_depths,
|
||||
unordered_map<NFAVertex, NFAStateSet> *squash,
|
||||
som_type som, const vector<DepthMinMax> &som_depths,
|
||||
const unordered_map<NFAVertex, u32> &region_map,
|
||||
smgb_cache &cache) {
|
||||
deque<NFAVertex> remaining;
|
||||
@ -315,37 +319,41 @@ void findDerivedSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex,
|
||||
}
|
||||
}
|
||||
|
||||
/* If there are redundant states in the graph, it may be possible for two sibling
|
||||
* .* states to try to squash each other -- which should be prevented
|
||||
/* If there are redundant states in the graph, it may be possible for two
|
||||
* sibling .* states to try to squash each other -- which should be prevented.
|
||||
*
|
||||
* Note: this situation should only happen if ng_equivalence has not been run.
|
||||
*/
|
||||
static
|
||||
void clearMutualSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex,
|
||||
map<NFAVertex, NFAStateSet> &squash) {
|
||||
unordered_map<NFAVertex, NFAStateSet> &squash) {
|
||||
for (auto it = squash.begin(); it != squash.end();) {
|
||||
NFAVertex a = it->first;
|
||||
u32 a_index = g[a].index;
|
||||
|
||||
NFAStateSet a_squash = ~it->second; /* default is mask of survivors */
|
||||
for (NFAStateSet::size_type b_index = a_squash.find_first();
|
||||
b_index != a_squash.npos; b_index = a_squash.find_next(b_index)) {
|
||||
for (auto b_index = a_squash.find_first(); b_index != a_squash.npos;
|
||||
b_index = a_squash.find_next(b_index)) {
|
||||
assert(b_index != a_index);
|
||||
NFAVertex b = vByIndex[b_index];
|
||||
if (!contains(squash, b)) {
|
||||
|
||||
auto b_it = squash.find(b);
|
||||
if (b_it == squash.end()) {
|
||||
continue;
|
||||
}
|
||||
if (!squash[b].test(a_index)) {
|
||||
auto &b_squash = b_it->second;
|
||||
if (!b_squash.test(a_index)) {
|
||||
/* b and a squash each other, prevent this */
|
||||
DEBUG_PRINTF("removing mutual squash %u %zu\n",
|
||||
a_index, b_index);
|
||||
squash[b].set(a_index);
|
||||
b_squash.set(a_index);
|
||||
it->second.set(b_index);
|
||||
}
|
||||
}
|
||||
|
||||
if (it->second.all()) {
|
||||
DEBUG_PRINTF("%u is no longer an effictive squash state\n", a_index);
|
||||
DEBUG_PRINTF("%u is no longer an effective squash state\n",
|
||||
a_index);
|
||||
it = squash.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
@ -353,16 +361,16 @@ void clearMutualSquashers(const NGHolder &g, const vector<NFAVertex> &vByIndex,
|
||||
}
|
||||
}
|
||||
|
||||
map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, som_type som) {
|
||||
map<NFAVertex, NFAStateSet> squash;
|
||||
unordered_map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g,
|
||||
som_type som) {
|
||||
unordered_map<NFAVertex, NFAStateSet> squash;
|
||||
|
||||
// Number of bits to use for all our masks. If we're a triggered graph,
|
||||
// tops have already been assigned, so we don't have to account for them.
|
||||
const u32 numStates = num_vertices(g);
|
||||
|
||||
// Build post-dominator tree.
|
||||
PostDomTree pdom_tree;
|
||||
buildPDomTree(g, pdom_tree);
|
||||
auto pdom_tree = buildPDomTree(g);
|
||||
|
||||
// Build list of vertices by state ID and a set of init states.
|
||||
vector<NFAVertex> vByIndex(numStates, NGHolder::null_vertex());
|
||||
@ -507,9 +515,11 @@ map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g, som_type som) {
|
||||
* -# squash only a few acyclic states
|
||||
*/
|
||||
void filterSquashers(const NGHolder &g,
|
||||
map<NFAVertex, NFAStateSet> &squash) {
|
||||
unordered_map<NFAVertex, NFAStateSet> &squash) {
|
||||
assert(hasCorrectlyNumberedVertices(g));
|
||||
|
||||
DEBUG_PRINTF("filtering\n");
|
||||
map<u32, NFAVertex> rev; /* vertex_index -> vertex */
|
||||
vector<NFAVertex> rev(num_vertices(g)); /* vertex_index -> vertex */
|
||||
for (auto v : vertices_range(g)) {
|
||||
rev[g[v].index] = v;
|
||||
}
|
||||
@ -528,8 +538,8 @@ void filterSquashers(const NGHolder &g,
|
||||
|
||||
NFAStateSet squashed = squash[v];
|
||||
squashed.flip(); /* default sense for mask of survivors */
|
||||
for (NFAStateSet::size_type sq = squashed.find_first();
|
||||
sq != squashed.npos; sq = squashed.find_next(sq)) {
|
||||
for (auto sq = squashed.find_first(); sq != squashed.npos;
|
||||
sq = squashed.find_next(sq)) {
|
||||
NFAVertex u = rev[sq];
|
||||
if (hasSelfLoop(u, g)) {
|
||||
DEBUG_PRINTF("squashing a cyclic (%zu) is always good\n", sq);
|
||||
@ -637,9 +647,9 @@ vector<NFAVertex> findUnreachable(const NGHolder &g) {
|
||||
|
||||
/** Populates squash masks for states that can be switched off by highlander
|
||||
* (single match) reporters. */
|
||||
map<NFAVertex, NFAStateSet>
|
||||
unordered_map<NFAVertex, NFAStateSet>
|
||||
findHighlanderSquashers(const NGHolder &g, const ReportManager &rm) {
|
||||
map<NFAVertex, NFAStateSet> squash;
|
||||
unordered_map<NFAVertex, NFAStateSet> squash;
|
||||
|
||||
set<NFAVertex> verts;
|
||||
getHighlanderReporters(g, g.accept, rm, verts);
|
||||
|
@ -36,7 +36,7 @@
|
||||
#include "som/som.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
|
||||
namespace ue2 {
|
||||
@ -47,7 +47,7 @@ class ReportManager;
|
||||
/**
|
||||
* Dynamically-sized bitset, as an NFA can have an arbitrary number of states.
|
||||
*/
|
||||
typedef boost::dynamic_bitset<> NFAStateSet;
|
||||
using NFAStateSet = boost::dynamic_bitset<>;
|
||||
|
||||
/**
|
||||
* Populates the squash mask for each vertex (i.e. the set of states to be left
|
||||
@ -55,16 +55,16 @@ typedef boost::dynamic_bitset<> NFAStateSet;
|
||||
*
|
||||
* The NFAStateSet in the output map is indexed by vertex_index.
|
||||
*/
|
||||
std::map<NFAVertex, NFAStateSet> findSquashers(const NGHolder &g,
|
||||
som_type som = SOM_NONE);
|
||||
std::unordered_map<NFAVertex, NFAStateSet>
|
||||
findSquashers(const NGHolder &g, som_type som = SOM_NONE);
|
||||
|
||||
/** Filters out squash states intended only for use in DFA construction. */
|
||||
void filterSquashers(const NGHolder &g,
|
||||
std::map<NFAVertex, NFAStateSet> &squash);
|
||||
std::unordered_map<NFAVertex, NFAStateSet> &squash);
|
||||
|
||||
/** Populates squash masks for states that can be switched off by highlander
|
||||
* (single match) reporters. */
|
||||
std::map<NFAVertex, NFAStateSet>
|
||||
std::unordered_map<NFAVertex, NFAStateSet>
|
||||
findHighlanderSquashers(const NGHolder &g, const ReportManager &rm);
|
||||
|
||||
} // namespace ue2
|
||||
|
Loading…
x
Reference in New Issue
Block a user