mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
ng_execute: update interface to use flat_set
This changes all the execute_graph() interfaces so that instead of mutating a std::set of vertices, they accept an initial flat_set of states and return a resultant flat_set of states after execution. (Note that internally execute_graph() still uses bitsets.) This is both faster and more flexible.
This commit is contained in:
parent
fd19168025
commit
abbd548899
@ -125,61 +125,62 @@ void execute_graph_i(const NGHolder &g, const vector<StateInfo> &info,
|
||||
}
|
||||
|
||||
static
|
||||
void fillStateBitset(const NGHolder &g, const set<NFAVertex> &in,
|
||||
dynamic_bitset<> &out) {
|
||||
out.reset();
|
||||
for (auto v : in) {
|
||||
dynamic_bitset<> makeStateBitset(const NGHolder &g,
|
||||
const flat_set<NFAVertex> &in) {
|
||||
dynamic_bitset<> work_states(num_vertices(g));
|
||||
for (const auto &v : in) {
|
||||
u32 idx = g[v].index;
|
||||
out.set(idx);
|
||||
work_states.set(idx);
|
||||
}
|
||||
return work_states;
|
||||
}
|
||||
|
||||
static
|
||||
void fillVertexSet(const dynamic_bitset<> &in,
|
||||
const vector<StateInfo> &info, set<NFAVertex> &out) {
|
||||
out.clear();
|
||||
flat_set<NFAVertex> getVertices(const dynamic_bitset<> &in,
|
||||
const vector<StateInfo> &info) {
|
||||
flat_set<NFAVertex> out;
|
||||
for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
|
||||
out.insert(info[i].vertex);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
static
|
||||
void fillInfoTable(const NGHolder &g, vector<StateInfo> &info) {
|
||||
info.resize(num_vertices(g));
|
||||
vector<StateInfo> makeInfoTable(const NGHolder &g) {
|
||||
vector<StateInfo> info(num_vertices(g));
|
||||
for (auto v : vertices_range(g)) {
|
||||
u32 idx = g[v].index;
|
||||
const CharReach &cr = g[v].char_reach;
|
||||
assert(idx < info.size());
|
||||
info[idx] = StateInfo(v, cr);
|
||||
}
|
||||
return info;
|
||||
}
|
||||
|
||||
void execute_graph(const NGHolder &g, const ue2_literal &input,
|
||||
set<NFAVertex> *states, bool kill_sds) {
|
||||
flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
|
||||
const flat_set<NFAVertex> &initial_states,
|
||||
bool kill_sds) {
|
||||
assert(hasCorrectlyNumberedVertices(g));
|
||||
|
||||
vector<StateInfo> info;
|
||||
fillInfoTable(g, info);
|
||||
dynamic_bitset<> work_states(num_vertices(g));
|
||||
fillStateBitset(g, *states, work_states);
|
||||
auto info = makeInfoTable(g);
|
||||
auto work_states = makeStateBitset(g, initial_states);
|
||||
|
||||
execute_graph_i(g, info, input, &work_states, kill_sds);
|
||||
|
||||
fillVertexSet(work_states, info, *states);
|
||||
return getVertices(work_states, info);
|
||||
}
|
||||
|
||||
void execute_graph(const NGHolder &g, const vector<CharReach> &input,
|
||||
set<NFAVertex> *states) {
|
||||
flat_set<NFAVertex> execute_graph(const NGHolder &g,
|
||||
const vector<CharReach> &input,
|
||||
const flat_set<NFAVertex> &initial_states) {
|
||||
assert(hasCorrectlyNumberedVertices(g));
|
||||
|
||||
vector<StateInfo> info;
|
||||
fillInfoTable(g, info);
|
||||
dynamic_bitset<> work_states(num_vertices(g));
|
||||
fillStateBitset(g, *states, work_states);
|
||||
auto info = makeInfoTable(g);
|
||||
auto work_states = makeStateBitset(g, initial_states);
|
||||
|
||||
execute_graph_i(g, info, input, &work_states, false);
|
||||
|
||||
fillVertexSet(work_states, info, *states);
|
||||
return getVertices(work_states, info);
|
||||
}
|
||||
|
||||
typedef boost::reverse_graph<const NFAGraph, const NFAGraph &> RevNFAGraph;
|
||||
@ -276,9 +277,10 @@ private:
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
|
||||
const set<NFAVertex> &input_start_states,
|
||||
set<NFAVertex> *states) {
|
||||
flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
|
||||
const NGHolder &input_dag,
|
||||
const flat_set<NFAVertex> &input_start_states,
|
||||
const flat_set<NFAVertex> &initial_states) {
|
||||
DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n",
|
||||
num_vertices(running_g), num_vertices(input_dag));
|
||||
assert(hasCorrectlyNumberedVertices(running_g));
|
||||
@ -290,10 +292,8 @@ void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
|
||||
RevNFAGraph revg(input_dag.g);
|
||||
map<NFAVertex, dynamic_bitset<> > dfs_states;
|
||||
|
||||
vector<StateInfo> info;
|
||||
fillInfoTable(running_g, info);
|
||||
dynamic_bitset<> input_fs(num_vertices(running_g));
|
||||
fillStateBitset(running_g, *states, input_fs);
|
||||
auto info = makeInfoTable(running_g);
|
||||
auto input_fs = makeStateBitset(running_g, initial_states);
|
||||
|
||||
for (auto v : input_start_states) {
|
||||
dfs_states[v] = input_fs;
|
||||
@ -303,21 +303,25 @@ void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
|
||||
eg_visitor(running_g, info, input_dag, dfs_states),
|
||||
make_assoc_property_map(colours));
|
||||
|
||||
fillVertexSet(dfs_states[input_dag.accept], info, *states);
|
||||
auto states = getVertices(dfs_states[input_dag.accept], info);
|
||||
|
||||
#ifdef DEBUG
|
||||
DEBUG_PRINTF(" output rstates:");
|
||||
for (auto v : *states) {
|
||||
printf(" %u", running_g[v].index);
|
||||
}
|
||||
printf("\n");
|
||||
DEBUG_PRINTF(" output rstates:");
|
||||
for (const auto &v : states) {
|
||||
printf(" %u", running_g[v].index);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
return states;
|
||||
}
|
||||
|
||||
void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
|
||||
set<NFAVertex> *states) {
|
||||
set<NFAVertex> input_start_states = {input_dag.start, input_dag.startDs};
|
||||
execute_graph(running_g, input_dag, input_start_states, states);
|
||||
flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
|
||||
const NGHolder &input_dag,
|
||||
const flat_set<NFAVertex> &initial_states) {
|
||||
auto input_start_states = {input_dag.start, input_dag.startDs};
|
||||
return execute_graph(running_g, input_dag, input_start_states,
|
||||
initial_states);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
@ -35,8 +35,8 @@
|
||||
#define NG_EXECUTE_H
|
||||
|
||||
#include "ng_holder.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
@ -44,23 +44,25 @@ namespace ue2 {
|
||||
class CharReach;
|
||||
struct ue2_literal;
|
||||
|
||||
void execute_graph(const NGHolder &g, const ue2_literal &input,
|
||||
std::set<NFAVertex> *states, bool kill_sds = false);
|
||||
flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
|
||||
const flat_set<NFAVertex> &initial,
|
||||
bool kill_sds = false);
|
||||
|
||||
void execute_graph(const NGHolder &g, const std::vector<CharReach> &input,
|
||||
std::set<NFAVertex> *states);
|
||||
flat_set<NFAVertex> execute_graph(const NGHolder &g,
|
||||
const std::vector<CharReach> &input,
|
||||
const flat_set<NFAVertex> &initial);
|
||||
|
||||
/** Returns the set of states which may still be enabled after
|
||||
* receiving an input which corresponds to some path through the input_dag from
|
||||
* start or startDs to accept. input_dag MUST be acyclic aside from self-loops.
|
||||
*/
|
||||
void execute_graph(const NGHolder &g, const NGHolder &input_dag,
|
||||
std::set<NFAVertex> *states);
|
||||
flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
|
||||
const flat_set<NFAVertex> &initial);
|
||||
|
||||
/* as above, but able to specify the source states for the input graph */
|
||||
void execute_graph(const NGHolder &g, const NGHolder &input_dag,
|
||||
const std::set<NFAVertex> &input_start_states,
|
||||
std::set<NFAVertex> *states);
|
||||
flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
|
||||
const flat_set<NFAVertex> &input_start_states,
|
||||
const flat_set<NFAVertex> &initial);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
|
@ -266,7 +266,7 @@ bool validateEXSL(const NGHolder &g,
|
||||
const vector<CharReach> escapes_vec(1, escapes);
|
||||
const vector<CharReach> notescapes_vec(1, ~escapes);
|
||||
|
||||
set<NFAVertex> states;
|
||||
ue2::flat_set<NFAVertex> states;
|
||||
/* turn on all states past the prefix */
|
||||
DEBUG_PRINTF("region %u is cutover\n", region);
|
||||
for (auto v : vertices_range(g)) {
|
||||
@ -276,20 +276,20 @@ bool validateEXSL(const NGHolder &g,
|
||||
}
|
||||
|
||||
/* process the escapes */
|
||||
execute_graph(g, escapes_vec, &states);
|
||||
states = execute_graph(g, escapes_vec, states);
|
||||
|
||||
/* flood with any number of not escapes */
|
||||
set<NFAVertex> prev_states;
|
||||
ue2::flat_set<NFAVertex> prev_states;
|
||||
while (prev_states != states) {
|
||||
prev_states = states;
|
||||
execute_graph(g, notescapes_vec, &states);
|
||||
states = execute_graph(g, notescapes_vec, states);
|
||||
insert(&states, prev_states);
|
||||
}
|
||||
|
||||
/* find the input start states to use when running the prefix through:
|
||||
* when the escape character arrives we may already be partway through
|
||||
* matching the prefix */
|
||||
set<NFAVertex> prefix_start_states;
|
||||
ue2::flat_set<NFAVertex> prefix_start_states;
|
||||
for (auto v : vertices_range(prefix)) {
|
||||
if (v != prefix.accept && v != prefix.acceptEod
|
||||
/* and as we have already made it past the prefix once */
|
||||
@ -298,11 +298,12 @@ bool validateEXSL(const NGHolder &g,
|
||||
}
|
||||
}
|
||||
|
||||
execute_graph(prefix, escapes_vec, &prefix_start_states);
|
||||
prefix_start_states =
|
||||
execute_graph(prefix, escapes_vec, prefix_start_states);
|
||||
|
||||
assert(contains(prefix_start_states, prefix.startDs));
|
||||
/* see what happens after we feed it the prefix */
|
||||
execute_graph(g, prefix, prefix_start_states, &states);
|
||||
states = execute_graph(g, prefix, prefix_start_states, states);
|
||||
|
||||
for (auto v : states) {
|
||||
assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be
|
||||
|
@ -136,7 +136,7 @@ bool firstMatchIsFirst(const NGHolder &p) {
|
||||
return false;
|
||||
}
|
||||
|
||||
set<NFAVertex> states;
|
||||
ue2::flat_set<NFAVertex> states;
|
||||
/* turn on all states (except starts - avoid suffix matches) */
|
||||
/* If we were doing (1) we would also except states leading to accepts -
|
||||
avoid prefix matches */
|
||||
@ -149,7 +149,7 @@ bool firstMatchIsFirst(const NGHolder &p) {
|
||||
}
|
||||
|
||||
/* run the prefix through the main graph */
|
||||
execute_graph(p, p, &states);
|
||||
states = execute_graph(p, p, states);
|
||||
|
||||
for (auto v : states) {
|
||||
/* need to check if this vertex may represent an infix match - ie
|
||||
@ -313,7 +313,7 @@ bool sentClearsTail(const NGHolder &g,
|
||||
*/
|
||||
|
||||
u32 first_bad_region = ~0U;
|
||||
set<NFAVertex> states;
|
||||
ue2::flat_set<NFAVertex> states;
|
||||
/* turn on all states */
|
||||
DEBUG_PRINTF("region %u is cutover\n", last_head_region);
|
||||
for (auto v : vertices_range(g)) {
|
||||
@ -327,7 +327,7 @@ bool sentClearsTail(const NGHolder &g,
|
||||
}
|
||||
|
||||
/* run the prefix through the main graph */
|
||||
execute_graph(g, sent, &states);
|
||||
states = execute_graph(g, sent, states);
|
||||
|
||||
/* .. and check if we are left with anything in the tail region */
|
||||
for (auto v : states) {
|
||||
|
@ -1631,20 +1631,23 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left,
|
||||
assert(left.graph());
|
||||
const NGHolder &h = *left.graph();
|
||||
|
||||
ue2::flat_set<NFAVertex> all_states;
|
||||
insert(&all_states, vertices(h));
|
||||
assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */
|
||||
DEBUG_PRINTF("removing sds\n");
|
||||
all_states.erase(h.startDs);
|
||||
|
||||
ue2::flat_set<NFAVertex> states;
|
||||
|
||||
/* check each pred literal to see if they all kill previous graph
|
||||
* state */
|
||||
for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) {
|
||||
const rose_literal_id &pred_lit = tbi.literals.right.at(lit_id);
|
||||
const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s);
|
||||
|
||||
set<NFAVertex> states;
|
||||
insert(&states, vertices(h));
|
||||
assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */
|
||||
DEBUG_PRINTF("removing sds\n");
|
||||
states.erase(h.startDs);
|
||||
DEBUG_PRINTF("running graph %zu\n", states.size());
|
||||
execute_graph(h, s, &states, true);
|
||||
DEBUG_PRINTF("ran\n");
|
||||
states = execute_graph(h, s, all_states, true);
|
||||
DEBUG_PRINTF("ran, %zu states on\n", states.size());
|
||||
|
||||
if (!states.empty()) {
|
||||
return false;
|
||||
|
Loading…
x
Reference in New Issue
Block a user