mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
ng_execute: update interface to use flat_set
This changes all the execute_graph() interfaces so that instead of mutating a std::set of vertices, they accept an initial flat_set of states and return a resultant flat_set of states after execution. (Note that internally execute_graph() still uses bitsets.) This is both faster and more flexible.
This commit is contained in:
parent
fd19168025
commit
abbd548899
@ -125,61 +125,62 @@ void execute_graph_i(const NGHolder &g, const vector<StateInfo> &info,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void fillStateBitset(const NGHolder &g, const set<NFAVertex> &in,
|
dynamic_bitset<> makeStateBitset(const NGHolder &g,
|
||||||
dynamic_bitset<> &out) {
|
const flat_set<NFAVertex> &in) {
|
||||||
out.reset();
|
dynamic_bitset<> work_states(num_vertices(g));
|
||||||
for (auto v : in) {
|
for (const auto &v : in) {
|
||||||
u32 idx = g[v].index;
|
u32 idx = g[v].index;
|
||||||
out.set(idx);
|
work_states.set(idx);
|
||||||
}
|
}
|
||||||
|
return work_states;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void fillVertexSet(const dynamic_bitset<> &in,
|
flat_set<NFAVertex> getVertices(const dynamic_bitset<> &in,
|
||||||
const vector<StateInfo> &info, set<NFAVertex> &out) {
|
const vector<StateInfo> &info) {
|
||||||
out.clear();
|
flat_set<NFAVertex> out;
|
||||||
for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
|
for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
|
||||||
out.insert(info[i].vertex);
|
out.insert(info[i].vertex);
|
||||||
}
|
}
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void fillInfoTable(const NGHolder &g, vector<StateInfo> &info) {
|
vector<StateInfo> makeInfoTable(const NGHolder &g) {
|
||||||
info.resize(num_vertices(g));
|
vector<StateInfo> info(num_vertices(g));
|
||||||
for (auto v : vertices_range(g)) {
|
for (auto v : vertices_range(g)) {
|
||||||
u32 idx = g[v].index;
|
u32 idx = g[v].index;
|
||||||
const CharReach &cr = g[v].char_reach;
|
const CharReach &cr = g[v].char_reach;
|
||||||
assert(idx < info.size());
|
assert(idx < info.size());
|
||||||
info[idx] = StateInfo(v, cr);
|
info[idx] = StateInfo(v, cr);
|
||||||
}
|
}
|
||||||
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
void execute_graph(const NGHolder &g, const ue2_literal &input,
|
flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
|
||||||
set<NFAVertex> *states, bool kill_sds) {
|
const flat_set<NFAVertex> &initial_states,
|
||||||
|
bool kill_sds) {
|
||||||
assert(hasCorrectlyNumberedVertices(g));
|
assert(hasCorrectlyNumberedVertices(g));
|
||||||
|
|
||||||
vector<StateInfo> info;
|
auto info = makeInfoTable(g);
|
||||||
fillInfoTable(g, info);
|
auto work_states = makeStateBitset(g, initial_states);
|
||||||
dynamic_bitset<> work_states(num_vertices(g));
|
|
||||||
fillStateBitset(g, *states, work_states);
|
|
||||||
|
|
||||||
execute_graph_i(g, info, input, &work_states, kill_sds);
|
execute_graph_i(g, info, input, &work_states, kill_sds);
|
||||||
|
|
||||||
fillVertexSet(work_states, info, *states);
|
return getVertices(work_states, info);
|
||||||
}
|
}
|
||||||
|
|
||||||
void execute_graph(const NGHolder &g, const vector<CharReach> &input,
|
flat_set<NFAVertex> execute_graph(const NGHolder &g,
|
||||||
set<NFAVertex> *states) {
|
const vector<CharReach> &input,
|
||||||
|
const flat_set<NFAVertex> &initial_states) {
|
||||||
assert(hasCorrectlyNumberedVertices(g));
|
assert(hasCorrectlyNumberedVertices(g));
|
||||||
|
|
||||||
vector<StateInfo> info;
|
auto info = makeInfoTable(g);
|
||||||
fillInfoTable(g, info);
|
auto work_states = makeStateBitset(g, initial_states);
|
||||||
dynamic_bitset<> work_states(num_vertices(g));
|
|
||||||
fillStateBitset(g, *states, work_states);
|
|
||||||
|
|
||||||
execute_graph_i(g, info, input, &work_states, false);
|
execute_graph_i(g, info, input, &work_states, false);
|
||||||
|
|
||||||
fillVertexSet(work_states, info, *states);
|
return getVertices(work_states, info);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef boost::reverse_graph<const NFAGraph, const NFAGraph &> RevNFAGraph;
|
typedef boost::reverse_graph<const NFAGraph, const NFAGraph &> RevNFAGraph;
|
||||||
@ -276,9 +277,10 @@ private:
|
|||||||
};
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
|
flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
|
||||||
const set<NFAVertex> &input_start_states,
|
const NGHolder &input_dag,
|
||||||
set<NFAVertex> *states) {
|
const flat_set<NFAVertex> &input_start_states,
|
||||||
|
const flat_set<NFAVertex> &initial_states) {
|
||||||
DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n",
|
DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n",
|
||||||
num_vertices(running_g), num_vertices(input_dag));
|
num_vertices(running_g), num_vertices(input_dag));
|
||||||
assert(hasCorrectlyNumberedVertices(running_g));
|
assert(hasCorrectlyNumberedVertices(running_g));
|
||||||
@ -290,10 +292,8 @@ void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
|
|||||||
RevNFAGraph revg(input_dag.g);
|
RevNFAGraph revg(input_dag.g);
|
||||||
map<NFAVertex, dynamic_bitset<> > dfs_states;
|
map<NFAVertex, dynamic_bitset<> > dfs_states;
|
||||||
|
|
||||||
vector<StateInfo> info;
|
auto info = makeInfoTable(running_g);
|
||||||
fillInfoTable(running_g, info);
|
auto input_fs = makeStateBitset(running_g, initial_states);
|
||||||
dynamic_bitset<> input_fs(num_vertices(running_g));
|
|
||||||
fillStateBitset(running_g, *states, input_fs);
|
|
||||||
|
|
||||||
for (auto v : input_start_states) {
|
for (auto v : input_start_states) {
|
||||||
dfs_states[v] = input_fs;
|
dfs_states[v] = input_fs;
|
||||||
@ -303,21 +303,25 @@ void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
|
|||||||
eg_visitor(running_g, info, input_dag, dfs_states),
|
eg_visitor(running_g, info, input_dag, dfs_states),
|
||||||
make_assoc_property_map(colours));
|
make_assoc_property_map(colours));
|
||||||
|
|
||||||
fillVertexSet(dfs_states[input_dag.accept], info, *states);
|
auto states = getVertices(dfs_states[input_dag.accept], info);
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
DEBUG_PRINTF(" output rstates:");
|
DEBUG_PRINTF(" output rstates:");
|
||||||
for (auto v : *states) {
|
for (const auto &v : states) {
|
||||||
printf(" %u", running_g[v].index);
|
printf(" %u", running_g[v].index);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
return states;
|
||||||
}
|
}
|
||||||
|
|
||||||
void execute_graph(const NGHolder &running_g, const NGHolder &input_dag,
|
flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
|
||||||
set<NFAVertex> *states) {
|
const NGHolder &input_dag,
|
||||||
set<NFAVertex> input_start_states = {input_dag.start, input_dag.startDs};
|
const flat_set<NFAVertex> &initial_states) {
|
||||||
execute_graph(running_g, input_dag, input_start_states, states);
|
auto input_start_states = {input_dag.start, input_dag.startDs};
|
||||||
|
return execute_graph(running_g, input_dag, input_start_states,
|
||||||
|
initial_states);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
@ -35,8 +35,8 @@
|
|||||||
#define NG_EXECUTE_H
|
#define NG_EXECUTE_H
|
||||||
|
|
||||||
#include "ng_holder.h"
|
#include "ng_holder.h"
|
||||||
|
#include "util/ue2_containers.h"
|
||||||
|
|
||||||
#include <set>
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
@ -44,23 +44,25 @@ namespace ue2 {
|
|||||||
class CharReach;
|
class CharReach;
|
||||||
struct ue2_literal;
|
struct ue2_literal;
|
||||||
|
|
||||||
void execute_graph(const NGHolder &g, const ue2_literal &input,
|
flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
|
||||||
std::set<NFAVertex> *states, bool kill_sds = false);
|
const flat_set<NFAVertex> &initial,
|
||||||
|
bool kill_sds = false);
|
||||||
|
|
||||||
void execute_graph(const NGHolder &g, const std::vector<CharReach> &input,
|
flat_set<NFAVertex> execute_graph(const NGHolder &g,
|
||||||
std::set<NFAVertex> *states);
|
const std::vector<CharReach> &input,
|
||||||
|
const flat_set<NFAVertex> &initial);
|
||||||
|
|
||||||
/** on exit, states contains any state which may still be enabled after
|
/** on exit, states contains any state which may still be enabled after
|
||||||
* receiving an input which corresponds to some path through the input_dag from
|
* receiving an input which corresponds to some path through the input_dag from
|
||||||
* start or startDs to accept. input_dag MUST be acyclic aside from self-loops.
|
* start or startDs to accept. input_dag MUST be acyclic aside from self-loops.
|
||||||
*/
|
*/
|
||||||
void execute_graph(const NGHolder &g, const NGHolder &input_dag,
|
flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
|
||||||
std::set<NFAVertex> *states);
|
const flat_set<NFAVertex> &initial);
|
||||||
|
|
||||||
/* as above, but able to specify the source states for the input graph */
|
/* as above, but able to specify the source states for the input graph */
|
||||||
void execute_graph(const NGHolder &g, const NGHolder &input_dag,
|
flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
|
||||||
const std::set<NFAVertex> &input_start_states,
|
const flat_set<NFAVertex> &input_start_states,
|
||||||
std::set<NFAVertex> *states);
|
const flat_set<NFAVertex> &initial);
|
||||||
|
|
||||||
} // namespace ue2
|
} // namespace ue2
|
||||||
|
|
||||||
|
@ -266,7 +266,7 @@ bool validateEXSL(const NGHolder &g,
|
|||||||
const vector<CharReach> escapes_vec(1, escapes);
|
const vector<CharReach> escapes_vec(1, escapes);
|
||||||
const vector<CharReach> notescapes_vec(1, ~escapes);
|
const vector<CharReach> notescapes_vec(1, ~escapes);
|
||||||
|
|
||||||
set<NFAVertex> states;
|
ue2::flat_set<NFAVertex> states;
|
||||||
/* turn on all states past the prefix */
|
/* turn on all states past the prefix */
|
||||||
DEBUG_PRINTF("region %u is cutover\n", region);
|
DEBUG_PRINTF("region %u is cutover\n", region);
|
||||||
for (auto v : vertices_range(g)) {
|
for (auto v : vertices_range(g)) {
|
||||||
@ -276,20 +276,20 @@ bool validateEXSL(const NGHolder &g,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* process the escapes */
|
/* process the escapes */
|
||||||
execute_graph(g, escapes_vec, &states);
|
states = execute_graph(g, escapes_vec, states);
|
||||||
|
|
||||||
/* flood with any number of not escapes */
|
/* flood with any number of not escapes */
|
||||||
set<NFAVertex> prev_states;
|
ue2::flat_set<NFAVertex> prev_states;
|
||||||
while (prev_states != states) {
|
while (prev_states != states) {
|
||||||
prev_states = states;
|
prev_states = states;
|
||||||
execute_graph(g, notescapes_vec, &states);
|
states = execute_graph(g, notescapes_vec, states);
|
||||||
insert(&states, prev_states);
|
insert(&states, prev_states);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* find input starts to use for when we are running the prefix through as
|
/* find input starts to use for when we are running the prefix through as
|
||||||
* when the escape character arrives we may be in matching the prefix
|
* when the escape character arrives we may be in matching the prefix
|
||||||
* already */
|
* already */
|
||||||
set<NFAVertex> prefix_start_states;
|
ue2::flat_set<NFAVertex> prefix_start_states;
|
||||||
for (auto v : vertices_range(prefix)) {
|
for (auto v : vertices_range(prefix)) {
|
||||||
if (v != prefix.accept && v != prefix.acceptEod
|
if (v != prefix.accept && v != prefix.acceptEod
|
||||||
/* and as we have already made it past the prefix once */
|
/* and as we have already made it past the prefix once */
|
||||||
@ -298,11 +298,12 @@ bool validateEXSL(const NGHolder &g,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
execute_graph(prefix, escapes_vec, &prefix_start_states);
|
prefix_start_states =
|
||||||
|
execute_graph(prefix, escapes_vec, prefix_start_states);
|
||||||
|
|
||||||
assert(contains(prefix_start_states, prefix.startDs));
|
assert(contains(prefix_start_states, prefix.startDs));
|
||||||
/* see what happens after we feed it the prefix */
|
/* see what happens after we feed it the prefix */
|
||||||
execute_graph(g, prefix, prefix_start_states, &states);
|
states = execute_graph(g, prefix, prefix_start_states, states);
|
||||||
|
|
||||||
for (auto v : states) {
|
for (auto v : states) {
|
||||||
assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be
|
assert(v != g.accept && v != g.acceptEod); /* no cr -> should never be
|
||||||
|
@ -136,7 +136,7 @@ bool firstMatchIsFirst(const NGHolder &p) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
set<NFAVertex> states;
|
ue2::flat_set<NFAVertex> states;
|
||||||
/* turn on all states (except starts - avoid suffix matches) */
|
/* turn on all states (except starts - avoid suffix matches) */
|
||||||
/* If we were doing (1) we would also except states leading to accepts -
|
/* If we were doing (1) we would also except states leading to accepts -
|
||||||
avoid prefix matches */
|
avoid prefix matches */
|
||||||
@ -149,7 +149,7 @@ bool firstMatchIsFirst(const NGHolder &p) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* run the prefix the main graph */
|
/* run the prefix the main graph */
|
||||||
execute_graph(p, p, &states);
|
states = execute_graph(p, p, states);
|
||||||
|
|
||||||
for (auto v : states) {
|
for (auto v : states) {
|
||||||
/* need to check if this vertex may represent an infix match - ie
|
/* need to check if this vertex may represent an infix match - ie
|
||||||
@ -313,7 +313,7 @@ bool sentClearsTail(const NGHolder &g,
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
u32 first_bad_region = ~0U;
|
u32 first_bad_region = ~0U;
|
||||||
set<NFAVertex> states;
|
ue2::flat_set<NFAVertex> states;
|
||||||
/* turn on all states */
|
/* turn on all states */
|
||||||
DEBUG_PRINTF("region %u is cutover\n", last_head_region);
|
DEBUG_PRINTF("region %u is cutover\n", last_head_region);
|
||||||
for (auto v : vertices_range(g)) {
|
for (auto v : vertices_range(g)) {
|
||||||
@ -327,7 +327,7 @@ bool sentClearsTail(const NGHolder &g,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* run the prefix the main graph */
|
/* run the prefix the main graph */
|
||||||
execute_graph(g, sent, &states);
|
states = execute_graph(g, sent, states);
|
||||||
|
|
||||||
/* .. and check if we are left with anything in the tail region */
|
/* .. and check if we are left with anything in the tail region */
|
||||||
for (auto v : states) {
|
for (auto v : states) {
|
||||||
|
@ -1631,20 +1631,23 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &tbi, const left_id &left,
|
|||||||
assert(left.graph());
|
assert(left.graph());
|
||||||
const NGHolder &h = *left.graph();
|
const NGHolder &h = *left.graph();
|
||||||
|
|
||||||
|
ue2::flat_set<NFAVertex> all_states;
|
||||||
|
insert(&all_states, vertices(h));
|
||||||
|
assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */
|
||||||
|
DEBUG_PRINTF("removing sds\n");
|
||||||
|
all_states.erase(h.startDs);
|
||||||
|
|
||||||
|
ue2::flat_set<NFAVertex> states;
|
||||||
|
|
||||||
/* check each pred literal to see if they all kill previous graph
|
/* check each pred literal to see if they all kill previous graph
|
||||||
* state */
|
* state */
|
||||||
for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) {
|
for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) {
|
||||||
const rose_literal_id &pred_lit = tbi.literals.right.at(lit_id);
|
const rose_literal_id &pred_lit = tbi.literals.right.at(lit_id);
|
||||||
const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s);
|
const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s);
|
||||||
|
|
||||||
set<NFAVertex> states;
|
|
||||||
insert(&states, vertices(h));
|
|
||||||
assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */
|
|
||||||
DEBUG_PRINTF("removing sds\n");
|
|
||||||
states.erase(h.startDs);
|
|
||||||
DEBUG_PRINTF("running graph %zu\n", states.size());
|
DEBUG_PRINTF("running graph %zu\n", states.size());
|
||||||
execute_graph(h, s, &states, true);
|
states = execute_graph(h, s, all_states, true);
|
||||||
DEBUG_PRINTF("ran\n");
|
DEBUG_PRINTF("ran, %zu states on\n", states.size());
|
||||||
|
|
||||||
if (!states.empty()) {
|
if (!states.empty()) {
|
||||||
return false;
|
return false;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user