mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
determinise: use queue, improve api
- Use a queue rather than always building the full vector of state sets.
- Make more use of move, emplace, reserve.
- Write directly into dstates argument.
- Return bool rather than int.
This commit is contained in:
parent
64db576b9e
commit
31141dd35b
@ -289,7 +289,7 @@ unique_ptr<raw_dfa> mergeTwoDfas(const raw_dfa *d1, const raw_dfa *d2,
|
|||||||
auto rdfa = ue2::make_unique<raw_dfa>(d1->kind);
|
auto rdfa = ue2::make_unique<raw_dfa>(d1->kind);
|
||||||
|
|
||||||
Automaton_Merge autom(d1, d2, rm, grey);
|
Automaton_Merge autom(d1, d2, rm, grey);
|
||||||
if (!determinise(autom, rdfa->states, max_states)) {
|
if (determinise(autom, rdfa->states, max_states)) {
|
||||||
rdfa->start_anchored = autom.start_anchored;
|
rdfa->start_anchored = autom.start_anchored;
|
||||||
rdfa->start_floating = autom.start_floating;
|
rdfa->start_floating = autom.start_floating;
|
||||||
rdfa->alpha_size = autom.alphasize;
|
rdfa->alpha_size = autom.alphasize;
|
||||||
@ -374,7 +374,7 @@ unique_ptr<raw_dfa> mergeAllDfas(const vector<const raw_dfa *> &dfas,
|
|||||||
|
|
||||||
DEBUG_PRINTF("merging dfa\n");
|
DEBUG_PRINTF("merging dfa\n");
|
||||||
|
|
||||||
if (determinise(n, rdfa->states, max_states)) {
|
if (!determinise(n, rdfa->states, max_states)) {
|
||||||
DEBUG_PRINTF("state limit (%zu) exceeded\n", max_states);
|
DEBUG_PRINTF("state limit (%zu) exceeded\n", max_states);
|
||||||
return nullptr; /* over state limit */
|
return nullptr; /* over state limit */
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015-2016, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -518,7 +518,7 @@ bool doHaig(const NGHolder &g, som_type som,
|
|||||||
vector<StateSet> nfa_state_map;
|
vector<StateSet> nfa_state_map;
|
||||||
Auto n(g, som, triggers, unordered_som);
|
Auto n(g, som, triggers, unordered_som);
|
||||||
try {
|
try {
|
||||||
if (determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
|
if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
|
||||||
DEBUG_PRINTF("state limit exceeded\n");
|
DEBUG_PRINTF("state limit exceeded\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -726,9 +726,8 @@ unique_ptr<raw_som_dfa> attemptToMergeHaig(const vector<const raw_som_dfa *> &df
|
|||||||
NODE_START,
|
NODE_START,
|
||||||
dfas[0]->stream_som_loc_width);
|
dfas[0]->stream_som_loc_width);
|
||||||
|
|
||||||
int rv = determinise(n, rdfa->states, limit, &nfa_state_map);
|
if (!determinise(n, rdfa->states, limit, &nfa_state_map)) {
|
||||||
if (rv) {
|
DEBUG_PRINTF("state limit (%u) exceeded\n", limit);
|
||||||
DEBUG_PRINTF("%d:state limit (%u) exceeded\n", rv, limit);
|
|
||||||
return nullptr; /* over state limit */
|
return nullptr; /* over state limit */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -433,6 +433,7 @@ public:
|
|||||||
}
|
}
|
||||||
return allExternalReports(*rm, test_reports);
|
return allExternalReports(*rm, test_reports);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const ReportManager *rm;
|
const ReportManager *rm;
|
||||||
public:
|
public:
|
||||||
@ -568,7 +569,7 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph,
|
|||||||
/* Fast path. Automaton_Graph uses a bitfield internally to represent
|
/* Fast path. Automaton_Graph uses a bitfield internally to represent
|
||||||
* states and is quicker than Automaton_Big. */
|
* states and is quicker than Automaton_Big. */
|
||||||
Automaton_Graph n(rm, graph, single_trigger, triggers, prunable);
|
Automaton_Graph n(rm, graph, single_trigger, triggers, prunable);
|
||||||
if (determinise(n, rdfa->states, state_limit)) {
|
if (!determinise(n, rdfa->states, state_limit)) {
|
||||||
DEBUG_PRINTF("state limit exceeded\n");
|
DEBUG_PRINTF("state limit exceeded\n");
|
||||||
return nullptr; /* over state limit */
|
return nullptr; /* over state limit */
|
||||||
}
|
}
|
||||||
@ -580,7 +581,7 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph,
|
|||||||
} else {
|
} else {
|
||||||
/* Slow path. Too many states to use Automaton_Graph. */
|
/* Slow path. Too many states to use Automaton_Graph. */
|
||||||
Automaton_Big n(rm, graph, single_trigger, triggers, prunable);
|
Automaton_Big n(rm, graph, single_trigger, triggers, prunable);
|
||||||
if (determinise(n, rdfa->states, state_limit)) {
|
if (!determinise(n, rdfa->states, state_limit)) {
|
||||||
DEBUG_PRINTF("state limit exceeded\n");
|
DEBUG_PRINTF("state limit exceeded\n");
|
||||||
return nullptr; /* over state limit */
|
return nullptr; /* over state limit */
|
||||||
}
|
}
|
||||||
|
@ -701,8 +701,8 @@ int addAutomaton(RoseBuildImpl &build, const NGHolder &h, ReportID *remap) {
|
|||||||
|
|
||||||
Automaton_Holder autom(h);
|
Automaton_Holder autom(h);
|
||||||
|
|
||||||
unique_ptr<raw_dfa> out_dfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
|
auto out_dfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
|
||||||
if (!determinise(autom, out_dfa->states, MAX_DFA_STATES)) {
|
if (determinise(autom, out_dfa->states, MAX_DFA_STATES)) {
|
||||||
return finalise_out(build, h, autom, move(out_dfa), remap);
|
return finalise_out(build, h, autom, move(out_dfa), remap);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -764,8 +764,8 @@ void buildSimpleDfas(const RoseBuildImpl &build, const vector<u32> &frag_map,
|
|||||||
auto h = populate_holder(simple.first, exit_ids);
|
auto h = populate_holder(simple.first, exit_ids);
|
||||||
Automaton_Holder autom(*h);
|
Automaton_Holder autom(*h);
|
||||||
auto rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
|
auto rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
|
||||||
UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES);
|
UNUSED bool rv = determinise(autom, rdfa->states, MAX_DFA_STATES);
|
||||||
assert(!rv);
|
assert(rv);
|
||||||
rdfa->start_anchored = INIT_STATE;
|
rdfa->start_anchored = INIT_STATE;
|
||||||
rdfa->start_floating = DEAD_STATE;
|
rdfa->start_floating = DEAD_STATE;
|
||||||
rdfa->alpha_size = autom.alphasize;
|
rdfa->alpha_size = autom.alphasize;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2015, Intel Corporation
|
* Copyright (c) 2015-2017, Intel Corporation
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
* modification, are permitted provided that the following conditions are met:
|
* modification, are permitted provided that the following conditions are met:
|
||||||
@ -38,14 +38,13 @@
|
|||||||
#include "container.h"
|
#include "container.h"
|
||||||
#include "ue2common.h"
|
#include "ue2common.h"
|
||||||
|
|
||||||
#include <array>
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <array>
|
||||||
|
#include <queue>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace ue2 {
|
namespace ue2 {
|
||||||
|
|
||||||
#define DETERMINISE_RESERVE_SIZE 10
|
|
||||||
|
|
||||||
/* Automaton details:
|
/* Automaton details:
|
||||||
*
|
*
|
||||||
* const vector<StateSet> initial()
|
* const vector<StateSet> initial()
|
||||||
@ -73,42 +72,44 @@ namespace ue2 {
|
|||||||
* \param state_limit limit on the number of dfa states to construct
|
* \param state_limit limit on the number of dfa states to construct
|
||||||
* \param statesets_out a mapping from DFA state to the set of NFA states in
|
* \param statesets_out a mapping from DFA state to the set of NFA states in
|
||||||
* the automaton
|
* the automaton
|
||||||
* \return zero on success
|
* \return true on success, false if state limit exceeded
|
||||||
*/
|
*/
|
||||||
template<class Auto, class ds>
|
template<class Auto, class ds>
|
||||||
never_inline
|
never_inline
|
||||||
int determinise(Auto &n, std::vector<ds> &dstates_out, dstate_id_t state_limit,
|
bool determinise(Auto &n, std::vector<ds> &dstates, size_t state_limit,
|
||||||
std::vector<typename Auto::StateSet> *statesets_out = nullptr) {
|
std::vector<typename Auto::StateSet> *statesets_out = nullptr) {
|
||||||
DEBUG_PRINTF("the determinator\n");
|
DEBUG_PRINTF("the determinator\n");
|
||||||
typedef typename Auto::StateSet StateSet;
|
typedef typename Auto::StateSet StateSet;
|
||||||
typedef typename Auto::StateMap DstateIdMap;
|
typedef typename Auto::StateMap DstateIdMap;
|
||||||
DstateIdMap dstate_ids;
|
DstateIdMap dstate_ids;
|
||||||
std::vector<StateSet> statesets;
|
|
||||||
|
|
||||||
const size_t alphabet_size = n.alphasize;
|
const size_t alphabet_size = n.alphasize;
|
||||||
|
|
||||||
std::vector<ds> dstates;
|
dstates.clear();
|
||||||
dstates.reserve(DETERMINISE_RESERVE_SIZE);
|
dstates.reserve(state_limit);
|
||||||
statesets.reserve(DETERMINISE_RESERVE_SIZE);
|
|
||||||
|
|
||||||
dstate_ids[n.dead] = DEAD_STATE;
|
dstate_ids.emplace(n.dead, DEAD_STATE);
|
||||||
dstates.push_back(ds(alphabet_size));
|
dstates.push_back(ds(alphabet_size));
|
||||||
std::fill_n(dstates[0].next.begin(), alphabet_size, DEAD_STATE);
|
std::fill_n(dstates[0].next.begin(), alphabet_size, DEAD_STATE);
|
||||||
|
|
||||||
statesets.push_back(n.dead);
|
std::queue<std::pair<StateSet, dstate_id_t>> q;
|
||||||
|
q.emplace(n.dead, DEAD_STATE);
|
||||||
|
|
||||||
const std::vector<StateSet> &init = n.initial();
|
const std::vector<StateSet> &init = n.initial();
|
||||||
for (u32 i = 0; i < init.size(); i++) {
|
for (u32 i = 0; i < init.size(); i++) {
|
||||||
statesets.push_back(init[i]);
|
q.emplace(init[i], dstates.size());
|
||||||
assert(!contains(dstate_ids, init[i]));
|
assert(!contains(dstate_ids, init[i]));
|
||||||
dstate_ids[init[i]] = dstates.size();
|
dstate_ids.emplace(init[i], dstates.size());
|
||||||
dstates.push_back(ds(alphabet_size));
|
dstates.push_back(ds(alphabet_size));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<StateSet> succs(alphabet_size, n.dead);
|
std::vector<StateSet> succs(alphabet_size, n.dead);
|
||||||
for (dstate_id_t curr_id = DEAD_STATE; curr_id < dstates.size();
|
|
||||||
curr_id++) {
|
while (!q.empty()) {
|
||||||
StateSet &curr = statesets[curr_id];
|
auto m = std::move(q.front());
|
||||||
|
q.pop();
|
||||||
|
StateSet &curr = m.first;
|
||||||
|
dstate_id_t curr_id = m.second;
|
||||||
|
|
||||||
DEBUG_PRINTF("curr: %hu\n", curr_id);
|
DEBUG_PRINTF("curr: %hu\n", curr_id);
|
||||||
|
|
||||||
@ -139,43 +140,46 @@ int determinise(Auto &n, std::vector<ds> &dstates_out, dstate_id_t state_limit,
|
|||||||
if (s && succs[s] == succs[s - 1]) {
|
if (s && succs[s] == succs[s - 1]) {
|
||||||
succ_id = dstates[curr_id].next[s - 1];
|
succ_id = dstates[curr_id].next[s - 1];
|
||||||
} else {
|
} else {
|
||||||
typename DstateIdMap::const_iterator dstate_id_iter;
|
auto p = dstate_ids.emplace(succs[s], dstates.size());
|
||||||
dstate_id_iter = dstate_ids.find(succs[s]);
|
succ_id = p.first->second;
|
||||||
|
if (!p.second) { /* succs[s] is already present */
|
||||||
if (dstate_id_iter != dstate_ids.end()) {
|
|
||||||
succ_id = dstate_id_iter->second;
|
|
||||||
|
|
||||||
if (succ_id > curr_id && !dstates[succ_id].daddy
|
if (succ_id > curr_id && !dstates[succ_id].daddy
|
||||||
&& n.unalpha[s] < N_CHARS) {
|
&& n.unalpha[s] < N_CHARS) {
|
||||||
dstates[succ_id].daddy = curr_id;
|
dstates[succ_id].daddy = curr_id;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
statesets.push_back(succs[s]);
|
|
||||||
succ_id = dstates.size();
|
|
||||||
dstate_ids[succs[s]] = succ_id;
|
|
||||||
dstates.push_back(ds(alphabet_size));
|
dstates.push_back(ds(alphabet_size));
|
||||||
dstates.back().daddy = n.unalpha[s] < N_CHARS ? curr_id : 0;
|
dstates.back().daddy = n.unalpha[s] < N_CHARS ? curr_id : 0;
|
||||||
|
q.emplace(succs[s], succ_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("-->%hu on %02hx\n", succ_id, n.unalpha[s]);
|
DEBUG_PRINTF("-->%hu on %02hx\n", succ_id, n.unalpha[s]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (succ_id >= state_limit) {
|
if (succ_id >= state_limit) {
|
||||||
DEBUG_PRINTF("succ_id %hu >= state_limit %hu\n",
|
DEBUG_PRINTF("succ_id %hu >= state_limit %zu\n",
|
||||||
succ_id, state_limit);
|
succ_id, state_limit);
|
||||||
return -2;
|
dstates.clear();
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
dstates[curr_id].next[s] = succ_id;
|
dstates[curr_id].next[s] = succ_id;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dstates_out = dstates;
|
// The dstates vector will persist in the raw_dfa.
|
||||||
|
dstates.shrink_to_fit();
|
||||||
|
|
||||||
if (statesets_out) {
|
if (statesets_out) {
|
||||||
statesets_out->swap(statesets);
|
auto &statesets = *statesets_out;
|
||||||
|
statesets.resize(dstate_ids.size());
|
||||||
|
for (auto &m : dstate_ids) {
|
||||||
|
statesets[m.second] = std::move(m.first);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
DEBUG_PRINTF("ok\n");
|
DEBUG_PRINTF("ok\n");
|
||||||
return 0;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
|
Loading…
x
Reference in New Issue
Block a user