mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
determinise: use queue, improve api
- Use a queue rather than always building the full vector of state sets.
- Make more use of move, emplace, reserve.
- Write directly into dstates argument.
- Return bool rather than int.
This commit is contained in:
parent
64db576b9e
commit
31141dd35b
@ -289,7 +289,7 @@ unique_ptr<raw_dfa> mergeTwoDfas(const raw_dfa *d1, const raw_dfa *d2,
|
||||
auto rdfa = ue2::make_unique<raw_dfa>(d1->kind);
|
||||
|
||||
Automaton_Merge autom(d1, d2, rm, grey);
|
||||
if (!determinise(autom, rdfa->states, max_states)) {
|
||||
if (determinise(autom, rdfa->states, max_states)) {
|
||||
rdfa->start_anchored = autom.start_anchored;
|
||||
rdfa->start_floating = autom.start_floating;
|
||||
rdfa->alpha_size = autom.alphasize;
|
||||
@ -374,7 +374,7 @@ unique_ptr<raw_dfa> mergeAllDfas(const vector<const raw_dfa *> &dfas,
|
||||
|
||||
DEBUG_PRINTF("merging dfa\n");
|
||||
|
||||
if (determinise(n, rdfa->states, max_states)) {
|
||||
if (!determinise(n, rdfa->states, max_states)) {
|
||||
DEBUG_PRINTF("state limit (%zu) exceeded\n", max_states);
|
||||
return nullptr; /* over state limit */
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -518,7 +518,7 @@ bool doHaig(const NGHolder &g, som_type som,
|
||||
vector<StateSet> nfa_state_map;
|
||||
Auto n(g, som, triggers, unordered_som);
|
||||
try {
|
||||
if (determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
|
||||
if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) {
|
||||
DEBUG_PRINTF("state limit exceeded\n");
|
||||
return false;
|
||||
}
|
||||
@ -726,9 +726,8 @@ unique_ptr<raw_som_dfa> attemptToMergeHaig(const vector<const raw_som_dfa *> &df
|
||||
NODE_START,
|
||||
dfas[0]->stream_som_loc_width);
|
||||
|
||||
int rv = determinise(n, rdfa->states, limit, &nfa_state_map);
|
||||
if (rv) {
|
||||
DEBUG_PRINTF("%d:state limit (%u) exceeded\n", rv, limit);
|
||||
if (!determinise(n, rdfa->states, limit, &nfa_state_map)) {
|
||||
DEBUG_PRINTF("state limit (%u) exceeded\n", limit);
|
||||
return nullptr; /* over state limit */
|
||||
}
|
||||
|
||||
|
@ -433,6 +433,7 @@ public:
|
||||
}
|
||||
return allExternalReports(*rm, test_reports);
|
||||
}
|
||||
|
||||
private:
|
||||
const ReportManager *rm;
|
||||
public:
|
||||
@ -568,7 +569,7 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph,
|
||||
/* Fast path. Automaton_Graph uses a bitfield internally to represent
|
||||
* states and is quicker than Automaton_Big. */
|
||||
Automaton_Graph n(rm, graph, single_trigger, triggers, prunable);
|
||||
if (determinise(n, rdfa->states, state_limit)) {
|
||||
if (!determinise(n, rdfa->states, state_limit)) {
|
||||
DEBUG_PRINTF("state limit exceeded\n");
|
||||
return nullptr; /* over state limit */
|
||||
}
|
||||
@ -580,7 +581,7 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph,
|
||||
} else {
|
||||
/* Slow path. Too many states to use Automaton_Graph. */
|
||||
Automaton_Big n(rm, graph, single_trigger, triggers, prunable);
|
||||
if (determinise(n, rdfa->states, state_limit)) {
|
||||
if (!determinise(n, rdfa->states, state_limit)) {
|
||||
DEBUG_PRINTF("state limit exceeded\n");
|
||||
return nullptr; /* over state limit */
|
||||
}
|
||||
|
@ -701,8 +701,8 @@ int addAutomaton(RoseBuildImpl &build, const NGHolder &h, ReportID *remap) {
|
||||
|
||||
Automaton_Holder autom(h);
|
||||
|
||||
unique_ptr<raw_dfa> out_dfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
|
||||
if (!determinise(autom, out_dfa->states, MAX_DFA_STATES)) {
|
||||
auto out_dfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
|
||||
if (determinise(autom, out_dfa->states, MAX_DFA_STATES)) {
|
||||
return finalise_out(build, h, autom, move(out_dfa), remap);
|
||||
}
|
||||
|
||||
@ -764,8 +764,8 @@ void buildSimpleDfas(const RoseBuildImpl &build, const vector<u32> &frag_map,
|
||||
auto h = populate_holder(simple.first, exit_ids);
|
||||
Automaton_Holder autom(*h);
|
||||
auto rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
|
||||
UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES);
|
||||
assert(!rv);
|
||||
UNUSED bool rv = determinise(autom, rdfa->states, MAX_DFA_STATES);
|
||||
assert(rv);
|
||||
rdfa->start_anchored = INIT_STATE;
|
||||
rdfa->start_floating = DEAD_STATE;
|
||||
rdfa->alpha_size = autom.alphasize;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@ -38,14 +38,13 @@
|
||||
#include "container.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <array>
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <queue>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
#define DETERMINISE_RESERVE_SIZE 10
|
||||
|
||||
/* Automaton details:
|
||||
*
|
||||
* const vector<StateSet> initial()
|
||||
@ -73,42 +72,44 @@ namespace ue2 {
|
||||
* \param state_limit limit on the number of dfa states to construct
|
||||
* \param statesets_out a mapping from DFA state to the set of NFA states in
|
||||
* the automaton
|
||||
* \return zero on success
|
||||
* \return true on success, false if state limit exceeded
|
||||
*/
|
||||
template<class Auto, class ds>
|
||||
never_inline
|
||||
int determinise(Auto &n, std::vector<ds> &dstates_out, dstate_id_t state_limit,
|
||||
bool determinise(Auto &n, std::vector<ds> &dstates, size_t state_limit,
|
||||
std::vector<typename Auto::StateSet> *statesets_out = nullptr) {
|
||||
DEBUG_PRINTF("the determinator\n");
|
||||
typedef typename Auto::StateSet StateSet;
|
||||
typedef typename Auto::StateMap DstateIdMap;
|
||||
DstateIdMap dstate_ids;
|
||||
std::vector<StateSet> statesets;
|
||||
|
||||
const size_t alphabet_size = n.alphasize;
|
||||
|
||||
std::vector<ds> dstates;
|
||||
dstates.reserve(DETERMINISE_RESERVE_SIZE);
|
||||
statesets.reserve(DETERMINISE_RESERVE_SIZE);
|
||||
dstates.clear();
|
||||
dstates.reserve(state_limit);
|
||||
|
||||
dstate_ids[n.dead] = DEAD_STATE;
|
||||
dstate_ids.emplace(n.dead, DEAD_STATE);
|
||||
dstates.push_back(ds(alphabet_size));
|
||||
std::fill_n(dstates[0].next.begin(), alphabet_size, DEAD_STATE);
|
||||
|
||||
statesets.push_back(n.dead);
|
||||
std::queue<std::pair<StateSet, dstate_id_t>> q;
|
||||
q.emplace(n.dead, DEAD_STATE);
|
||||
|
||||
const std::vector<StateSet> &init = n.initial();
|
||||
for (u32 i = 0; i < init.size(); i++) {
|
||||
statesets.push_back(init[i]);
|
||||
q.emplace(init[i], dstates.size());
|
||||
assert(!contains(dstate_ids, init[i]));
|
||||
dstate_ids[init[i]] = dstates.size();
|
||||
dstate_ids.emplace(init[i], dstates.size());
|
||||
dstates.push_back(ds(alphabet_size));
|
||||
}
|
||||
|
||||
std::vector<StateSet> succs(alphabet_size, n.dead);
|
||||
for (dstate_id_t curr_id = DEAD_STATE; curr_id < dstates.size();
|
||||
curr_id++) {
|
||||
StateSet &curr = statesets[curr_id];
|
||||
|
||||
while (!q.empty()) {
|
||||
auto m = std::move(q.front());
|
||||
q.pop();
|
||||
StateSet &curr = m.first;
|
||||
dstate_id_t curr_id = m.second;
|
||||
|
||||
DEBUG_PRINTF("curr: %hu\n", curr_id);
|
||||
|
||||
@ -139,43 +140,46 @@ int determinise(Auto &n, std::vector<ds> &dstates_out, dstate_id_t state_limit,
|
||||
if (s && succs[s] == succs[s - 1]) {
|
||||
succ_id = dstates[curr_id].next[s - 1];
|
||||
} else {
|
||||
typename DstateIdMap::const_iterator dstate_id_iter;
|
||||
dstate_id_iter = dstate_ids.find(succs[s]);
|
||||
|
||||
if (dstate_id_iter != dstate_ids.end()) {
|
||||
succ_id = dstate_id_iter->second;
|
||||
|
||||
auto p = dstate_ids.emplace(succs[s], dstates.size());
|
||||
succ_id = p.first->second;
|
||||
if (!p.second) { /* succs[s] is already present */
|
||||
if (succ_id > curr_id && !dstates[succ_id].daddy
|
||||
&& n.unalpha[s] < N_CHARS) {
|
||||
dstates[succ_id].daddy = curr_id;
|
||||
}
|
||||
} else {
|
||||
statesets.push_back(succs[s]);
|
||||
succ_id = dstates.size();
|
||||
dstate_ids[succs[s]] = succ_id;
|
||||
dstates.push_back(ds(alphabet_size));
|
||||
dstates.back().daddy = n.unalpha[s] < N_CHARS ? curr_id : 0;
|
||||
q.emplace(succs[s], succ_id);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("-->%hu on %02hx\n", succ_id, n.unalpha[s]);
|
||||
}
|
||||
|
||||
if (succ_id >= state_limit) {
|
||||
DEBUG_PRINTF("succ_id %hu >= state_limit %hu\n",
|
||||
DEBUG_PRINTF("succ_id %hu >= state_limit %zu\n",
|
||||
succ_id, state_limit);
|
||||
return -2;
|
||||
dstates.clear();
|
||||
return false;
|
||||
}
|
||||
|
||||
dstates[curr_id].next[s] = succ_id;
|
||||
}
|
||||
}
|
||||
|
||||
dstates_out = dstates;
|
||||
// The dstates vector will persist in the raw_dfa.
|
||||
dstates.shrink_to_fit();
|
||||
|
||||
if (statesets_out) {
|
||||
statesets_out->swap(statesets);
|
||||
auto &statesets = *statesets_out;
|
||||
statesets.resize(dstate_ids.size());
|
||||
for (auto &m : dstate_ids) {
|
||||
statesets[m.second] = std::move(m.first);
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("ok\n");
|
||||
return 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline
|
||||
|
Loading…
x
Reference in New Issue
Block a user