diff --git a/src/nfa/rdfa_merge.cpp b/src/nfa/rdfa_merge.cpp index 50e9b62a..99b1930d 100644 --- a/src/nfa/rdfa_merge.cpp +++ b/src/nfa/rdfa_merge.cpp @@ -289,7 +289,7 @@ unique_ptr mergeTwoDfas(const raw_dfa *d1, const raw_dfa *d2, auto rdfa = ue2::make_unique(d1->kind); Automaton_Merge autom(d1, d2, rm, grey); - if (!determinise(autom, rdfa->states, max_states)) { + if (determinise(autom, rdfa->states, max_states)) { rdfa->start_anchored = autom.start_anchored; rdfa->start_floating = autom.start_floating; rdfa->alpha_size = autom.alphasize; @@ -374,7 +374,7 @@ unique_ptr mergeAllDfas(const vector &dfas, DEBUG_PRINTF("merging dfa\n"); - if (determinise(n, rdfa->states, max_states)) { + if (!determinise(n, rdfa->states, max_states)) { DEBUG_PRINTF("state limit (%zu) exceeded\n", max_states); return nullptr; /* over state limit */ } diff --git a/src/nfagraph/ng_haig.cpp b/src/nfagraph/ng_haig.cpp index e4be14c3..50522ff7 100644 --- a/src/nfagraph/ng_haig.cpp +++ b/src/nfagraph/ng_haig.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -518,7 +518,7 @@ bool doHaig(const NGHolder &g, som_type som, vector nfa_state_map; Auto n(g, som, triggers, unordered_som); try { - if (determinise(n, rdfa->states, state_limit, &nfa_state_map)) { + if (!determinise(n, rdfa->states, state_limit, &nfa_state_map)) { DEBUG_PRINTF("state limit exceeded\n"); return false; } @@ -726,9 +726,8 @@ unique_ptr attemptToMergeHaig(const vector &df NODE_START, dfas[0]->stream_som_loc_width); - int rv = determinise(n, rdfa->states, limit, &nfa_state_map); - if (rv) { - DEBUG_PRINTF("%d:state limit (%u) exceeded\n", rv, limit); + if (!determinise(n, rdfa->states, limit, &nfa_state_map)) { + DEBUG_PRINTF("state limit (%u) exceeded\n", limit); return nullptr; /* over state limit */ } diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp index 9448a0bf..6ada273c 100644 --- a/src/nfagraph/ng_mcclellan.cpp +++ b/src/nfagraph/ng_mcclellan.cpp @@ -433,6 +433,7 @@ public: } return allExternalReports(*rm, test_reports); } + private: const ReportManager *rm; public: @@ -568,7 +569,7 @@ unique_ptr buildMcClellan(const NGHolder &graph, /* Fast path. Automaton_Graph uses a bitfield internally to represent * states and is quicker than Automaton_Big. */ Automaton_Graph n(rm, graph, single_trigger, triggers, prunable); - if (determinise(n, rdfa->states, state_limit)) { + if (!determinise(n, rdfa->states, state_limit)) { DEBUG_PRINTF("state limit exceeded\n"); return nullptr; /* over state limit */ } @@ -580,7 +581,7 @@ unique_ptr buildMcClellan(const NGHolder &graph, } else { /* Slow path. Too many states to use Automaton_Graph. */ Automaton_Big n(rm, graph, single_trigger, triggers, prunable); - if (determinise(n, rdfa->states, state_limit)) { + if (!determinise(n, rdfa->states, state_limit)) { DEBUG_PRINTF("state limit exceeded\n"); return nullptr; /* over state limit */ } diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index a2af160e..b5413a67 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -701,8 +701,8 @@ int addAutomaton(RoseBuildImpl &build, const NGHolder &h, ReportID *remap) { Automaton_Holder autom(h); - unique_ptr out_dfa = ue2::make_unique(NFA_OUTFIX_RAW); - if (!determinise(autom, out_dfa->states, MAX_DFA_STATES)) { + auto out_dfa = ue2::make_unique(NFA_OUTFIX_RAW); + if (determinise(autom, out_dfa->states, MAX_DFA_STATES)) { return finalise_out(build, h, autom, move(out_dfa), remap); } @@ -764,8 +764,8 @@ void buildSimpleDfas(const RoseBuildImpl &build, const vector &frag_map, auto h = populate_holder(simple.first, exit_ids); Automaton_Holder autom(*h); auto rdfa = ue2::make_unique(NFA_OUTFIX_RAW); - UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES); - assert(!rv); + UNUSED bool rv = determinise(autom, rdfa->states, MAX_DFA_STATES); + assert(rv); rdfa->start_anchored = INIT_STATE; rdfa->start_floating = DEAD_STATE; rdfa->alpha_size = autom.alphasize; diff --git a/src/util/determinise.h b/src/util/determinise.h index d7bb592b..688af61b 100644 --- a/src/util/determinise.h +++ b/src/util/determinise.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,14 +38,13 @@ #include "container.h" #include "ue2common.h" -#include #include +#include +#include #include namespace ue2 { -#define DETERMINISE_RESERVE_SIZE 10 - /* Automaton details: * * const vector initial() @@ -73,42 +72,44 @@ namespace ue2 { * \param state_limit limit on the number of dfa states to construct * \param statesets_out a mapping from DFA state to the set of NFA states in * the automaton - * \return zero on success + * \return true on success, false if state limit exceeded */ template never_inline -int determinise(Auto &n, std::vector &dstates_out, dstate_id_t state_limit, +bool determinise(Auto &n, std::vector &dstates, size_t state_limit, std::vector *statesets_out = nullptr) { DEBUG_PRINTF("the determinator\n"); typedef typename Auto::StateSet StateSet; typedef typename Auto::StateMap DstateIdMap; DstateIdMap dstate_ids; - std::vector statesets; const size_t alphabet_size = n.alphasize; - std::vector dstates; - dstates.reserve(DETERMINISE_RESERVE_SIZE); - statesets.reserve(DETERMINISE_RESERVE_SIZE); + dstates.clear(); + dstates.reserve(state_limit); - dstate_ids[n.dead] = DEAD_STATE; + dstate_ids.emplace(n.dead, DEAD_STATE); dstates.push_back(ds(alphabet_size)); std::fill_n(dstates[0].next.begin(), alphabet_size, DEAD_STATE); - statesets.push_back(n.dead); + std::queue> q; + q.emplace(n.dead, DEAD_STATE); const std::vector &init = n.initial(); for (u32 i = 0; i < init.size(); i++) { - statesets.push_back(init[i]); + q.emplace(init[i], dstates.size()); assert(!contains(dstate_ids, init[i])); - dstate_ids[init[i]] = dstates.size(); + dstate_ids.emplace(init[i], dstates.size()); dstates.push_back(ds(alphabet_size)); } std::vector succs(alphabet_size, n.dead); - for (dstate_id_t curr_id = DEAD_STATE; curr_id < dstates.size(); - curr_id++) { - StateSet &curr = statesets[curr_id]; + + while (!q.empty()) { + auto m = std::move(q.front()); + q.pop(); + StateSet &curr = m.first; + dstate_id_t curr_id = m.second; DEBUG_PRINTF("curr: %hu\n", curr_id); @@ -139,43 +140,46 @@ int determinise(Auto &n, std::vector &dstates_out, dstate_id_t state_limit, if (s && succs[s] == succs[s - 1]) { succ_id = dstates[curr_id].next[s - 1]; } else { - typename DstateIdMap::const_iterator dstate_id_iter; - dstate_id_iter = dstate_ids.find(succs[s]); - - if (dstate_id_iter != dstate_ids.end()) { - succ_id = dstate_id_iter->second; - + auto p = dstate_ids.emplace(succs[s], dstates.size()); + succ_id = p.first->second; + if (!p.second) { /* succs[s] is already present */ if (succ_id > curr_id && !dstates[succ_id].daddy && n.unalpha[s] < N_CHARS) { dstates[succ_id].daddy = curr_id; } } else { - statesets.push_back(succs[s]); - succ_id = dstates.size(); - dstate_ids[succs[s]] = succ_id; dstates.push_back(ds(alphabet_size)); dstates.back().daddy = n.unalpha[s] < N_CHARS ? curr_id : 0; + q.emplace(succs[s], succ_id); } DEBUG_PRINTF("-->%hu on %02hx\n", succ_id, n.unalpha[s]); } if (succ_id >= state_limit) { - DEBUG_PRINTF("succ_id %hu >= state_limit %hu\n", + DEBUG_PRINTF("succ_id %hu >= state_limit %zu\n", succ_id, state_limit); - return -2; + dstates.clear(); + return false; } dstates[curr_id].next[s] = succ_id; } } - dstates_out = dstates; + // The dstates vector will persist in the raw_dfa. + dstates.shrink_to_fit(); + if (statesets_out) { - statesets_out->swap(statesets); + auto &statesets = *statesets_out; + statesets.resize(dstate_ids.size()); + for (auto &m : dstate_ids) { + statesets[m.second] = std::move(m.first); + } } + DEBUG_PRINTF("ok\n"); - return 0; + return true; } static inline