diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a416405..a4973c02 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -593,8 +593,6 @@ SET (hs_SRCS src/nfa/goughcompile_accel.cpp src/nfa/goughcompile_internal.h src/nfa/goughcompile_reg.cpp - src/nfa/goughcompile_util.cpp - src/nfa/goughcompile_util.h src/nfa/mcclellan.h src/nfa/mcclellan_internal.h src/nfa/mcclellancompile.cpp diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index b75e0463..647dc496 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -41,8 +41,9 @@ #include "util/graph_range.h" #include "util/make_unique.h" #include "util/order_check.h" -#include "util/verify_types.h" +#include "util/report_manager.h" #include "util/ue2_containers.h" +#include "util/verify_types.h" #include "ue2common.h" @@ -77,9 +78,10 @@ namespace { class gough_build_strat : public mcclellan_build_strat { public: - gough_build_strat(raw_som_dfa &r, const GoughGraph &g, - const map &accel_info) - : mcclellan_build_strat(r), rdfa(r), gg(g), + gough_build_strat( + raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm, + const map &accel_info) + : mcclellan_build_strat(r, rm), rdfa(r), gg(g), accel_gough_info(accel_info) {} unique_ptr gatherReports(vector &reports /* out */, vector &reports_eod /* out */, @@ -1035,7 +1037,8 @@ void update_accel_prog_offset(const gough_build_strat &gbs, } aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, - const CompileContext &cc) { + const CompileContext &cc, + const ReportManager &rm) { assert(somPrecision == 2 || somPrecision == 4 || somPrecision == 8 || !cc.streaming); @@ -1067,7 +1070,7 @@ aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, map accel_allowed; find_allowed_accel_states(*cfg, blocks, &accel_allowed); - gough_build_strat gbs(raw, *cfg, accel_allowed); + gough_build_strat gbs(raw, *cfg, rm, accel_allowed); aligned_unique_ptr basic_dfa = mcclellanCompile_i(raw, gbs, cc); assert(basic_dfa); if (!basic_dfa) { @@ -1195,10 +1198,11 @@ namespace { struct raw_gough_report_list { set reports; - explicit raw_gough_report_list( - const vector> &raw_reports) { + raw_gough_report_list( + const vector> &raw_reports, + const ReportManager &rm, bool do_remap) { for (const auto &m : raw_reports) { - ReportID r = m.first; + ReportID r = do_remap ? rm.getProgramOffset(m.first) : m.first; u32 impl_slot = INVALID_SLOT; if (m.second) { impl_slot = m.second->slot; @@ -1227,11 +1231,13 @@ unique_ptr gough_build_strat::gatherReports( vector &reports_eod, u8 *isSingleReport, ReportID *arbReport) const { - unique_ptr ri = - ue2::make_unique(); - map rev; DEBUG_PRINTF("gathering reports\n"); + const bool remap_reports = has_managed_reports(rdfa.kind); + + auto ri = ue2::make_unique(); + map rev; + assert(!rdfa.states.empty()); vector verts(rdfa.states.size()); @@ -1250,7 +1256,7 @@ unique_ptr gough_build_strat::gatherReports( continue; } - raw_gough_report_list rrl(gg[v].reports); + raw_gough_report_list rrl(gg[v].reports, rm, remap_reports); DEBUG_PRINTF("non empty r %zu\n", reports.size()); if (rev.find(rrl) != rev.end()) { reports.push_back(rev[rrl]); @@ -1269,7 +1275,7 @@ unique_ptr gough_build_strat::gatherReports( } DEBUG_PRINTF("non empty r eod\n"); - raw_gough_report_list rrl(gg[v].reports_eod); + raw_gough_report_list rrl(gg[v].reports_eod, rm, remap_reports); if (rev.find(rrl) != rev.end()) { reports_eod.push_back(rev[rrl]); continue; diff --git a/src/nfa/goughcompile.h b/src/nfa/goughcompile.h index 9da983d4..54f98cef 100644 --- a/src/nfa/goughcompile.h +++ b/src/nfa/goughcompile.h @@ -89,7 +89,8 @@ struct raw_som_dfa : public raw_dfa { }; aligned_unique_ptr goughCompile(raw_som_dfa &raw, u8 somPrecision, - const CompileContext &cc); + const CompileContext &cc, + const ReportManager &rm); } // namespace ue2 diff --git a/src/nfa/goughcompile_util.cpp b/src/nfa/goughcompile_util.cpp deleted file mode 100644 index 33030131..00000000 --- a/src/nfa/goughcompile_util.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "goughcompile.h" -#include "goughcompile_util.h" -#include "mcclellancompile_util.h" -#include "util/report_manager.h" - -#include "ue2common.h" - -using namespace std; -using namespace ue2; - -namespace ue2 { - -static -void remapReportsToPrograms(set &reports, - const ReportManager &rm) { - if (reports.empty()) { - return; - } - auto old_reports = reports; - reports.clear(); - for (const auto &r : old_reports) { - u32 program = rm.getProgramOffset(r.report); - reports.emplace(program, r.slot); - } -} - -void remapReportsToPrograms(raw_som_dfa &haig, const ReportManager &rm) { - DEBUG_PRINTF("remap haig reports\n"); - - for (auto &ds : haig.state_som) { - remapReportsToPrograms(ds.reports, rm); - remapReportsToPrograms(ds.reports_eod, rm); - } - - // McClellan-style reports too. - raw_dfa &rdfa = haig; - remapReportsToPrograms(rdfa, rm); -} - -} // namespace ue2 diff --git a/src/nfa/goughcompile_util.h b/src/nfa/goughcompile_util.h deleted file mode 100644 index 05c9d90d..00000000 --- a/src/nfa/goughcompile_util.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef GOUGHCOMPILE_UTIL_H -#define GOUGHCOMPILE_UTIL_H - -namespace ue2 { - -struct raw_som_dfa; -class ReportManager; - -void remapReportsToPrograms(raw_som_dfa &haig, const ReportManager &rm); - -} // namespace ue2 - -#endif // GOUGHCOMPILE_UTIL_H diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 8bc0b9d8..a9fbce94 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -46,6 +46,7 @@ #include "util/container.h" #include "util/make_unique.h" #include "util/order_check.h" +#include "util/report_manager.h" #include "util/ue2_containers.h" #include "util/unaligned.h" #include "util/verify_types.h" @@ -356,8 +357,16 @@ namespace { struct raw_report_list { flat_set reports; - explicit raw_report_list(const flat_set &reports_in) - : reports(reports_in) {} + raw_report_list(const flat_set &reports_in, + const ReportManager &rm, bool do_remap) { + if (do_remap) { + for (auto &id : reports_in) { + reports.insert(rm.getProgramOffset(id)); + } + } else { + reports = reports_in; + } + } bool operator<(const raw_report_list &b) const { return reports < b.reports; @@ -380,6 +389,8 @@ unique_ptr mcclellan_build_strat::gatherReports( ReportID *arbReport) const { DEBUG_PRINTF("gathering reports\n"); + const bool remap_reports = has_managed_reports(rdfa.kind); + auto ri = ue2::make_unique(); map rev; @@ -389,7 +400,7 @@ unique_ptr mcclellan_build_strat::gatherReports( continue; } - raw_report_list rrl(s.reports); + raw_report_list rrl(s.reports, rm, remap_reports); DEBUG_PRINTF("non empty r\n"); if (rev.find(rrl) != rev.end()) { reports.push_back(rev[rrl]); @@ -408,7 +419,7 @@ unique_ptr mcclellan_build_strat::gatherReports( } DEBUG_PRINTF("non empty r eod\n"); - raw_report_list rrl(s.reports_eod); + raw_report_list rrl(s.reports_eod, rm, remap_reports); if (rev.find(rrl) != rev.end()) { reports_eod.push_back(rev[rrl]); continue; @@ -579,8 +590,7 @@ aligned_unique_ptr mcclellanCompile16(dfa_info &info, return nullptr; } - unique_ptr ri - = info.strat.gatherReports(reports, reports_eod, &single, &arb); + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); map accel_escape_info = populateAccelerationInfo(info.raw, info.strat, cc.grey); @@ -799,8 +809,7 @@ aligned_unique_ptr mcclellanCompile8(dfa_info &info, ReportID arb; u8 single; - unique_ptr ri - = info.strat.gatherReports(reports, reports_eod, &single, &arb); + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); map accel_escape_info = populateAccelerationInfo(info.raw, info.strat, cc.grey); @@ -1086,8 +1095,9 @@ aligned_unique_ptr mcclellanCompile_i(raw_dfa &raw, dfa_build_strat &strat, } aligned_unique_ptr mcclellanCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, set *accel_states) { - mcclellan_build_strat mbs(raw); + mcclellan_build_strat mbs(raw, rm); return mcclellanCompile_i(raw, mbs, cc, accel_states); } diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index 781e262d..ba519cac 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -44,6 +44,7 @@ struct NFA; namespace ue2 { +class ReportManager; struct CompileContext; struct raw_report_info { @@ -57,6 +58,7 @@ struct raw_report_info { class dfa_build_strat { public: + explicit dfa_build_strat(const ReportManager &rm_in) : rm(rm_in) {} virtual ~dfa_build_strat(); virtual raw_dfa &get_raw() const = 0; virtual std::unique_ptr gatherReports( @@ -68,11 +70,14 @@ public: virtual size_t accelSize(void) const = 0; virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out) = 0; +protected: + const ReportManager &rm; }; class mcclellan_build_strat : public dfa_build_strat { public: - explicit mcclellan_build_strat(raw_dfa &r) : rdfa(r) {} + mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in) + : dfa_build_strat(rm_in), rdfa(rdfa_in) {} raw_dfa &get_raw() const override { return rdfa; } std::unique_ptr gatherReports( std::vector &reports /* out */, @@ -93,6 +98,7 @@ private: * states */ ue2::aligned_unique_ptr mcclellanCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, std::set *accel_states = nullptr); /* used internally by mcclellan/haig/gough compile process */ diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp index d05a6776..234574d8 100644 --- a/src/nfa/mcclellancompile_util.cpp +++ b/src/nfa/mcclellancompile_util.cpp @@ -395,27 +395,4 @@ dstate_id_t get_sds_or_proxy(const raw_dfa &raw) { } } -static -void remapReportsToPrograms(flat_set &reports, - const ReportManager &rm) { - if (reports.empty()) { - return; - } - auto old_reports = reports; - reports.clear(); - for (const ReportID &id : old_reports) { - u32 program = rm.getProgramOffset(id); - reports.insert(program); - } -} - -void remapReportsToPrograms(raw_dfa &rdfa, const ReportManager &rm) { - DEBUG_PRINTF("remap dfa reports\n"); - for (auto &ds : rdfa.states) { - remapReportsToPrograms(ds.reports, rm); - remapReportsToPrograms(ds.reports_eod, rm); - } -} - - } // namespace ue2 diff --git a/src/nfa/mcclellancompile_util.h b/src/nfa/mcclellancompile_util.h index e8bfd4a1..7b6c033a 100644 --- a/src/nfa/mcclellancompile_util.h +++ b/src/nfa/mcclellancompile_util.h @@ -31,7 +31,6 @@ #include "rdfa.h" #include "ue2common.h" -#include "util/report_manager.h" #include @@ -58,8 +57,6 @@ size_t hash_dfa(const raw_dfa &rdfa); dstate_id_t get_sds_or_proxy(const raw_dfa &raw); -void remapReportsToPrograms(raw_dfa &rdfa, const ReportManager &rm); - } // namespace ue2 #endif diff --git a/src/nfa/nfa_kind.h b/src/nfa/nfa_kind.h index f13e3770..128698a4 100644 --- a/src/nfa/nfa_kind.h +++ b/src/nfa/nfa_kind.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,17 +39,44 @@ enum nfa_kind { NFA_INFIX, //!< rose infix NFA_SUFFIX, //!< rose suffix NFA_OUTFIX, //!< "outfix" nfa not triggered by external events + NFA_OUTFIX_RAW, //!< "outfix", but with unmanaged reports NFA_REV_PREFIX, //! reverse running prefixes (for som) }; -static UNUSED +inline bool is_triggered(enum nfa_kind k) { - return k == NFA_INFIX || k == NFA_SUFFIX || k == NFA_REV_PREFIX; + switch (k) { + case NFA_INFIX: + case NFA_SUFFIX: + case NFA_REV_PREFIX: + return true; + default: + return false; + } } -static UNUSED +inline bool generates_callbacks(enum nfa_kind k) { - return k == NFA_SUFFIX || k == NFA_OUTFIX || k == NFA_REV_PREFIX; + switch (k) { + case NFA_SUFFIX: + case NFA_OUTFIX: + case NFA_OUTFIX_RAW: + case NFA_REV_PREFIX: + return true; + default: + return false; + } +} + +inline +bool has_managed_reports(enum nfa_kind k) { + switch (k) { + case NFA_SUFFIX: + case NFA_OUTFIX: + return true; + default: + return false; + } } } // namespace ue2 diff --git a/src/nfagraph/ng_split.cpp b/src/nfagraph/ng_split.cpp index 3a66b2fa..42157e1e 100644 --- a/src/nfagraph/ng_split.cpp +++ b/src/nfagraph/ng_split.cpp @@ -113,6 +113,7 @@ void splitLHS(const NGHolder &base, const vector &pivots, lhs->kind = NFA_INFIX; break; case NFA_REV_PREFIX: + case NFA_OUTFIX_RAW: assert(0); break; } @@ -154,6 +155,7 @@ void splitRHS(const NGHolder &base, const vector &pivots, rhs->kind = NFA_SUFFIX; break; case NFA_REV_PREFIX: + case NFA_OUTFIX_RAW: assert(0); break; } diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp index a0ccb7ae..23c122a7 100644 --- a/src/rose/rose_build_add.cpp +++ b/src/rose/rose_build_add.cpp @@ -1039,6 +1039,7 @@ bool canImplementGraph(RoseBuildImpl *tbi, const RoseInGraph &in, NGHolder &h, } break; case NFA_REV_PREFIX: + case NFA_OUTFIX_RAW: DEBUG_PRINTF("kind %u\n", (u32)h.kind); assert(0); } diff --git a/src/rose/rose_build_anchored.cpp b/src/rose/rose_build_anchored.cpp index 805dc920..35ff7138 100644 --- a/src/rose/rose_build_anchored.cpp +++ b/src/rose/rose_build_anchored.cpp @@ -660,7 +660,7 @@ int addAutomaton(RoseBuildImpl &tbi, const NGHolder &h, ReportID *remap) { Automaton_Holder autom(h); - unique_ptr out_dfa = ue2::make_unique(NFA_OUTFIX); + unique_ptr out_dfa = ue2::make_unique(NFA_OUTFIX_RAW); if (!determinise(autom, out_dfa->states, MAX_DFA_STATES)) { return finalise_out(tbi, h, autom, move(out_dfa), remap); } @@ -721,7 +721,7 @@ void buildSimpleDfas(const RoseBuildImpl &tbi, NGHolder h; populate_holder(simple.first, exit_ids, &h); Automaton_Holder autom(h); - unique_ptr rdfa = ue2::make_unique(NFA_OUTFIX); + unique_ptr rdfa = ue2::make_unique(NFA_OUTFIX_RAW); UNUSED int rv = determinise(autom, rdfa->states, MAX_DFA_STATES); assert(!rv); rdfa->start_anchored = INIT_STATE; @@ -771,7 +771,8 @@ vector> getAnchoredDfas(RoseBuildImpl &build) { static size_t buildNfas(vector &anchored_dfas, vector> *nfas, - vector *start_offset, const CompileContext &cc) { + vector *start_offset, const CompileContext &cc, + const ReportManager &rm) { const size_t num_dfas = anchored_dfas.size(); nfas->reserve(num_dfas); @@ -785,7 +786,7 @@ size_t buildNfas(vector &anchored_dfas, minimize_hopcroft(rdfa, cc.grey); - auto nfa = mcclellanCompile(rdfa, cc); + auto nfa = mcclellanCompile(rdfa, cc, rm); if (!nfa) { assert(0); throw std::bad_alloc(); @@ -836,7 +837,7 @@ buildAnchoredMatcher(RoseBuildImpl &build, vector &dfas, vector> nfas; vector start_offset; // start offset for each dfa (dots removed) - size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc); + size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc, build.rm); if (total_size > cc.grey.limitRoseAnchoredSize) { throw ResourceLimitError(); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 0454c83b..5db24b3b 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -43,7 +43,6 @@ #include "hwlm/hwlm.h" /* engine types */ #include "nfa/castlecompile.h" #include "nfa/goughcompile.h" -#include "nfa/goughcompile_util.h" #include "nfa/mcclellancompile.h" #include "nfa/mcclellancompile_util.h" #include "nfa/nfa_api_queue.h" @@ -895,17 +894,13 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, } if (suff.haig()) { - auto remapped_haig = *suff.haig(); - remapReportsToPrograms(remapped_haig, rm); - auto n = goughCompile(remapped_haig, ssm.somPrecision(), cc); + auto n = goughCompile(*suff.haig(), ssm.somPrecision(), cc, rm); assert(n); return n; } if (suff.dfa()) { - auto remapped_rdfa = *suff.dfa(); - remapReportsToPrograms(remapped_rdfa, rm); - auto d = mcclellanCompile(remapped_rdfa, cc); + auto d = mcclellanCompile(*suff.dfa(), cc, rm); assert(d); return d; } @@ -936,8 +931,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0), cc.grey); if (rdfa) { - remapReportsToPrograms(*rdfa, rm); - auto d = mcclellanCompile(*rdfa, cc); + auto d = mcclellanCompile(*rdfa, cc, rm); assert(d); if (cc.grey.roseMcClellanSuffix != 2) { n = pickImpl(move(d), move(n)); @@ -1024,6 +1018,8 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, const bool is_prefix, const bool is_transient, const map > &infixTriggers, const CompileContext &cc) { + const ReportManager &rm = tbi.rm; + aligned_unique_ptr n; // Should compress state if this rose is non-transient and we're in @@ -1054,12 +1050,12 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, } if (left.dfa()) { - n = mcclellanCompile(*left.dfa(), cc); + n = mcclellanCompile(*left.dfa(), cc, rm); } else if (left.graph() && cc.grey.roseMcClellanPrefix == 2 && is_prefix && !is_transient) { auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey); if (rdfa) { - n = mcclellanCompile(*rdfa, cc); + n = mcclellanCompile(*rdfa, cc, rm); } } @@ -1083,7 +1079,7 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, && (!n || !has_bounded_repeats_other_than_firsts(*n) || !is_fast(*n))) { auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey); if (rdfa) { - auto d = mcclellanCompile(*rdfa, cc); + auto d = mcclellanCompile(*rdfa, cc, rm); assert(d); n = pickImpl(move(d), move(n)); } @@ -1115,6 +1111,7 @@ bool buildLeftfixes(const RoseBuildImpl &tbi, build_context &bc, bool do_prefix) { const RoseGraph &g = tbi.g; const CompileContext &cc = tbi.cc; + const ReportManager &rm = tbi.rm; ue2::unordered_map seen; // already built queue indices @@ -1165,7 +1162,8 @@ bool buildLeftfixes(const RoseBuildImpl &tbi, build_context &bc, // Need to build NFA, which is either predestined to be a Haig (in // SOM mode) or could be all manner of things. if (leftfix.haig()) { - nfa = goughCompile(*leftfix.haig(), tbi.ssm.somPrecision(), cc); + nfa = goughCompile(*leftfix.haig(), tbi.ssm.somPrecision(), cc, + rm); } else { assert(tbi.isNonRootSuccessor(v) != tbi.isRootSuccessor(v)); nfa = makeLeftNfa(tbi, leftfix, is_prefix, is_transient, @@ -1278,16 +1276,13 @@ public: aligned_unique_ptr operator()(unique_ptr &rdfa) const { // Unleash the McClellan! - raw_dfa tmp(*rdfa); - remapReportsToPrograms(tmp, build.rm); - return mcclellanCompile(tmp, build.cc); + return mcclellanCompile(*rdfa, build.cc, build.rm); } aligned_unique_ptr operator()(unique_ptr &haig) const { // Unleash the Goughfish! - raw_som_dfa tmp(*haig); - remapReportsToPrograms(tmp, build.rm); - return goughCompile(tmp, build.ssm.somPrecision(), build.cc); + return goughCompile(*haig, build.ssm.somPrecision(), build.cc, + build.rm); } aligned_unique_ptr operator()(unique_ptr &holder) const { @@ -1309,7 +1304,7 @@ public: !has_bounded_repeats_other_than_firsts(*n)) { auto rdfa = buildMcClellan(h, &rm, cc.grey); if (rdfa) { - auto d = mcclellanCompile(*rdfa, cc); + auto d = mcclellanCompile(*rdfa, cc, rm); if (d) { n = pickImpl(move(d), move(n)); } diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index e56f322b..079dd556 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -73,6 +73,8 @@ string to_string(nfa_kind k) { return "o"; case NFA_REV_PREFIX: return "r"; + case NFA_OUTFIX_RAW: + return "O"; } assert(0); return "?"; diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index a3fe43d4..792a3d5b 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -303,14 +303,15 @@ bool is_slow(const raw_dfa &rdfa, const set &accel, static aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, - const CompileContext &cc, u32 *start_offset, + const CompileContext &cc, + const ReportManager &rm, u32 *start_offset, u32 *small_region) { *start_offset = remove_leading_dots(rdfa); // Unleash the McClellan! set accel_states; - auto nfa = mcclellanCompile(rdfa, cc, &accel_states); + auto nfa = mcclellanCompile(rdfa, cc, rm, &accel_states); if (!nfa) { DEBUG_PRINTF("mcclellan compile failed for smallwrite NFA\n"); return nullptr; @@ -328,7 +329,7 @@ aligned_unique_ptr prepEngine(raw_dfa &rdfa, u32 roseQuality, return nullptr; } - nfa = mcclellanCompile(rdfa, cc, &accel_states); + nfa = mcclellanCompile(rdfa, cc, rm, &accel_states); if (!nfa) { DEBUG_PRINTF("mcclellan compile failed for smallwrite NFA\n"); assert(0); /* able to build orig dfa but not the trimmed? */ @@ -376,11 +377,10 @@ SmallWriteBuildImpl::build(u32 roseQuality) { DEBUG_PRINTF("building rdfa %p\n", rdfa.get()); - remapReportsToPrograms(*rdfa, rm); - u32 start_offset; u32 small_region; - auto nfa = prepEngine(*rdfa, roseQuality, cc, &start_offset, &small_region); + auto nfa = + prepEngine(*rdfa, roseQuality, cc, rm, &start_offset, &small_region); if (!nfa) { DEBUG_PRINTF("some smallwrite outfix could not be prepped\n"); /* just skip the smallwrite optimization */