diff --git a/CMakeLists.txt b/CMakeLists.txt index a4973c02..0a416405 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -593,6 +593,8 @@ SET (hs_SRCS src/nfa/goughcompile_accel.cpp src/nfa/goughcompile_internal.h src/nfa/goughcompile_reg.cpp + src/nfa/goughcompile_util.cpp + src/nfa/goughcompile_util.h src/nfa/mcclellan.h src/nfa/mcclellan_internal.h src/nfa/mcclellancompile.cpp diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index d7312b85..3d99690a 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -50,6 +50,7 @@ #include "util/make_unique.h" #include "util/multibit_build.h" #include "util/multibit_internal.h" +#include "util/report_manager.h" #include "util/ue2_containers.h" #include "util/verify_types.h" #include "grey.h" @@ -990,4 +991,29 @@ unique_ptr makeHolder(const CastleProto &proto, nfa_kind kind, return g; } +static +void remapReportsToPrograms(PureRepeat &pr, const ReportManager &rm) { + if (pr.reports.empty()) { + return; + } + auto old_reports = pr.reports; + pr.reports.clear(); + for (const auto &r : old_reports) { + pr.reports.insert(rm.getProgramOffset(r)); + } +} + +void remapReportsToPrograms(CastleProto &castle, const ReportManager &rm) { + for (auto &m : castle.repeats) { + remapReportsToPrograms(m.second, rm); + } + + auto old_report_map = castle.report_map; + castle.report_map.clear(); + for (auto &m : old_report_map) { + u32 program = rm.getProgramOffset(m.first); + castle.report_map[program].insert(begin(m.second), end(m.second)); + } +} + } // namespace ue2 diff --git a/src/nfa/castlecompile.h b/src/nfa/castlecompile.h index fc4bb991..73c75326 100644 --- a/src/nfa/castlecompile.h +++ b/src/nfa/castlecompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -51,6 +51,7 @@ namespace ue2 { class CharReach; class NGHolder; +class ReportManager; struct CompileContext; /** @@ -158,6 +159,8 @@ bool requiresDedupe(const CastleProto &proto, std::unique_ptr makeHolder(const CastleProto &castle, nfa_kind kind, const CompileContext &cc); +void remapReportsToPrograms(CastleProto &castle, const ReportManager &rm); + } // namespace ue2 #endif // NFA_CASTLECOMPILE_H diff --git a/src/nfa/goughcompile_util.cpp b/src/nfa/goughcompile_util.cpp new file mode 100644 index 00000000..33030131 --- /dev/null +++ b/src/nfa/goughcompile_util.cpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "goughcompile.h" +#include "goughcompile_util.h" +#include "mcclellancompile_util.h" +#include "util/report_manager.h" + +#include "ue2common.h" + +using namespace std; +using namespace ue2; + +namespace ue2 { + +static +void remapReportsToPrograms(set &reports, + const ReportManager &rm) { + if (reports.empty()) { + return; + } + auto old_reports = reports; + reports.clear(); + for (const auto &r : old_reports) { + u32 program = rm.getProgramOffset(r.report); + reports.emplace(program, r.slot); + } +} + +void remapReportsToPrograms(raw_som_dfa &haig, const ReportManager &rm) { + DEBUG_PRINTF("remap haig reports\n"); + + for (auto &ds : haig.state_som) { + remapReportsToPrograms(ds.reports, rm); + remapReportsToPrograms(ds.reports_eod, rm); + } + + // McClellan-style reports too. + raw_dfa &rdfa = haig; + remapReportsToPrograms(rdfa, rm); +} + +} // namespace ue2 diff --git a/src/nfa/goughcompile_util.h b/src/nfa/goughcompile_util.h new file mode 100644 index 00000000..05c9d90d --- /dev/null +++ b/src/nfa/goughcompile_util.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef GOUGHCOMPILE_UTIL_H +#define GOUGHCOMPILE_UTIL_H + +namespace ue2 { + +struct raw_som_dfa; +class ReportManager; + +void remapReportsToPrograms(raw_som_dfa &haig, const ReportManager &rm); + +} // namespace ue2 + +#endif // GOUGHCOMPILE_UTIL_H diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp index 234574d8..d05a6776 100644 --- a/src/nfa/mcclellancompile_util.cpp +++ b/src/nfa/mcclellancompile_util.cpp @@ -395,4 +395,27 @@ dstate_id_t get_sds_or_proxy(const raw_dfa &raw) { } } +static +void remapReportsToPrograms(flat_set &reports, + const ReportManager &rm) { + if (reports.empty()) { + return; + } + auto old_reports = reports; + reports.clear(); + for (const ReportID &id : old_reports) { + u32 program = rm.getProgramOffset(id); + reports.insert(program); + } +} + +void remapReportsToPrograms(raw_dfa &rdfa, const ReportManager &rm) { + DEBUG_PRINTF("remap dfa reports\n"); + for (auto &ds : rdfa.states) { + remapReportsToPrograms(ds.reports, rm); + remapReportsToPrograms(ds.reports_eod, rm); + } +} + + } // namespace ue2 diff --git a/src/nfa/mcclellancompile_util.h b/src/nfa/mcclellancompile_util.h index 7b6c033a..e8bfd4a1 100644 --- a/src/nfa/mcclellancompile_util.h +++ b/src/nfa/mcclellancompile_util.h @@ -31,6 +31,7 @@ #include "rdfa.h" #include "ue2common.h" +#include "util/report_manager.h" #include @@ -57,6 +58,8 @@ size_t hash_dfa(const raw_dfa &rdfa); dstate_id_t get_sds_or_proxy(const raw_dfa &raw); +void remapReportsToPrograms(raw_dfa &rdfa, const ReportManager &rm); + } // namespace ue2 #endif diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp index 17f93bb2..713fe370 100644 --- a/src/nfagraph/ng_limex.cpp +++ b/src/nfagraph/ng_limex.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -50,8 +50,9 @@ #include "util/compile_context.h" #include "util/container.h" #include "util/graph_range.h" -#include "util/verify_types.h" +#include "util/report_manager.h" #include "util/ue2_containers.h" +#include "util/verify_types.h" #include #include @@ -346,6 +347,25 @@ prepareGraph(const NGHolder &h_in, const ReportManager *rm, return h; } +static +void remapReportsToPrograms(NGHolder &h, const ReportManager &rm) { + for (const auto &v : vertices_range(h)) { + auto &reports = h[v].reports; + if (reports.empty()) { + continue; + } + auto old_reports = reports; + reports.clear(); + for (const ReportID &id : old_reports) { + u32 program = rm.getProgramOffset(id); + reports.insert(program); + } + DEBUG_PRINTF("vertex %u: remapped reports {%s} to programs {%s}\n", + h[v].index, as_string_list(old_reports).c_str(), + as_string_list(reports).c_str()); + } +} + static aligned_unique_ptr constructNFA(const NGHolder &h_in, const ReportManager *rm, @@ -393,6 +413,11 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm, set zombies = findZombies(*h, br_cyclic, state_ids, cc); + if (generates_callbacks(*h)) { + assert(rm); + remapReportsToPrograms(*h, *rm); + } + if (!cc.streaming || !cc.grey.compressNFAState) { compress_state = false; } diff --git a/src/rose/catchup.c b/src/rose/catchup.c index 7c44bf9f..d6e7860c 100644 --- a/src/rose/catchup.c +++ b/src/rose/catchup.c @@ -49,14 +49,15 @@ typedef struct queue_match PQ_T; static really_inline int roseNfaRunProgram(const struct RoseEngine *rose, struct hs_scratch *scratch, u64a som, u64a offset, ReportID id, const char from_mpv) { - assert(id < rose->reportProgramCount); - const u32 *programs = getByOffset(rose, rose->reportProgramOffset); + const u32 program = id; + assert(program > 0); + assert(program % ROSE_INSTR_MIN_ALIGN == 0); const size_t match_len = 0; // Unused in this path. const char in_anchored = 0; const char in_catchup = 1; - roseRunProgram(rose, scratch, programs[id], som, offset, match_len, - in_anchored, in_catchup, from_mpv, 0); + roseRunProgram(rose, scratch, program, som, offset, match_len, in_anchored, + in_catchup, from_mpv, 0); return can_stop_matching(scratch) ? MO_HALT_MATCHING : MO_CONTINUE_MATCHING; } diff --git a/src/rose/match.c b/src/rose/match.c index ef75b113..4e9e72a6 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -644,16 +644,15 @@ int roseReportAdaptor_i(u64a som, u64a offset, ReportID id, void *context) { const struct RoseEngine *rose = scratch->core_info.rose; - assert(id < rose->reportProgramCount); - const u32 *programs = getByOffset(rose, rose->reportProgramOffset); - + // Our match ID is the program offset. + const u32 program = id; const size_t match_len = 0; // Unused in this path. const char in_anchored = 0; const char in_catchup = 0; const char from_mpv = 0; const char skip_mpv_catchup = 1; hwlmcb_rv_t rv = - roseRunProgram(rose, scratch, programs[id], som, offset, match_len, + roseRunProgram(rose, scratch, program, som, offset, match_len, in_anchored, in_catchup, from_mpv, skip_mpv_catchup); if (rv == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index ea3a80a3..0454c83b 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -43,7 +43,9 @@ #include "hwlm/hwlm.h" /* engine types */ #include "nfa/castlecompile.h" #include "nfa/goughcompile.h" +#include "nfa/goughcompile_util.h" #include "nfa/mcclellancompile.h" +#include "nfa/mcclellancompile_util.h" #include "nfa/nfa_api_queue.h" #include "nfa/nfa_build_util.h" #include "nfa/nfa_internal.h" @@ -885,19 +887,25 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, const map>> &triggers, suffix_id suff, const CompileContext &cc) { if (suff.castle()) { - auto n = buildRepeatEngine(*suff.castle(), triggers, cc); + auto remapped_castle = *suff.castle(); + remapReportsToPrograms(remapped_castle, rm); + auto n = buildRepeatEngine(remapped_castle, triggers, cc); assert(n); return n; } if (suff.haig()) { - auto n = goughCompile(*suff.haig(), ssm.somPrecision(), cc); + auto remapped_haig = *suff.haig(); + remapReportsToPrograms(remapped_haig, rm); + auto n = goughCompile(remapped_haig, ssm.somPrecision(), cc); assert(n); return n; } if (suff.dfa()) { - auto d = mcclellanCompile(*suff.dfa(), cc); + auto remapped_rdfa = *suff.dfa(); + remapReportsToPrograms(remapped_rdfa, rm); + auto d = mcclellanCompile(remapped_rdfa, cc); assert(d); return d; } @@ -910,7 +918,9 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, // Take a shot at the LBR engine. if (oneTop) { - auto lbr = constructLBR(holder, triggers.at(0), cc); + auto remapped_holder = cloneHolder(holder); + remapReportsToPrograms(*remapped_holder, rm); + auto lbr = constructLBR(*remapped_holder, triggers.at(0), cc); if (lbr) { return lbr; } @@ -926,6 +936,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0), cc.grey); if (rdfa) { + remapReportsToPrograms(*rdfa, rm); auto d = mcclellanCompile(*rdfa, cc); assert(d); if (cc.grey.roseMcClellanSuffix != 2) { @@ -1267,12 +1278,16 @@ public: aligned_unique_ptr operator()(unique_ptr &rdfa) const { // Unleash the McClellan! - return mcclellanCompile(*rdfa, build.cc); + raw_dfa tmp(*rdfa); + remapReportsToPrograms(tmp, build.rm); + return mcclellanCompile(tmp, build.cc); } aligned_unique_ptr operator()(unique_ptr &haig) const { // Unleash the Goughfish! - return goughCompile(*haig, build.ssm.somPrecision(), build.cc); + raw_som_dfa tmp(*haig); + remapReportsToPrograms(tmp, build.rm); + return goughCompile(tmp, build.ssm.somPrecision(), build.cc); } aligned_unique_ptr operator()(unique_ptr &holder) const { @@ -1327,6 +1342,16 @@ aligned_unique_ptr buildOutfix(RoseBuildImpl &build, OutfixInfo &outfix) { return n; } +static +void remapReportsToPrograms(MpvProto &mpv, const ReportManager &rm) { + for (auto &puff : mpv.puffettes) { + puff.report = rm.getProgramOffset(puff.report); + } + for (auto &puff : mpv.triggered_puffettes) { + puff.report = rm.getProgramOffset(puff.report); + } +} + static void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, bool *mpv_as_outfix) { @@ -1349,7 +1374,9 @@ void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, } auto *mpv = mpv_outfix->mpv(); - auto nfa = mpvCompile(mpv->puffettes, mpv->triggered_puffettes); + auto tmp = *mpv; // copy + remapReportsToPrograms(tmp, tbi.rm); + auto nfa = mpvCompile(tmp.puffettes, tmp.triggered_puffettes); assert(nfa); if (!nfa) { throw CompileError("Unable to generate bytecode."); @@ -4000,6 +4027,8 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { auto boundary_out = makeBoundaryPrograms(*this, bc, boundary, dboundary); + u32 reportProgramOffset = buildReportPrograms(*this, bc); + // Build NFAs set no_retrigger_queues; bool mpv_as_outfix; @@ -4045,8 +4074,6 @@ aligned_unique_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { u32 eodIterOffset; tie(eodIterProgramOffset, eodIterOffset) = buildEodAnchorProgram(*this, bc); - u32 reportProgramOffset = buildReportPrograms(*this, bc); - vector activeLeftIter; buildActiveLeftIter(leftInfoTable, activeLeftIter); diff --git a/src/smallwrite/smallwrite_build.cpp b/src/smallwrite/smallwrite_build.cpp index dfefe5e8..a3fe43d4 100644 --- a/src/smallwrite/smallwrite_build.cpp +++ b/src/smallwrite/smallwrite_build.cpp @@ -376,6 +376,8 @@ SmallWriteBuildImpl::build(u32 roseQuality) { DEBUG_PRINTF("building rdfa %p\n", rdfa.get()); + remapReportsToPrograms(*rdfa, rm); + u32 start_offset; u32 small_region; auto nfa = prepEngine(*rdfa, roseQuality, cc, &start_offset, &small_region); diff --git a/unit/internal/limex_nfa.cpp b/unit/internal/limex_nfa.cpp index 6d77fffe..91ab09db 100644 --- a/unit/internal/limex_nfa.cpp +++ b/unit/internal/limex_nfa.cpp @@ -46,7 +46,9 @@ using namespace std; using namespace testing; using namespace ue2; -static const string SCAN_DATA = "___foo______\n___foofoo_foo_^^^^^^^^^^^^^^^^^^^^^^__bar_bar______0_______z_____bar"; +static const string SCAN_DATA = "___foo______\n___foofoo_foo_^^^^^^^^^^^^^^^^^^" + "^^^^__bar_bar______0_______z_____bar"; +static const u32 MATCH_REPORT = 1024; static int onMatch(u64a, ReportID, void *ctx) { @@ -75,6 +77,8 @@ protected: unique_ptr g = buildWrapper(rm, cc, parsed); ASSERT_TRUE(g != nullptr); + rm.setProgramOffset(0, MATCH_REPORT); + const map fixed_depth_tops; const map>> triggers; bool compress_state = false; @@ -223,7 +227,7 @@ TEST_P(LimExModelTest, QueueExecToMatch) { char rv = nfaQueueExecToMatch(nfa.get(), &q, end); ASSERT_EQ(MO_MATCHES_PENDING, rv); ASSERT_EQ(0, matches); - ASSERT_NE(0, nfaInAcceptState(nfa.get(), 0, &q)); + ASSERT_NE(0, nfaInAcceptState(nfa.get(), MATCH_REPORT, &q)); nfaReportCurrentMatches(nfa.get(), &q); ASSERT_EQ(1, matches); @@ -232,7 +236,7 @@ TEST_P(LimExModelTest, QueueExecToMatch) { rv = nfaQueueExecToMatch(nfa.get(), &q, end); ASSERT_EQ(MO_MATCHES_PENDING, rv); ASSERT_EQ(1, matches); - ASSERT_NE(0, nfaInAcceptState(nfa.get(), 0, &q)); + ASSERT_NE(0, nfaInAcceptState(nfa.get(), MATCH_REPORT, &q)); nfaReportCurrentMatches(nfa.get(), &q); ASSERT_EQ(2, matches); @@ -241,7 +245,7 @@ TEST_P(LimExModelTest, QueueExecToMatch) { rv = nfaQueueExecToMatch(nfa.get(), &q, end); ASSERT_EQ(MO_MATCHES_PENDING, rv); ASSERT_EQ(2, matches); - ASSERT_NE(0, nfaInAcceptState(nfa.get(), 0, &q)); + ASSERT_NE(0, nfaInAcceptState(nfa.get(), MATCH_REPORT, &q)); nfaReportCurrentMatches(nfa.get(), &q); ASSERT_EQ(3, matches); @@ -267,10 +271,10 @@ TEST_P(LimExModelTest, QueueExecRose) { pushQueue(&q, MQE_TOP, 0); pushQueue(&q, MQE_END, end); - char rv = nfaQueueExecRose(nfa.get(), &q, 0 /* report id */); + char rv = nfaQueueExecRose(nfa.get(), &q, MATCH_REPORT); ASSERT_EQ(MO_MATCHES_PENDING, rv); pushQueue(&q, MQE_START, end); - ASSERT_NE(0, nfaInAcceptState(nfa.get(), 0, &q)); + ASSERT_NE(0, nfaInAcceptState(nfa.get(), MATCH_REPORT, &q)); } TEST_P(LimExModelTest, CheckFinalState) { @@ -367,6 +371,8 @@ protected: unique_ptr g = buildWrapper(rm, cc, parsed); ASSERT_TRUE(g != nullptr); + rm.setProgramOffset(0, MATCH_REPORT); + const map fixed_depth_tops; const map>> triggers; bool compress_state = false;