From c101beb541a30d04148abb37128aa88852a486aa Mon Sep 17 00:00:00 2001 From: Justin Viiret Date: Tue, 19 Apr 2016 14:42:45 +1000 Subject: [PATCH] castle, lbr: native report remap --- src/nfa/castle.c | 3 ++- src/nfa/castlecompile.cpp | 40 +++++++++----------------------- src/nfa/castlecompile.h | 4 +--- src/nfagraph/ng_lbr.cpp | 29 ++++++++++++++++++----- src/nfagraph/ng_lbr.h | 14 +++++------ src/rose/rose_build_bytecode.cpp | 18 ++++++-------- unit/internal/lbr.cpp | 13 +++++++---- 7 files changed, 59 insertions(+), 62 deletions(-) diff --git a/src/nfa/castle.c b/src/nfa/castle.c index 5558381d..13a44a97 100644 --- a/src/nfa/castle.c +++ b/src/nfa/castle.c @@ -96,7 +96,8 @@ char subCastleReportCurrent(const struct Castle *c, struct mq *q, repeatHasMatch(info, rctrl, rstate, offset); DEBUG_PRINTF("repeatHasMatch returned %d\n", match); if (match == REPEAT_MATCH) { - DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, subIdx); + DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset, + subIdx, sub->report); if (q->cb(offset, sub->report, q->context) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } diff --git a/src/nfa/castlecompile.cpp b/src/nfa/castlecompile.cpp index c05cd959..4bddf767 100644 --- a/src/nfa/castlecompile.cpp +++ b/src/nfa/castlecompile.cpp @@ -344,11 +344,14 @@ void buildSubcastles(const CastleProto &proto, vector &subs, u32 &scratchStateSize, u32 &streamStateSize, u32 &tableSize, vector &tables, u32 &sparseRepeats, const ExclusiveInfo &exclusiveInfo, - vector &may_stale) { + vector &may_stale, const ReportManager &rm) { + const bool remap_reports = has_managed_reports(proto.kind); + u32 i = 0; const auto &groupId = exclusiveInfo.groupId; const auto &numGroups = exclusiveInfo.numGroups; vector maxStreamSize(numGroups, 0); + for (auto it = proto.repeats.begin(), ite = proto.repeats.end(); it != ite; ++it, ++i) { const PureRepeat &pr = it->second; @@ -400,7 +403,9 @@ void buildSubcastles(const CastleProto &proto, vector &subs, info.encodingSize = rsi.encodingSize; info.patchesOffset = rsi.patchesOffset; - sub.report = *pr.reports.begin(); + assert(pr.reports.size() == 1); + ReportID id = *pr.reports.begin(); + sub.report = remap_reports ? rm.getProgramOffset(id) : id; if (rtype == REPEAT_SPARSE_OPTIMAL_P) { for (u32 j = 0; j < rsi.patchSize; j++) { @@ -435,7 +440,7 @@ void buildSubcastles(const CastleProto &proto, vector &subs, aligned_unique_ptr buildCastle(const CastleProto &proto, const map>> &triggers, - const CompileContext &cc) { + const CompileContext &cc, const ReportManager &rm) { assert(cc.grey.allowCastle); const size_t numRepeats = proto.repeats.size(); @@ -548,7 +553,7 @@ buildCastle(const CastleProto &proto, buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair, scratchStateSize, streamStateSize, tableSize, - tables, sparseRepeats, exclusiveInfo, may_stale); + tables, sparseRepeats, exclusiveInfo, may_stale, rm); DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size()); vector stale_iter; @@ -816,6 +821,7 @@ bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2, ReportID report2) { assert(!c1.repeats.empty()); assert(!c2.repeats.empty()); + assert(c1.kind == c2.kind); if (c1.reach() != c2.reach()) { DEBUG_PRINTF("different reach\n"); @@ -862,6 +868,7 @@ bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2, bool is_equal(const CastleProto &c1, const CastleProto &c2) { assert(!c1.repeats.empty()); assert(!c2.repeats.empty()); + assert(c1.kind == c2.kind); if (c1.reach() != c2.reach()) { DEBUG_PRINTF("different reach\n"); @@ -992,29 +999,4 @@ unique_ptr makeHolder(const CastleProto &proto, return g; } -static -void remapReportsToPrograms(PureRepeat &pr, const ReportManager &rm) { - if (pr.reports.empty()) { - return; - } - auto old_reports = pr.reports; - pr.reports.clear(); - for (const auto &r : old_reports) { - pr.reports.insert(rm.getProgramOffset(r)); - } -} - -void remapReportsToPrograms(CastleProto &castle, const ReportManager &rm) { - for (auto &m : castle.repeats) { - remapReportsToPrograms(m.second, rm); - } - - auto old_report_map = castle.report_map; - castle.report_map.clear(); - for (auto &m : old_report_map) { - u32 program = rm.getProgramOffset(m.first); - castle.report_map[program].insert(begin(m.second), end(m.second)); - } -} - } // namespace ue2 diff --git a/src/nfa/castlecompile.h b/src/nfa/castlecompile.h index 1019fb90..938e57c4 100644 --- a/src/nfa/castlecompile.h +++ b/src/nfa/castlecompile.h @@ -123,7 +123,7 @@ void remapCastleTops(CastleProto &proto, std::map &top_map); ue2::aligned_unique_ptr buildCastle(const CastleProto &proto, const std::map>> &triggers, - const CompileContext &cc); + const CompileContext &cc, const ReportManager &rm); /** * \brief Merge two CastleProto prototypes together, if possible. @@ -162,8 +162,6 @@ bool requiresDedupe(const CastleProto &proto, std::unique_ptr makeHolder(const CastleProto &castle, const CompileContext &cc); -void remapReportsToPrograms(CastleProto &castle, const ReportManager &rm); - } // namespace ue2 #endif // NFA_CASTLECOMPILE_H diff --git a/src/nfagraph/ng_lbr.cpp b/src/nfagraph/ng_lbr.cpp index b9cacaa7..d7183817 100644 --- a/src/nfagraph/ng_lbr.cpp +++ b/src/nfagraph/ng_lbr.cpp @@ -36,17 +36,19 @@ #include "ng_holder.h" #include "ng_repeat.h" #include "ng_reports.h" -#include "nfa/shufticompile.h" -#include "nfa/trufflecompile.h" +#include "nfa/castlecompile.h" #include "nfa/lbr_internal.h" #include "nfa/nfa_internal.h" #include "nfa/repeatcompile.h" +#include "nfa/shufticompile.h" +#include "nfa/trufflecompile.h" #include "util/alloc.h" #include "util/bitutils.h" // for lg2 #include "util/compile_context.h" #include "util/container.h" #include "util/depth.h" #include "util/dump_charclass.h" +#include "util/report_manager.h" #include "util/verify_types.h" using namespace std; @@ -294,13 +296,19 @@ aligned_unique_ptr constructLBR(const CharReach &cr, return nfa; } -aligned_unique_ptr constructLBR(const PureRepeat &repeat, +aligned_unique_ptr constructLBR(const CastleProto &proto, const vector> &triggers, - const CompileContext &cc) { + const CompileContext &cc, + const ReportManager &rm) { if (!cc.grey.allowLbr) { return nullptr; } + if (proto.repeats.size() != 1) { + return nullptr; + } + + const PureRepeat &repeat = proto.repeats.begin()->second; assert(!repeat.reach.none()); if (repeat.reports.size() != 1) { @@ -317,6 +325,9 @@ aligned_unique_ptr constructLBR(const PureRepeat &repeat, } ReportID report = *repeat.reports.begin(); + if (has_managed_reports(proto.kind)) { + report = rm.getProgramOffset(report); + } DEBUG_PRINTF("building LBR %s\n", repeat.bounds.str().c_str()); return constructLBR(repeat.reach, repeat.bounds.min, repeat.bounds.max, @@ -326,7 +337,8 @@ aligned_unique_ptr constructLBR(const PureRepeat &repeat, /** \brief Construct an LBR engine from the given graph \p g. */ aligned_unique_ptr constructLBR(const NGHolder &g, const vector> &triggers, - const CompileContext &cc) { + const CompileContext &cc, + const ReportManager &rm) { if (!cc.grey.allowLbr) { return nullptr; } @@ -335,8 +347,13 @@ aligned_unique_ptr constructLBR(const NGHolder &g, if (!isPureRepeat(g, repeat)) { return nullptr; } + if (repeat.reports.size() != 1) { + DEBUG_PRINTF("too many reports\n"); + return nullptr; + } - return constructLBR(repeat, triggers, cc); + CastleProto proto(g.kind, repeat); + return constructLBR(proto, triggers, cc, rm); } /** \brief True if graph \p g could be turned into an LBR engine. */ diff --git a/src/nfagraph/ng_lbr.h b/src/nfagraph/ng_lbr.h index fad079ae..99cb0fcb 100644 --- a/src/nfagraph/ng_lbr.h +++ b/src/nfagraph/ng_lbr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -46,22 +46,22 @@ namespace ue2 { class CharReach; class NGHolder; class ReportManager; +struct CastleProto; struct CompileContext; -struct DepthMinMax; struct Grey; -struct PureRepeat; /** \brief Construct an LBR engine from the given graph \p g. */ aligned_unique_ptr constructLBR(const NGHolder &g, const std::vector> &triggers, - const CompileContext &cc); + const CompileContext &cc, const ReportManager &rm); -/** \brief Construct an LBR engine from the given PureRepeat. */ +/** \brief Construct an LBR engine from the given CastleProto, which should + * contain only one repeat. */ aligned_unique_ptr -constructLBR(const PureRepeat &repeat, +constructLBR(const CastleProto &proto, const std::vector> &triggers, - const CompileContext &cc); + const CompileContext &cc, const ReportManager &rm); /** \brief True if graph \p g could be turned into an LBR engine. */ bool isLBR(const NGHolder &g, const Grey &grey); diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 5db24b3b..bcf91fea 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -866,14 +866,14 @@ static aligned_unique_ptr buildRepeatEngine(const CastleProto &proto, const map>> &triggers, - const CompileContext &cc) { + const CompileContext &cc, const ReportManager &rm) { // If we only have one repeat, the LBR should always be the best possible // implementation. if (proto.repeats.size() == 1 && cc.grey.allowLbr) { - return constructLBR(proto.repeats.begin()->second, triggers.at(0), cc); + return constructLBR(proto, triggers.at(0), cc, rm); } - aligned_unique_ptr castle_nfa = buildCastle(proto, triggers, cc); + auto castle_nfa = buildCastle(proto, triggers, cc, rm); assert(castle_nfa); // Should always be constructible. return castle_nfa; } @@ -886,9 +886,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, const map>> &triggers, suffix_id suff, const CompileContext &cc) { if (suff.castle()) { - auto remapped_castle = *suff.castle(); - remapReportsToPrograms(remapped_castle, rm); - auto n = buildRepeatEngine(remapped_castle, triggers, cc); + auto n = buildRepeatEngine(*suff.castle(), triggers, cc, rm); assert(n); return n; } @@ -913,9 +911,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, // Take a shot at the LBR engine. if (oneTop) { - auto remapped_holder = cloneHolder(holder); - remapReportsToPrograms(*remapped_holder, rm); - auto lbr = constructLBR(*remapped_holder, triggers.at(0), cc); + auto lbr = constructLBR(holder, triggers.at(0), cc, rm); if (lbr) { return lbr; } @@ -1044,7 +1040,7 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, assert(!is_prefix); map > > triggers; findTriggerSequences(tbi, infixTriggers.at(left), &triggers); - n = buildRepeatEngine(*left.castle(), triggers, cc); + n = buildRepeatEngine(*left.castle(), triggers, cc, rm); assert(n); return n; // Castles/LBRs are always best! } @@ -1064,7 +1060,7 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, map > > triggers; findTriggerSequences(tbi, infixTriggers.at(left), &triggers); assert(contains(triggers, 0)); // single top - n = constructLBR(*left.graph(), triggers[0], cc); + n = constructLBR(*left.graph(), triggers[0], cc, rm); } if (!n && left.graph()) { diff --git a/unit/internal/lbr.cpp b/unit/internal/lbr.cpp index bb180e5f..2bb359df 100644 --- a/unit/internal/lbr.cpp +++ b/unit/internal/lbr.cpp @@ -49,6 +49,8 @@ using namespace std; using namespace testing; using namespace ue2; +static constexpr u32 MATCH_REPORT = 1024; + struct LbrTestParams { CharReach reach; u32 min; @@ -98,10 +100,11 @@ protected: ASSERT_TRUE(isLBR(*g, grey)); - vector > triggers; - triggers.push_back(vector()); - triggers.back().push_back(CharReach::dot()); /* lbr triggered by . */ - nfa = constructLBR(*g, triggers, cc); + rm.setProgramOffset(0, MATCH_REPORT); + + /* LBR triggered by dot */ + vector> triggers = {{CharReach::dot()}}; + nfa = constructLBR(*g, triggers, cc, rm); ASSERT_TRUE(nfa != nullptr); full_state = aligned_zmalloc_unique(nfa->scratchStateSize); @@ -247,7 +250,7 @@ TEST_P(LbrTest, QueueExecToMatch) { char rv = nfaQueueExecToMatch(nfa.get(), &q, end); ASSERT_EQ(MO_MATCHES_PENDING, rv); ASSERT_EQ(0, matches); - ASSERT_NE(0, nfaInAcceptState(nfa.get(), 0, &q)); + ASSERT_NE(0, nfaInAcceptState(nfa.get(), MATCH_REPORT, &q)); nfaReportCurrentMatches(nfa.get(), &q); ASSERT_EQ(1, matches); }