/* * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Intel Corporation nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef ROSE_BUILD_IMPL_H_17E20A3C6935D6 #define ROSE_BUILD_IMPL_H_17E20A3C6935D6 #include "rose_build.h" #include "rose_build_util.h" #include "rose_graph.h" #include "nfa/mpvcompile.h" #include "nfa/goughcompile.h" #include "nfa/nfa_internal.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_revacc.h" #include "util/alloc.h" #include "util/order_check.h" #include "util/queue_index_factory.h" #include "util/ue2_containers.h" #include #include #include #include #include #include #include struct RoseEngine; namespace ue2 { #define ROSE_GROUPS_MAX 64 struct BoundaryReports; struct CastleProto; struct CompileContext; class ReportManager; class SomSlotManager; struct suffix_id { suffix_id(const RoseSuffixInfo &in) : g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()), h(in.haig.get()), dfa_min_width(in.dfa_min_width), dfa_max_width(in.dfa_max_width) { assert(!g || g->kind == NFA_SUFFIX); } bool operator==(const suffix_id &b) const { bool rv = g == b.g && c == b.c && h == b.h && d == b.d; assert(!rv || dfa_min_width == b.dfa_min_width); assert(!rv || dfa_max_width == b.dfa_max_width); return rv; } bool operator!=(const suffix_id &b) const { return !(*this == b); } bool operator<(const suffix_id &b) const { const suffix_id &a = *this; ORDER_CHECK(g); ORDER_CHECK(c); ORDER_CHECK(d); ORDER_CHECK(h); return false; } NGHolder *graph() { if (!d && !h) { assert(dfa_min_width == depth(0)); assert(dfa_max_width == depth::infinity()); } return g; } const NGHolder *graph() const { if (!d && !h) { assert(dfa_min_width == depth(0)); assert(dfa_max_width == depth::infinity()); } return g; } CastleProto *castle() { if (!d && !h) { assert(dfa_min_width == depth(0)); assert(dfa_max_width == depth::infinity()); } return c; } const CastleProto *castle() const { if (!d && !h) { assert(dfa_min_width == depth(0)); assert(dfa_max_width == depth::infinity()); } return c; } raw_som_dfa *haig() { return h; } const raw_som_dfa *haig() const { return h; } raw_dfa *dfa() { return d; } const raw_dfa *dfa() const { return d; } size_t hash() const; private: NGHolder *g; CastleProto *c; raw_dfa *d; raw_som_dfa *h; depth dfa_min_width; depth dfa_max_width; friend depth findMinWidth(const suffix_id &s); friend depth findMaxWidth(const suffix_id &s); friend depth findMinWidth(const suffix_id &s, u32 top); friend depth findMaxWidth(const suffix_id &s, u32 top); }; std::set all_reports(const suffix_id &s); std::set all_tops(const suffix_id &s); bool has_eod_accepts(const suffix_id &s); bool has_non_eod_accepts(const suffix_id &s); depth findMinWidth(const suffix_id &s); depth findMaxWidth(const suffix_id &s); depth findMinWidth(const suffix_id &s, u32 top); depth findMaxWidth(const suffix_id &s, u32 top); size_t hash_value(const suffix_id &s); /** \brief represents an engine to the left of a rose role */ struct left_id { left_id(const LeftEngInfo &in) : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()), h(in.haig.get()), dfa_min_width(in.dfa_min_width), dfa_max_width(in.dfa_max_width) { assert(!g || !has_managed_reports(*g)); } bool operator==(const left_id &b) const { bool rv = g == b.g && c == b.c && h == b.h && d == b.d; assert(!rv || dfa_min_width == b.dfa_min_width); assert(!rv || dfa_max_width == b.dfa_max_width); return rv; } bool operator!=(const left_id &b) const { return !(*this == b); } bool operator<(const left_id &b) const { const left_id &a = *this; ORDER_CHECK(g); ORDER_CHECK(c); ORDER_CHECK(d); ORDER_CHECK(h); return false; } NGHolder *graph() { if (!d && !h) { assert(dfa_min_width == depth(0)); assert(dfa_max_width == depth::infinity()); } return g; } const NGHolder *graph() const { if (!d && !h) { assert(dfa_min_width == depth(0)); assert(dfa_max_width == depth::infinity()); } return g; } CastleProto *castle() { if (!d && !h) { assert(dfa_min_width == depth(0)); assert(dfa_max_width == depth::infinity()); } return c; } const CastleProto *castle() const { if (!d && !h) { assert(dfa_min_width == depth(0)); assert(dfa_max_width == depth::infinity()); } return c; } raw_som_dfa *haig() { return h; } const raw_som_dfa *haig() const { return h; } raw_dfa *dfa() { return d; } const raw_dfa *dfa() const { return d; } size_t hash() const; private: NGHolder *g; CastleProto *c; raw_dfa *d; raw_som_dfa *h; depth dfa_min_width; depth dfa_max_width; friend bool isAnchored(const left_id &r); friend depth findMinWidth(const left_id &r); friend depth findMaxWidth(const left_id &r); }; std::set all_tops(const left_id &r); bool isAnchored(const left_id &r); depth findMinWidth(const left_id &r); depth findMaxWidth(const left_id &r); u32 num_tops(const left_id &r); size_t hash_value(const left_id &r); struct rose_literal_info { ue2::flat_set delayed_ids; ue2::flat_set vertices; rose_group group_mask = 0; u32 undelayed_id = MO_INVALID_IDX; u32 final_id = MO_INVALID_IDX; /* id reported by fdr */ bool squash_group = false; bool requires_explode = false; bool requires_benefits = false; }; /** * \brief Main literal struct used at Rose build time. Numeric literal IDs * used at build time point at these (via the RoseBuildImpl::literals map). */ struct rose_literal_id { rose_literal_id(const ue2_literal &s_in, rose_literal_table table_in, u32 delay_in) : s(s_in), table(table_in), delay(delay_in), distinctiveness(0) {} rose_literal_id(const ue2_literal &s_in, const std::vector &msk_in, const std::vector &cmp_in, rose_literal_table table_in, u32 delay_in); ue2_literal s; std::vector msk; std::vector cmp; rose_literal_table table; u32 delay; u32 distinctiveness; size_t elength(void) const { return s.length() + delay; } }; static inline bool operator<(const rose_literal_id &a, const rose_literal_id &b) { ORDER_CHECK(distinctiveness); ORDER_CHECK(table); ORDER_CHECK(s); ORDER_CHECK(delay); ORDER_CHECK(msk); ORDER_CHECK(cmp); return 0; } // Literals are stored in a map from (string, nocase) -> ID typedef boost::bimap RoseLiteralMap; struct simple_anchored_info { simple_anchored_info(u32 min_b, u32 max_b, const ue2_literal &lit) : min_bound(min_b), max_bound(max_b), literal(lit) {} u32 min_bound; /**< min number of characters required before literal can * start matching */ u32 max_bound; /**< max number of characters allowed before literal can * start matching */ ue2_literal literal; }; static really_inline bool operator<(const simple_anchored_info &a, const simple_anchored_info &b) { ORDER_CHECK(min_bound); ORDER_CHECK(max_bound); ORDER_CHECK(literal); return 0; } struct MpvProto { bool empty() const { return puffettes.empty() && triggered_puffettes.empty(); } void reset() { puffettes.clear(); triggered_puffettes.clear(); } std::vector puffettes; std::vector triggered_puffettes; }; struct OutfixInfo { template explicit OutfixInfo(std::unique_ptr x) : proto(std::move(x)) {} explicit OutfixInfo(MpvProto mpv) : proto(std::move(mpv)) {} u32 get_queue(QueueIndexFactory &qif); u32 get_queue() const { assert(queue != ~0U); return queue; } bool is_nonempty_mpv() const { auto *mpv = boost::get(&proto); return mpv && !mpv->empty(); } bool is_dead() const { auto *mpv = boost::get(&proto); if (mpv) { return mpv->empty(); } return boost::get(&proto) != nullptr; } void clear() { proto = boost::blank(); } // Convenience accessor functions. NGHolder *holder() { auto *up = boost::get>(&proto); return up ? up->get() : nullptr; } raw_dfa *rdfa() { auto *up = boost::get>(&proto); return up ? up->get() : nullptr; } raw_som_dfa *haig() { auto *up = boost::get>(&proto); return up ? up->get() : nullptr; } MpvProto *mpv() { return boost::get(&proto); } // Convenience const accessor functions. const NGHolder *holder() const { auto *up = boost::get>(&proto); return up ? up->get() : nullptr; } const raw_dfa *rdfa() const { auto *up = boost::get>(&proto); return up ? up->get() : nullptr; } const raw_som_dfa *haig() const { auto *up = boost::get>(&proto); return up ? up->get() : nullptr; } const MpvProto *mpv() const { return boost::get(&proto); } /** * \brief Variant wrapping the various engine types. If this is * boost::blank, it means that this outfix is unused (dead). */ boost::variant< boost::blank, std::unique_ptr, std::unique_ptr, std::unique_ptr, MpvProto> proto = boost::blank(); RevAccInfo rev_info; u32 maxBAWidth = 0; //!< max bi-anchored width depth minWidth = depth::infinity(); depth maxWidth = 0; u64a maxOffset = 0; bool in_sbmatcher = false; //!< handled by small-block matcher. private: u32 queue = ~0U; }; std::set all_reports(const OutfixInfo &outfix); // Concrete impl class class RoseBuildImpl : public RoseBuild { public: RoseBuildImpl(ReportManager &rm, SomSlotManager &ssm, const CompileContext &cc, const BoundaryReports &boundary); ~RoseBuildImpl() override; // Adds a single literal. void add(bool anchored, bool eod, const ue2_literal &lit, const ue2::flat_set &ids) override; bool addRose(const RoseInGraph &ig, bool prefilter, bool finalChance = false) override; bool addSombeRose(const RoseInGraph &ig) override; bool addOutfix(const NGHolder &h) override; bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) override; bool addOutfix(const raw_puff &rp) override; bool addChainTail(const raw_puff &rp, u32 *queue_out, u32 *event_out) override; // Returns true if we were able to add it as a mask bool add(bool anchored, const std::vector &mask, const ue2::flat_set &reports) override; bool addAnchoredAcyclic(const NGHolder &graph) override; bool validateMask(const std::vector &mask, const ue2::flat_set &reports, bool anchored, bool eod) const override; void addMask(const std::vector &mask, const ue2::flat_set &reports, bool anchored, bool eod) override; // Construct a runtime implementation. aligned_unique_ptr buildRose(u32 minWidth) override; aligned_unique_ptr buildFinalEngine(u32 minWidth); void setSom() override { hasSom = true; } std::unique_ptr generateDedupeAux() const override; // Find the maximum bound on the edges to this vertex's successors. u32 calcSuccMaxBound(RoseVertex u) const; /* Returns the ID of the given literal in the literal map, adding it if * necessary. */ u32 getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table); // Variant with msk/cmp. u32 getLiteralId(const ue2_literal &s, const std::vector &msk, const std::vector &cmp, u32 delay, rose_literal_table table); bool hasLiteral(const ue2_literal &s, rose_literal_table table) const; u32 getNewLiteralId(void); void removeVertices(const std::vector &dead); // Is the Rose anchored? bool hasNoFloatingRoots() const; RoseVertex cloneVertex(RoseVertex v); u32 calcHistoryRequired() const; rose_group getInitialGroups() const; rose_group getSuccGroups(RoseVertex start) const; rose_group getGroups(RoseVertex v) const; bool hasDelayedLiteral(RoseVertex v) const; bool hasDelayPred(RoseVertex v) const; bool hasLiteralInTable(RoseVertex v, enum rose_literal_table t) const; bool hasAnchoredTablePred(RoseVertex v) const; // Is the given vertex a successor of either root or anchored_root? bool isRootSuccessor(const RoseVertex &v) const; /* Is the given vertex a successor of something other than root or * anchored_root? */ bool isNonRootSuccessor(const RoseVertex &v) const; bool isDirectReport(u32 id) const; bool isDelayed(u32 id) const; bool hasFinalId(u32 id) const; bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored * table */ bool isFloating(RoseVertex v) const; /* true iff has literal in floating * table */ bool isInETable(RoseVertex v) const; /* true iff has literal in eod * table */ size_t maxLiteralLen(RoseVertex v) const; size_t minLiteralLen(RoseVertex v) const; // max overlap considered for every pair (ulit, vlit). size_t maxLiteralOverlap(RoseVertex u, RoseVertex v) const; void renumberVertices(void); bool isPseudoStar(const RoseEdge &e) const; bool isPseudoStarOrFirstOnly(const RoseEdge &e) const; bool hasOnlyPseudoStarInEdges(RoseVertex v) const; bool isAnyStart(const RoseVertex &v) const { return v == root || v == anchored_root; } bool isVirtualVertex(const RoseVertex &v) const { return g[v].eod_accept || isAnyStart(v); } void handleMixedSensitivity(void); void findTransientLeftfixes(void); const CompileContext &cc; RoseGraph g; const RoseVertex root; const RoseVertex anchored_root; RoseLiteralMap literals; std::map ghost; size_t vertexIndex; ReportID getNewNfaReport() override { return next_nfa_report++; } std::deque literal_info; u32 delay_base_id; bool hasSom; //!< at least one pattern requires SOM. std::map>> anchored_nfas; std::map> anchored_simple; std::map > group_to_literal; u32 group_weak_end; u32 group_end; u32 anchored_base_id; u32 ematcher_region_size; /**< number of bytes the eod table runs over */ /** \brief Mapping from leftfix to queue ID (used in dump code). */ unordered_map leftfix_queue_map; /** \brief Mapping from suffix to queue ID (used in dump code). */ unordered_map suffix_queue_map; /** \brief Mapping from anchored literal ID to the original literal suffix * present when the literal was added to the literal matcher. Used for * overlap calculation in history assignment. */ std::map anchoredLitSuffix; std::map > final_id_to_literal; /* final literal id to * literal id */ unordered_set transient; unordered_map rose_squash_masks; std::vector outfixes; /** \brief MPV outfix entry. Null if not used, and moved into the outfixes * list before we start building the bytecode (at which point it is set to * null again). */ std::unique_ptr mpv_outfix = nullptr; u32 eod_event_literal_id; // ID of EOD event literal, or MO_INVALID_IDX. u32 max_rose_anchored_floating_overlap; QueueIndexFactory qif; ReportManager &rm; SomSlotManager &ssm; const BoundaryReports &boundary; private: ReportID next_nfa_report; }; // Free functions, in rose_build_misc.cpp bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v); bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v); size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b); ue2_literal findNonOverlappingTail(const std::set &lits, const ue2_literal &s); void setReportId(NGHolder &g, ReportID id); #ifndef NDEBUG bool roseHasTops(const RoseGraph &g, RoseVertex v); #endif u64a findMaxOffset(const std::set &reports, const ReportManager &rm); // Function that operates on a msk/cmp pair and a literal, as used in // hwlmLiteral, and zeroes msk elements that don't add any power to the // literal. void normaliseLiteralMask(const ue2_literal &s, std::vector &msk, std::vector &cmp); #ifndef NDEBUG bool canImplementGraphs(const RoseBuildImpl &tbi); #endif } // namespace ue2 #endif /* ROSE_BUILD_IMPL_H_17E20A3C6935D6 */