vectorscan/src/rose/rose_graph.h
Xiang Wang 9087d59be5 tamarama: add container engine for exclusive nfas
Add the new Tamarama engine that acts as a container for infix/suffix
engines that can be proven to run exclusively of one another.

This reduces stream state for pattern sets with many exclusive engines.
2016-07-08 11:01:34 +10:00

236 lines
8.0 KiB
C++

/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief BGL graph structures used internally by the Rose build process.
*
* BGL graph structures used internally by the build-time portion of Rose. The
* graph used for input is in rose_in_graph.h since it's part of the RoseBuild
* external API.
*/
#ifndef ROSE_GRAPH_H
#define ROSE_GRAPH_H
#include "ue2common.h"
#include "rose_build.h"
#include "rose_internal.h"
#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
#include "util/charreach.h"
#include "util/depth.h"
#include "util/ue2_containers.h"
#include <memory>
#include <set>
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/graph_traits.hpp>
namespace ue2 {
struct CastleProto;
struct raw_dfa;
struct raw_som_dfa;
struct TamaProto;
/** \brief Table type for a literal. */
enum rose_literal_table {
ROSE_ANCHORED, //!< literals anchored to start
ROSE_FLOATING, //!< general floating literals
ROSE_EOD_ANCHORED, //!< literals that match near EOD
ROSE_ANCHORED_SMALL_BLOCK, //!< anchored literals for small block table
ROSE_EVENT //!< "literal-like" events, such as EOD
};
/** \brief Edge history types. */
enum RoseRoleHistory {
ROSE_ROLE_HISTORY_NONE, //!< no special history
ROSE_ROLE_HISTORY_ANCH, //!< previous role is at a fixed offset
ROSE_ROLE_HISTORY_LAST_BYTE, //!< previous role can only match at EOD
ROSE_ROLE_HISTORY_INVALID //!< history not yet assigned
};
#include "util/order_check.h"
/** \brief Provides information about the (pre|in)fix engine to the left of a
* role. */
struct LeftEngInfo {
std::shared_ptr<NGHolder> graph;
std::shared_ptr<CastleProto> castle;
std::shared_ptr<raw_dfa> dfa;
std::shared_ptr<raw_som_dfa> haig;
std::shared_ptr<TamaProto> tamarama;
u32 lag = 0U;
ReportID leftfix_report = MO_INVALID_IDX;
depth dfa_min_width = 0;
depth dfa_max_width = depth::infinity();
bool operator==(const LeftEngInfo &other) const {
return other.graph == graph
&& other.castle == castle
&& other.dfa == dfa
&& other.haig == haig
&& other.tamarama == tamarama
&& other.lag == lag
&& other.leftfix_report == leftfix_report;
}
bool operator!=(const LeftEngInfo &other) const {
return !(*this == other);
}
bool operator<(const LeftEngInfo &b) const {
const LeftEngInfo &a = *this;
ORDER_CHECK(graph);
ORDER_CHECK(castle);
ORDER_CHECK(dfa);
ORDER_CHECK(haig);
ORDER_CHECK(tamarama);
ORDER_CHECK(lag);
ORDER_CHECK(leftfix_report);
return false;
}
void reset(void);
operator bool() const;
bool tracksSom() const { return !!haig; }
};
/** \brief Provides information about the suffix engine to the right of a
* role. */
struct RoseSuffixInfo {
u32 top = 0;
std::shared_ptr<NGHolder> graph; /* if triggers a trailing nfa */
std::shared_ptr<CastleProto> castle;
std::shared_ptr<raw_som_dfa> haig;
std::shared_ptr<raw_dfa> rdfa;
std::shared_ptr<TamaProto> tamarama;
depth dfa_min_width = 0;
depth dfa_max_width = depth::infinity();
bool operator==(const RoseSuffixInfo &b) const;
bool operator!=(const RoseSuffixInfo &b) const { return !(*this == b); }
bool operator<(const RoseSuffixInfo &b) const;
void reset(void);
operator bool() const { return graph || castle || haig || rdfa || tamarama; }
};
/** \brief Properties attached to each Rose graph vertex. */
struct RoseVertexProps {
/** \brief Unique dense vertex index. Used for BGL algorithms. */
size_t idx = ~size_t{0};
/** \brief IDs of literals in the Rose literal map. */
flat_set<u32> literals;
/**
* \brief If true, this vertex is a virtual vertex for firing reports at
* EOD. These vertices must have reports and have no associated literals.
*/
bool eod_accept = false;
/** \brief Report IDs to fire. */
flat_set<ReportID> reports;
/** \brief Bitmask of groups that this role sets. */
rose_group groups = 0;
/** \brief Minimum role (end of literal) offset depth in bytes. */
u32 min_offset = ~u32{0};
/** \brief Maximum role (end of literal) offset depth in bytes */
u32 max_offset = 0;
/** \brief SOM for the role is offset from end match offset */
u32 som_adjust = 0;
/** \brief Prefix/infix engine to the left of this role. */
LeftEngInfo left;
/**
* \brief Suffix engine to the right of this role.
*
* Note: information about triggered infixes is associated with the left of
* the destination role.
*/
RoseSuffixInfo suffix;
bool isBoring(void) const;
bool fixedOffset(void) const;
};
/** \brief Properties attached to each Rose graph edge. */
/* bounds are distance from end of prev to start of the next */
struct RoseEdgeProps {
/**
* \brief Minimum distance from the end of the source role's match to the
* start of the target role's match.
*
* Not used when the target has a left engine (as the engine represents
* bounds).
*/
u32 minBound = 0;
/**
* \brief Maximum distance from the end of the source role's match to the
* start of the target role's match.
*
* Not used when the target has a left engine (as the engine represents
* bounds).
*/
u32 maxBound = 0;
/** \brief Which top to trigger on the target role's left engine. */
u32 rose_top = 0;
/** \brief True if the rose_top can clear all other previous tops. */
u8 rose_cancel_prev_top = false;
/** \brief History required by this edge. */
RoseRoleHistory history = ROSE_ROLE_HISTORY_INVALID;
};
bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b);
/**
* \brief Core Rose graph structure.
*
* Note that we use the list selector for the edge and vertex lists: we depend
* on insertion order for determinism, so we must use these containers.
*/
using RoseGraph = boost::adjacency_list<boost::listS, // out edge list per vertex
boost::listS, // vertex list
boost::bidirectionalS, // bidirectional
RoseVertexProps, // bundled vertex properties
RoseEdgeProps, // bundled edge properties
boost::listS // graph edge list
>;
using RoseVertex = RoseGraph::vertex_descriptor;
using RoseEdge = RoseGraph::edge_descriptor;
} // namespace ue2
#endif // ROSE_GRAPH_H