mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-11-19 02:30:35 +03:00
Initial commit of Hyperscan
This commit is contained in:
281
src/nfa/goughcompile_accel.cpp
Normal file
281
src/nfa/goughcompile_accel.cpp
Normal file
@@ -0,0 +1,281 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "goughcompile_internal.h"
|
||||
#include "gough_internal.h"
|
||||
#include "grey.h"
|
||||
#include "mcclellancompile.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
template<typename Graph>
|
||||
void add_edge_if_not_selfloop(const typename Graph::vertex_descriptor &u,
|
||||
const typename Graph::vertex_descriptor &v,
|
||||
Graph &g) {
|
||||
if (u != v) {
|
||||
add_edge(u, v, g);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool can_accel_over_selfloop(const GoughVertexProps &vp, const GoughEdge &e,
|
||||
const GoughEdgeProps &ep, u32 *margin) {
|
||||
if (vp.vars.empty() && ep.vars.empty()) {
|
||||
/* if we update no som information, then it is trivial to accelerate */
|
||||
*margin = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* if the effect of running a self loop stabilises after a small number of
|
||||
* iterations, it is possible to accelerate over the state and only then run
|
||||
* the block N times. To model this we create a graph which shows how the
|
||||
* value for a variable at the end of a self loop block is related to values
|
||||
* at the start */
|
||||
|
||||
typedef boost::adjacency_list<boost::vecS, boost::vecS,
|
||||
boost::bidirectionalS> basic_graph;
|
||||
typedef basic_graph::vertex_descriptor basic_vertex;
|
||||
basic_graph bg;
|
||||
|
||||
map<const GoughSSAVar *, basic_vertex> verts;
|
||||
|
||||
/* create verts */
|
||||
for (const auto &var : ep.vars) {
|
||||
verts[var.get()] = add_vertex(bg);
|
||||
}
|
||||
|
||||
for (const auto &var : vp.vars) {
|
||||
verts[var.get()] = add_vertex(bg);
|
||||
}
|
||||
|
||||
/* wire edges */
|
||||
set<basic_vertex> done;
|
||||
for (const auto &var : ep.vars) {
|
||||
assert(contains(verts, var.get()));
|
||||
basic_vertex v = verts[var.get()];
|
||||
for (GoughSSAVar *pred : var->get_inputs()) {
|
||||
if (!contains(verts, pred)) {
|
||||
continue;
|
||||
}
|
||||
basic_vertex u = verts[pred];
|
||||
if (contains(done, u)) { /* u has already taken on new values this
|
||||
* iteration */
|
||||
for (auto p : inv_adjacent_vertices_range(u, bg)) {
|
||||
add_edge_if_not_selfloop(p, v, bg);
|
||||
}
|
||||
} else {
|
||||
add_edge_if_not_selfloop(u, v, bg);
|
||||
}
|
||||
}
|
||||
done.insert(v);
|
||||
}
|
||||
|
||||
for (const auto &var : vp.vars) {
|
||||
GoughSSAVar *pred = var->get_input(e);
|
||||
assert(contains(verts, var.get()));
|
||||
basic_vertex v = verts[var.get()];
|
||||
if (!contains(verts, pred)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
basic_vertex u = verts[pred];
|
||||
if (contains(done, u)) { /* u has already taken on new values this
|
||||
* iteration */
|
||||
for (auto p : inv_adjacent_vertices_range(u, bg)) {
|
||||
add_edge_if_not_selfloop(p, v, bg);
|
||||
}
|
||||
} else {
|
||||
add_edge_if_not_selfloop(u, v, bg);
|
||||
}
|
||||
/* do not add v to done as all joins happen in parallel */
|
||||
}
|
||||
|
||||
/* check for loops - non self loops may prevent settling */
|
||||
|
||||
if (!is_dag(bg)) {
|
||||
DEBUG_PRINTF("can not %u accel as large loops\n", vp.state_id);
|
||||
return false;
|
||||
}
|
||||
|
||||
*margin = num_vertices(bg); /* TODO: be less conservative */
|
||||
|
||||
if (*margin > 50) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool verify_neighbour(const GoughGraph &g, GoughVertex u,
|
||||
const map<gough_edge_id, vector<gough_ins> > &blocks,
|
||||
const set<GoughVertex> &succs,
|
||||
const vector<gough_ins> &block_sl) {
|
||||
for (const auto &e : out_edges_range(u, g)) {
|
||||
if (!g[e].reach.any()) { /* ignore top edges */
|
||||
continue;
|
||||
}
|
||||
|
||||
GoughVertex t = target(e, g);
|
||||
if (!contains(succs, t)) { /* must be an escape string */
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!contains(blocks, gough_edge_id(g, e))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (blocks.at(gough_edge_id(g, e)) != block_sl) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool verify_neighbour_no_block(const GoughGraph &g, GoughVertex u,
|
||||
const map<gough_edge_id, vector<gough_ins> > &blocks,
|
||||
const set<GoughVertex> &succs) {
|
||||
for (const auto &e : out_edges_range(u, g)) {
|
||||
if (!g[e].reach.any()) { /* ignore top edges */
|
||||
continue;
|
||||
}
|
||||
|
||||
GoughVertex t = target(e, g);
|
||||
if (!contains(succs, t)) { /* must be an escape string */
|
||||
continue;
|
||||
}
|
||||
|
||||
if (contains(blocks, gough_edge_id(g, e))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Checks the som aspects of allowing two byte accel - it is expected that the
|
||||
* mcclellan logic will identify escape strings.
|
||||
*
|
||||
* For 2 byte acceleration to be correct we require that any non-escape sequence
|
||||
* characters xy from the accel state has the same effect as just the character
|
||||
* of y.
|
||||
*
|
||||
* The current way of ensuring this is to require:
|
||||
* (a) all edges out of the cyclic state behave identically to the cyclic self
|
||||
* loop edge
|
||||
* (b) edges out of the neighbouring state which do not correspond to escape
|
||||
* string behave identical to the cyclic state edges.
|
||||
*
|
||||
* TODO: these restrictions could be relaxed by looking at the effect on
|
||||
* relevant (live?) vars only, allowing additions to the escape string set, and
|
||||
* considering one byte escapes.
|
||||
*/
|
||||
static
|
||||
bool allow_two_byte_accel(const GoughGraph &g,
|
||||
const map<gough_edge_id, vector<gough_ins> > &blocks,
|
||||
GoughVertex v, const GoughEdge &self_loop) {
|
||||
if (contains(blocks, gough_edge_id(g, self_loop))) {
|
||||
DEBUG_PRINTF("edge plan on self loop\n");
|
||||
const auto &block_sl = blocks.at(gough_edge_id(g, self_loop));
|
||||
|
||||
set<GoughVertex> succs;
|
||||
for (const auto &e : out_edges_range(v, g)) {
|
||||
if (g[e].reach.none()) { /* ignore top edges */
|
||||
continue;
|
||||
}
|
||||
|
||||
gough_edge_id ged(g, e);
|
||||
if (!contains(blocks, ged) || blocks.at(ged) != block_sl) {
|
||||
DEBUG_PRINTF("different out-edge behaviour\n");
|
||||
return false;
|
||||
}
|
||||
succs.insert(target(e, g));
|
||||
}
|
||||
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (w != v && !verify_neighbour(g, w, blocks, succs, block_sl)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
DEBUG_PRINTF("no edge plan on self loop\n");
|
||||
set<GoughVertex> succs;
|
||||
for (const auto &e : out_edges_range(v, g)) {
|
||||
if (g[e].reach.none()) { /* ignore top edges */
|
||||
continue;
|
||||
}
|
||||
|
||||
gough_edge_id ged(g, e);
|
||||
if (contains(blocks, ged)) {
|
||||
DEBUG_PRINTF("different out-edge behaviour\n");
|
||||
return false;
|
||||
}
|
||||
succs.insert(target(e, g));
|
||||
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (w != v && !verify_neighbour_no_block(g, w, blocks, succs)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("allowing two byte accel for %u\n", g[v].state_id);
|
||||
return true;
|
||||
}
|
||||
|
||||
void find_allowed_accel_states(const GoughGraph &g,
|
||||
const map<gough_edge_id, vector<gough_ins> > &blocks,
|
||||
map<dstate_id_t, gough_accel_state_info> *out) {
|
||||
for (auto v : vertices_range(g)) {
|
||||
GoughEdge e;
|
||||
if (!find_normal_self_loop(v, g, &e)) {
|
||||
continue; /* not accelerable */
|
||||
}
|
||||
u32 margin = 0;
|
||||
if (!can_accel_over_selfloop(g[v], e, g[e], &margin)) {
|
||||
continue; /* not accelerable */
|
||||
}
|
||||
bool tba = allow_two_byte_accel(g, blocks, v, e);
|
||||
out->emplace(g[v].state_id, gough_accel_state_info(margin, tba));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
Reference in New Issue
Block a user