Initial commit of Hyperscan

This commit is contained in:
Matthew Barr
2015-10-20 09:13:35 +11:00
commit 904e436f11
610 changed files with 213627 additions and 0 deletions

109
src/alloc.c Normal file
View File

@@ -0,0 +1,109 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Runtime functions for setting custom allocators.
*/
#include <stdlib.h>
#include <string.h>
#include "allocator.h"
/* Default allocation routines: the C library's malloc() and free(). */
#define default_malloc malloc
#define default_free free
/* Allocation callbacks, one per category (database, misc, scratch, stream).
 * Each starts out as the default routine and is replaced by the matching
 * hs_set_*_allocator() function in this file. */
hs_alloc_t hs_database_alloc = default_malloc;
hs_alloc_t hs_misc_alloc = default_malloc;
hs_alloc_t hs_scratch_alloc = default_malloc;
hs_alloc_t hs_stream_alloc = default_malloc;
/* Free callbacks paired with the allocation callbacks above. */
hs_free_t hs_database_free = default_free;
hs_free_t hs_misc_free = default_free;
hs_free_t hs_scratch_free = default_free;
hs_free_t hs_stream_free = default_free;
/** \brief Substitute the default allocator for a NULL allocation callback.
 *
 * \param a Caller-supplied allocation callback, possibly NULL.
 * \return \a a if it is non-NULL, otherwise default_malloc.
 */
static
hs_alloc_t normalise_alloc(hs_alloc_t a) {
    return a ? a : default_malloc;
}
/** \brief Substitute the default free routine for a NULL free callback.
 *
 * \param f Caller-supplied free callback, possibly NULL.
 * \return \a f if it is non-NULL, otherwise default_free.
 */
static
hs_free_t normalise_free(hs_free_t f) {
    return f ? f : default_free;
}
/** \brief Install one allocator pair for all four categories (database, misc,
 * stream and scratch).
 *
 * \param allocfunc Allocation callback, forwarded to every per-category setter.
 * \param freefunc Free callback, forwarded to every per-category setter.
 * \return Always HS_SUCCESS.
 */
HS_PUBLIC_API
hs_error_t hs_set_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
    /* The per-category results are deliberately discarded. */
    (void)hs_set_database_allocator(allocfunc, freefunc);
    (void)hs_set_misc_allocator(allocfunc, freefunc);
    (void)hs_set_stream_allocator(allocfunc, freefunc);
    (void)hs_set_scratch_allocator(allocfunc, freefunc);

    return HS_SUCCESS;
}
/** \brief Set the hs_database_alloc / hs_database_free callbacks.
 *
 * A NULL \a allocfunc or \a freefunc selects the corresponding default
 * routine.
 *
 * \return Always HS_SUCCESS.
 */
HS_PUBLIC_API
hs_error_t hs_set_database_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
    const hs_alloc_t chosen_alloc = normalise_alloc(allocfunc);
    const hs_free_t chosen_free = normalise_free(freefunc);

    hs_database_alloc = chosen_alloc;
    hs_database_free = chosen_free;
    return HS_SUCCESS;
}
/** \brief Set the hs_misc_alloc / hs_misc_free callbacks.
 *
 * A NULL \a allocfunc or \a freefunc selects the corresponding default
 * routine.
 *
 * \return Always HS_SUCCESS.
 */
HS_PUBLIC_API
hs_error_t hs_set_misc_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
    const hs_alloc_t chosen_alloc = normalise_alloc(allocfunc);
    const hs_free_t chosen_free = normalise_free(freefunc);

    hs_misc_alloc = chosen_alloc;
    hs_misc_free = chosen_free;
    return HS_SUCCESS;
}
/** \brief Set the hs_scratch_alloc / hs_scratch_free callbacks.
 *
 * A NULL \a allocfunc or \a freefunc selects the corresponding default
 * routine.
 *
 * \return Always HS_SUCCESS.
 */
HS_PUBLIC_API
hs_error_t hs_set_scratch_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
    const hs_alloc_t chosen_alloc = normalise_alloc(allocfunc);
    const hs_free_t chosen_free = normalise_free(freefunc);

    hs_scratch_alloc = chosen_alloc;
    hs_scratch_free = chosen_free;
    return HS_SUCCESS;
}
/** \brief Set the hs_stream_alloc / hs_stream_free callbacks.
 *
 * A NULL \a allocfunc or \a freefunc selects the corresponding default
 * routine.
 *
 * \return Always HS_SUCCESS.
 */
HS_PUBLIC_API
hs_error_t hs_set_stream_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
    const hs_alloc_t chosen_alloc = normalise_alloc(allocfunc);
    const hs_free_t chosen_free = normalise_free(freefunc);

    hs_stream_alloc = chosen_alloc;
    hs_stream_free = chosen_free;
    return HS_SUCCESS;
}

66
src/allocator.h Normal file
View File

@@ -0,0 +1,66 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ALLOCATOR_H
#define ALLOCATOR_H
#include "hs_common.h"
#include "ue2common.h"
#ifdef __cplusplus
extern "C"
{
#endif
/* Allocation callbacks, one per category: database, misc, scratch, stream. */
extern hs_alloc_t hs_database_alloc;
extern hs_alloc_t hs_misc_alloc;
extern hs_alloc_t hs_scratch_alloc;
extern hs_alloc_t hs_stream_alloc;
/* Free callbacks paired with the allocation callbacks above. */
extern hs_free_t hs_database_free;
extern hs_free_t hs_misc_free;
extern hs_free_t hs_scratch_free;
extern hs_free_t hs_stream_free;
#ifdef __cplusplus
} /* extern C */
#endif
/** \brief Validate a pointer returned by one of the hs_*_alloc callbacks.
 *
 * \param mem Pointer to check; not dereferenced.
 * \return HS_NOMEM if \a mem is NULL, HS_BAD_ALLOC if it is not aligned to
 *         alignof(unsigned long long), otherwise HS_SUCCESS.
 *
 * On HS_BAD_ALLOC the caller should free the offending block. */
static really_inline
hs_error_t hs_check_alloc(const void *mem) {
    if (!mem) {
        return HS_NOMEM;
    }
    if (!ISALIGNED_N(mem, alignof(unsigned long long))) {
        return HS_BAD_ALLOC;
    }
    return HS_SUCCESS;
}
#endif

310
src/compiler/asserts.cpp Normal file
View File

@@ -0,0 +1,310 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Convert temporary assert vertices (from construction method) to
* edge-based flags.
*
* This pass converts the temporary assert vertices created by the Glushkov
* construction process above (vertices with special assertions flags) into
* edges between those vertices' neighbours in the graph.
*
* These edges have the appropriate flags applied to them -- a path (u,t,v)
* through an assert vertex t will be replaced with the edge (u,v) with the
* assertion flags from t.
*
* Edges with mutually incompatible flags (such as the conjunction of
* word-to-word and word-to-nonword) are dropped.
*/
#include "asserts.h"
#include "nfagraph/ng.h"
#include "nfagraph/ng_prune.h"
#include "nfagraph/ng_redundancy.h"
#include "nfagraph/ng_util.h"
#include "parser/position.h" // for POS flags
#include "util/compile_error.h"
#include "util/graph_range.h"
#include <queue>
#include <set>
using namespace std;
namespace ue2 {
/** Hard limit on the maximum number of edges we'll clone before we throw up
* our hands and report 'Pattern too large.' */
static const size_t MAX_ASSERT_EDGES = 300000;
/** Flags representing the word-boundary assertions, \\b or \\B. */
static const int WORDBOUNDARY_FLAGS = POS_FLAG_ASSERT_WORD_TO_WORD
| POS_FLAG_ASSERT_WORD_TO_NONWORD
| POS_FLAG_ASSERT_NONWORD_TO_WORD
| POS_FLAG_ASSERT_NONWORD_TO_NONWORD
| POS_FLAG_ASSERT_WORD_TO_WORD_UCP
| POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP
| POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP
| POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP;
/** Edge flag value meaning "no assertion": conjunct() returns the other
 * operand unchanged when one side is OPEN_EDGE. */
#define OPEN_EDGE 0U
/** Edge flag value meaning "impassable": produced by conjunct() when two
 * assertions in series share no flags. */
#define DEAD_EDGE (~0U)
/** \brief Combine the flags of two assertions in parallel (alternative paths).
 *
 * A DEAD_EDGE operand is ignored in favour of the other operand; otherwise an
 * OPEN_EDGE operand makes the result OPEN_EDGE; otherwise the result is the
 * union of both flag sets.
 */
static
u32 disjunct(u32 flags1, u32 flags2) {
    DEBUG_PRINTF("disjunct %x %x\n", flags1, flags2);

    const bool first_dead = (flags1 == DEAD_EDGE);
    const bool second_dead = (flags2 == DEAD_EDGE);

    u32 merged;
    if (first_dead || second_dead) {
        // A dead path contributes nothing: keep whichever side remains.
        merged = first_dead ? flags2 : flags1;
    } else if (flags1 == OPEN_EDGE || flags2 == OPEN_EDGE) {
        merged = OPEN_EDGE;
    } else {
        merged = flags1 | flags2;
    }

    DEBUG_PRINTF("--> %x\n", merged);
    return merged;
}
/** \brief Combine the flags of two assertions in series (one after the other).
 *
 * An OPEN_EDGE operand yields the other operand unchanged. Otherwise the
 * result is the intersection of the flag sets, or DEAD_EDGE when they share
 * no flags: the conjunction of two different word boundary assertions is
 * impassable.
 */
static
u32 conjunct(u32 flags1, u32 flags2) {
    DEBUG_PRINTF("conjunct %x %x\n", flags1, flags2);

    u32 combined;
    if (flags1 == OPEN_EDGE) {
        combined = flags2;
    } else if (flags2 == OPEN_EDGE) {
        combined = flags1;
    } else {
        const u32 shared = flags1 & flags2;
        combined = shared ? shared : DEAD_EDGE;
    }

    DEBUG_PRINTF("--> %x\n", combined);
    return combined;
}
/** \brief Cache of graph edges keyed by their (source, target) vertex pair. */
typedef map<pair<NFAVertex, NFAVertex>, NFAEdge> edge_cache_t;
/** \brief Replace assert vertex \a t with direct edges from each of its
 * predecessors to each of its successors, then clear \a t.
 *
 * For every path (u,t,v) the in-edge flags, the flags on \a t and the out-edge
 * flags are combined with conjunct(); paths whose combination is DEAD_EDGE are
 * skipped. Surviving paths either create a new edge (u,v) or update the flags
 * of the edge already recorded in \a edge_cache.
 *
 * \param g Graph being rewritten.
 * \param t Assert vertex to remove.
 * \param edge_cache (u,v) -> edge lookup; newly added edges are inserted.
 * \param assert_edge_count Running count of edges added; a CompileError is
 *        thrown once it exceeds MAX_ASSERT_EDGES.
 */
static
void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
u32 &assert_edge_count) {
DEBUG_PRINTF("replacing assert vertex %u\n", g[t].index);
const u32 flags = g[t].assert_flags;
DEBUG_PRINTF("consider assert vertex %u with flags %u\n",
g[t].index, flags);
// Wire up all the predecessors to all the successors.
for (const auto &inEdge : in_edges_range(t, g)) {
NFAVertex u = source(inEdge, g);
if (u == t) {
continue; // ignore self-loops
}
// Combine the in-edge's existing flags with those of the assert vertex.
const u32 flags_inc_in = conjunct(g[inEdge].assert_flags,
flags);
if (flags_inc_in == DEAD_EDGE) {
DEBUG_PRINTF("fail, in-edge has bad flags %d\n",
g[inEdge].assert_flags);
continue;
}
for (const auto &outEdge : out_edges_range(t, g)) {
NFAVertex v = target(outEdge, g);
DEBUG_PRINTF("consider path [%u,%u,%u]\n", g[u].index,
g[t].index, g[v].index);
if (v == t) {
continue; // ignore self-loops
}
// Flags for the whole path (u,t,v).
const u32 flags_final = conjunct(g[outEdge].assert_flags,
flags_inc_in);
if (flags_final == DEAD_EDGE) {
DEBUG_PRINTF("fail, out-edge has bad flags %d\n",
g[outEdge].assert_flags);
continue;
}
if ((g[u].assert_flags & POS_FLAG_MULTILINE_START)
&& v == g.acceptEod) {
DEBUG_PRINTF("fail, (?m)^ does not match \\n at eod\n");
continue;
}
/* Replace path (u,t,v) with direct edge (u,v), unless the edge
* already exists, in which case we just need to edit its
* properties.
*
* Use edge_cache to look up an existing (u,v) edge rather than
* searching the graph for it.
*/
auto cache_key = make_pair(u, v);
auto ecit = edge_cache.find(cache_key);
if (ecit == edge_cache.end()) {
DEBUG_PRINTF("adding edge %u %u\n", g[u].index,
g[v].index);
NFAEdge e = add_edge(u, v, g).first;
edge_cache.emplace(cache_key, e);
// NOTE(review): the new edge is given the vertex's own `flags`, not
// the path-combined `flags_final` computed above -- confirm that this
// is intended.
g[e].assert_flags = flags;
if (++assert_edge_count > MAX_ASSERT_EDGES) {
throw CompileError(g.expressionIndex,
"Pattern is too large.");
}
} else {
NFAEdge e = ecit->second;
DEBUG_PRINTF("updating edge %u %u [a %u]\n", g[u].index,
g[v].index, g[t].index);
// Edge already exists.
// The existing edge is an alternative to this path, so merge the two
// flag sets with disjunct().
u32 &e_flags = g[e].assert_flags;
e_flags = disjunct(e_flags, flags_final);
assert(e_flags != DEAD_EDGE);
}
}
}
// Clear vertex t to remove all the old edges.
/* no need to clear the cache, as we will never look up its edge as it is
* unreachable */
clear_vertex(t, g);
}
/** \brief Attach a single basic internal report, with offset adjustment
 * \a adj, to vertex \a v.
 *
 * \a v must not be a special vertex and must not already have any reports.
 */
static
void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) {
    // Special vertices never have their report ID set here.
    assert(!is_special(v, g));
    // The vertex must start out with no reports.
    assert(g[v].reports.empty());

    Report basic = rm.getBasicInternalReport(g, adj);
    const auto internal_id = rm.getInternalId(basic);
    g[v].reports.insert(internal_id);

    DEBUG_PRINTF("set report id for vertex %u, adj %d\n",
                 g[v].index, adj);
}
/** \brief Resolve POS_FLAG_MULTILINE_START on vertices adjacent to start.
 *
 * For each such vertex, every edge leading directly to accept is rerouted
 * through a new vertex that matches any character and carries a report with an
 * adjustment of -1; the flag is then cleared from the vertex.
 */
static
void checkForMultilineStart(ReportManager &rm, NGWrapper &g) {
// Edges to replace; collected first and rewritten after the scan.
vector<NFAEdge> dead;
for (auto v : adjacent_vertices_range(g.start, g)) {
if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) {
continue;
}
DEBUG_PRINTF("mls %u %08x\n", g[v].index,
g[v].assert_flags);
/* we have found a multi-line start (maybe more than one) */
/* we need to interpose a dummy dot vertex between v and accept if
* required so that ^ doesn't match trailing \n */
for (const auto &e : out_edges_range(v, g)) {
if (target(e, g) == g.accept) {
dead.push_back(e);
}
}
/* assert has been resolved; clear flag */
g[v].assert_flags &= ~POS_FLAG_MULTILINE_START;
}
// Replace each collected edge (v, accept) with v -> dummy -> accept, copying
// the old edge's properties onto the edge into the dummy vertex.
for (const auto &e : dead) {
NFAVertex dummy = add_vertex(g);
g[dummy].char_reach.setall();
setReportId(rm, g, dummy, -1);
add_edge(source(e, g), dummy, g[e], g);
add_edge(dummy, g.accept, g);
}
remove_edges(dead, g);
}
/** \brief True if any vertex in \a g has one of the WORDBOUNDARY_FLAGS set in
 * its assert_flags. */
static
bool hasAssertVertices(const NGHolder &g) {
    bool found = false;
    for (auto v : vertices_range(g)) {
        if (g[v].assert_flags & WORDBOUNDARY_FLAGS) {
            found = true;
            break;
        }
    }
    return found;
}
/** \brief Convert temporary assert vertices (from construction method) to
* edge-based flags.
*
* Remove the horrors that are the temporary assert vertices which arise from
* our construction method. Allows the rest of our code base to live in
* blissful ignorance of their existence.
*
* May throw CompileError (via replaceAssertVertex) if too many edges have to
* be added. */
void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
size_t num = 0;
DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g));
// Sweep over the graph and ascertain that we do actually have vertices
// with assertion flags set. Otherwise, we're done.
// NOTE(review): hasAssertVertices() only tests WORDBOUNDARY_FLAGS, so a graph
// with no word-boundary assert vertices returns here before
// checkForMultilineStart() runs -- confirm that this is intended.
if (!hasAssertVertices(g)) {
DEBUG_PRINTF("no assert vertices, done\n");
return;
}
u32 assert_edge_count = 0;
// Build a cache of (u, v) vertex pairs to edge descriptors.
edge_cache_t edge_cache;
for (const auto &e : edges_range(g)) {
edge_cache[make_pair(source(e, g), target(e, g))] = e;
}
// Replace each word-boundary assert vertex with direct, flagged edges.
for (auto v : vertices_range(g)) {
if (g[v].assert_flags & WORDBOUNDARY_FLAGS) {
replaceAssertVertex(g, v, edge_cache, assert_edge_count);
num++;
}
}
checkForMultilineStart(rm, g);
// Tidy up the graph if any assert vertices were resolved.
if (num) {
DEBUG_PRINTF("resolved %zu assert vertices\n", num);
pruneUseless(g);
pruneEmptyVertices(g);
g.renumberVertices();
g.renumberEdges();
}
DEBUG_PRINTF("after: graph has %zu vertices\n", num_vertices(g));
assert(!hasAssertVertices(g));
}
} // namespace ue2

51
src/compiler/asserts.h Normal file
View File

@@ -0,0 +1,51 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Convert temporary assert vertices (from construction method) to
* edge-based flags.
*/
#ifndef ASSERTS_H
#define ASSERTS_H
namespace ue2 {
// Forward declarations; only references to these types are needed here.
class ReportManager;
class NGWrapper;
/** \brief Convert temporary assert vertices (from construction method) to
* edge-based flags.
*
* Remove the horrors that are the temporary assert vertices which arise from
* our construction method. Allows the rest of our code base to live in
* blissful ignorance of their existence.
*
* \param rm Report manager, used when new reporting vertices are created.
* \param g Graph to rewrite in place.
*/
void removeAssertVertices(ReportManager &rm, NGWrapper &g);
} // namespace ue2
#endif // ASSERTS_H

459
src/compiler/compiler.cpp Normal file
View File

@@ -0,0 +1,459 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Compiler front-end interface.
*/
#include "asserts.h"
#include "compiler.h"
#include "database.h"
#include "grey.h"
#include "hs_internal.h"
#include "hs_runtime.h"
#include "ue2common.h"
#include "nfagraph/ng_builder.h"
#include "nfagraph/ng_dump.h"
#include "nfagraph/ng.h"
#include "nfagraph/ng_util.h"
#include "parser/buildstate.h"
#include "parser/dump.h"
#include "parser/Component.h"
#include "parser/parse_error.h"
#include "parser/Parser.h" // for flags
#include "parser/position.h"
#include "parser/position_dump.h"
#include "parser/position_info.h"
#include "parser/prefilter.h"
#include "parser/shortcut_literal.h"
#include "parser/unsupported.h"
#include "parser/utf8_validate.h"
#include "smallwrite/smallwrite_build.h"
#include "rose/rose_build.h"
#include "rose/rose_build_dump.h"
#include "som/slot_manager_dump.h"
#include "util/alloc.h"
#include "util/compile_error.h"
#include "util/target_info.h"
#include "util/verify_types.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <memory>
#include <sstream>
using namespace std;
namespace ue2 {
/** \brief Check that the extended parameters in \a ext are self-consistent.
 *
 * Throws CompileError if an unknown flag bit is set, or if min_offset or
 * min_length (when flagged as present) exceeds a flagged max_offset.
 */
static
void validateExt(const hs_expr_ext &ext) {
    static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET |
                                                    HS_EXT_FLAG_MAX_OFFSET |
                                                    HS_EXT_FLAG_MIN_LENGTH;
    if (ext.flags & ~ALL_EXT_FLAGS) {
        throw CompileError("Invalid hs_expr_ext flag set.");
    }

    const bool has_min_offset = (ext.flags & HS_EXT_FLAG_MIN_OFFSET) != 0;
    const bool has_max_offset = (ext.flags & HS_EXT_FLAG_MAX_OFFSET) != 0;
    const bool has_min_length = (ext.flags & HS_EXT_FLAG_MIN_LENGTH) != 0;

    // Both remaining checks compare against max_offset, so there is nothing
    // more to verify when it has not been supplied.
    if (!has_max_offset) {
        return;
    }

    if (has_min_offset && ext.min_offset > ext.max_offset) {
        throw CompileError("In hs_expr_ext, min_offset must be less than or "
                           "equal to max_offset.");
    }

    if (has_min_length && ext.min_length > ext.max_offset) {
        throw CompileError("In hs_expr_ext, min_length must be less than or "
                           "equal to max_offset.");
    }
}
/** \brief Parse \a expression and record its flags and extended parameters.
 *
 * Throws ParseError if the expression is in UTF-8 mode but is not valid UTF-8,
 * or if parsing produced no component. Throws CompileError for unrecognised
 * flags, for SOM_LEFTMOST combined with SINGLEMATCH or PREFILTER, and (via
 * validateExt) for inconsistent extended parameters.
 *
 * Note that the expression is parsed before the flags are validated.
 */
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
unsigned flags, ReportID actionId,
const hs_expr_ext *ext)
: utf8(false),
allow_vacuous(flags & HS_FLAG_ALLOWEMPTY),
highlander(flags & HS_FLAG_SINGLEMATCH),
prefilter(flags & HS_FLAG_PREFILTER),
som(SOM_NONE),
index(index_in),
id(actionId),
min_offset(0),
max_offset(MAX_OFFSET),
min_length(0) {
ParseMode mode(flags);
component = parse(expression, mode);
utf8 = mode.utf8; /* utf8 may be set by parse() */
if (utf8 && !isValidUtf8(expression)) {
throw ParseError("Expression is not valid UTF-8.");
}
if (!component) {
assert(0); // parse() should have thrown a ParseError.
throw ParseError("Parse error.");
}
if (flags & ~HS_FLAG_ALL) {
DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
throw CompileError("Unrecognised flag.");
}
// FIXME: we disallow highlander + SOM, see UE-1850.
if ((flags & HS_FLAG_SINGLEMATCH) && (flags & HS_FLAG_SOM_LEFTMOST)) {
throw CompileError("HS_FLAG_SINGLEMATCH is not supported in "
"combination with HS_FLAG_SOM_LEFTMOST.");
}
// FIXME: we disallow prefilter + SOM, see UE-1899.
if ((flags & HS_FLAG_PREFILTER) && (flags & HS_FLAG_SOM_LEFTMOST)) {
throw CompileError("HS_FLAG_PREFILTER is not supported in "
"combination with HS_FLAG_SOM_LEFTMOST.");
}
// Set SOM type.
if (flags & HS_FLAG_SOM_LEFTMOST) {
som = SOM_LEFT;
}
// Set extended parameters, if we have them.
if (ext) {
// Ensure that the given parameters make sense.
validateExt(*ext);
// Only fields whose presence flag is set override the defaults.
if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) {
min_offset = ext->min_offset;
}
if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) {
max_offset = ext->max_offset;
}
if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) {
min_length = ext->min_length;
}
}
// These are validated in validateExt, so an error will already have been
// thrown if these conditions don't hold.
assert(max_offset >= min_offset);
assert(max_offset >= min_length);
// Since prefiltering and SOM aren't supported together, we must squash any
// min_length constraint as well.
if (flags & HS_FLAG_PREFILTER && min_length) {
DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n");
min_length = 0;
}
}
#if defined(DUMP_SUPPORT) || defined(DEBUG)
/**
* \brief Dumps the parse tree to screen in debug mode and to disk in dump
* mode.
*
* \param expr Expression whose component tree is dumped.
* \param stage Label for the compile stage; used in the dump file name.
* \param grey Supplies the dump flags and dump path.
*/
void dumpExpression(UNUSED const ParsedExpression &expr,
UNUSED const char *stage, UNUSED const Grey &grey) {
#if defined(DEBUG)
DEBUG_PRINTF("===== Rule ID: %u (internalID: %u) =====\n", expr.id,
expr.index);
ostringstream debug_tree;
dumpTree(debug_tree, expr.component.get());
printf("%s\n", debug_tree.str().c_str());
#endif // DEBUG
#if defined(DUMP_SUPPORT)
// Written only when parse dumping has been requested.
if (grey.dumpFlags & Grey::DUMP_PARSE) {
stringstream ss;
ss << grey.dumpPath << "Expr_" << expr.index << "_componenttree_"
<< stage << ".txt";
ofstream out(ss.str().c_str());
out << "Component Tree for " << expr.id << endl;
dumpTree(out, expr.component.get());
if (expr.utf8) {
out << "UTF8 mode" << endl;
}
}
#endif // DUMP_SUPPORT
}
#endif // DUMP_SUPPORT || DEBUG
/** \brief Run Component tree optimisations on \a expr.
 *
 * Nothing is done when the expression has a min_length constraint or a SOM
 * mode set. */
static
void optimise(ParsedExpression &expr) {
    const bool can_optimise = !expr.min_length && !expr.som;
    if (can_optimise) {
        DEBUG_PRINTF("optimising\n");
        expr.component->optimise(true /* root is connected to sds */);
    }
}
/** \brief Parse one expression, build its graph and add it to \a ng.
 *
 * Throws CompileError or ParseError if the pattern is too long, fails to
 * parse, uses unsupported constructs, or cannot be built or added. Literal
 * expressions accepted by shortcutLiteral() return without building a graph.
 */
void addExpression(NG &ng, unsigned index, const char *expression,
unsigned flags, const hs_expr_ext *ext, ReportID id) {
assert(expression);
const CompileContext &cc = ng.cc;
DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s'\n", index, id, flags,
expression);
// Ensure that our pattern isn't too long (in characters).
if (strlen(expression) > cc.grey.limitPatternLength) {
throw CompileError("Pattern length exceeds limit.");
}
// Do per-expression processing: errors here will result in an exception
// being thrown up to our caller
ParsedExpression expr(index, expression, flags, id, ext);
dumpExpression(expr, "orig", cc.grey);
// Apply prefiltering transformations if desired.
if (expr.prefilter) {
prefilterTree(expr.component, ParseMode(flags));
dumpExpression(expr, "prefiltered", cc.grey);
}
// Expressions containing zero-width assertions and other extended pcre
// types aren't supported yet. This call will throw a ParseError exception
// if the component tree contains such a construct.
checkUnsupported(*expr.component);
expr.component->checkEmbeddedStartAnchor(true);
expr.component->checkEmbeddedEndAnchor(true);
if (cc.grey.optimiseComponentTree) {
optimise(expr);
dumpExpression(expr, "opt", cc.grey);
}
DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n",
expr.component.get(), expr.index, expr.id);
// You can only use the SOM flags if you've also specified an SOM
// precision mode.
if (expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) {
throw CompileError("To use a SOM expression flag in streaming mode, "
"an SOM precision mode (e.g. "
"HS_MODE_SOM_HORIZON_LARGE) must be specified.");
}
// If this expression is a literal, we can feed it directly to Rose rather
// than building the NFA graph.
if (shortcutLiteral(ng, expr)) {
DEBUG_PRINTF("took literal short cut\n");
return;
}
unique_ptr<NGWrapper> g = buildWrapper(ng.rm, cc, expr);
if (!g) {
DEBUG_PRINTF("NFA build failed on ID %u, but no exception was "
"thrown.\n", expr.id);
throw CompileError("Internal error.");
}
// Reject patterns that match everywhere unless the caller has opted in with
// HS_FLAG_ALLOWEMPTY.
if (!expr.allow_vacuous && matches_everywhere(*g)) {
throw CompileError("Pattern matches empty buffer; use "
"HS_FLAG_ALLOWEMPTY to enable support.");
}
if (!ng.addGraph(*g)) {
DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", expr.id);
throw CompileError("Error compiling expression.");
}
}
/** \brief Build the Rose engine for \a ng, adding a small-write engine when
 * one can be built.
 *
 * \return The built engine, or nullptr if buildRose() produced nothing.
 */
static
aligned_unique_ptr<RoseEngine> generateRoseEngine(NG &ng) {
// A finite minimum width is passed through; otherwise ROSE_BOUND_INF is used.
const u32 minWidth =
ng.minWidth.is_finite() ? verify_u32(ng.minWidth) : ROSE_BOUND_INF;
auto rose = ng.rose->buildRose(minWidth);
if (!rose) {
DEBUG_PRINTF("error building rose\n");
assert(0);
return nullptr;
}
/* avoid building a smwr if just a pure floating case. */
if (!roseIsPureLiteral(rose.get())) {
u32 qual = roseQuality(rose.get());
auto smwr = ng.smwr->build(qual);
if (smwr) {
// Replace the engine with the result of roseAddSmallWrite().
rose = roseAddSmallWrite(rose.get(), smwr.get());
}
}
dumpRose(*ng.rose, rose.get(), ng.cc.grey);
dumpReportManager(ng.rm, ng.cc.grey);
dumpSomSlotManager(ng.ssm, ng.cc.grey);
dumpSmallWrite(rose.get(), ng.cc.grey);
return rose;
}
/** \brief Translate \a target_info into platform flags.
 *
 * \return HS_PLATFORM_NOAVX2 if the target reports no AVX2 support,
 *         otherwise 0.
 */
platform_t target_to_platform(const target_t &target_info) {
    platform_t p = 0;

    const bool avx2_available = target_info.has_avx2();
    if (!avx2_available) {
        p |= HS_PLATFORM_NOAVX2;
    }

    return p;
}
/** \brief Generate the Rose engine for \a ng and wrap it in a database.
 *
 * \param ng The global NG object.
 * \param[out] length Receives the size reported by roseSize(); must not be
 *        null.
 * \return The database created by dbCreate().
 *
 * Throws CompileError if the engine cannot be generated, has zero length, or
 * the database cannot be allocated.
 */
struct hs_database *build(NG &ng, unsigned int *length) {
    assert(length);

    auto rose = generateRoseEngine(ng);
    if (!rose) {
        throw CompileError("Unable to generate bytecode.");
    }

    *length = roseSize(rose.get());
    if (*length == 0) {
        DEBUG_PRINTF("RoseEngine has zero length\n");
        assert(0);
        throw CompileError("Internal error.");
    }

    const platform_t p = target_to_platform(ng.cc.target_info);
    const char *bytecode = reinterpret_cast<const char *>(rose.get());

    struct hs_database *db = dbCreate(bytecode, *length, p);
    if (db) {
        return db;
    }
    throw CompileError("Could not allocate memory for bytecode.");
}
/** \brief Erase every element of \a v that compares equal to
 * PositionInfo(\a pos). */
static
void stripFromPositions(vector<PositionInfo> &v, Position pos) {
    v.erase(std::remove(v.begin(), v.end(), PositionInfo(pos)), v.end());
}
/** \brief Connect the start and start-dotstar states to the first positions
 * of the expression.
 *
 * Epsilon entries in the first set are handled by passing copies of the two
 * start states, carrying the epsilon's flags, to connectAccepts().
 */
static
void connectInitialStates(GlushkovBuildState &bs,
const ParsedExpression &expr) {
vector<PositionInfo> initials = expr.component->first();
const NFABuilder &builder = bs.getBuilder();
const Position startState = builder.getStart();
const Position startDotStarState = builder.getStartDotStar();
DEBUG_PRINTF("wiring initials = %s\n",
dumpPositions(initials.begin(), initials.end()).c_str());
vector<PositionInfo> starts = {startState, startDotStarState};
// strip start and startDs, which can be present due to boundaries
stripFromPositions(initials, startState);
stripFromPositions(initials, startDotStarState);
// replace epsilons with accepts
for (const auto &s : initials) {
if (s.pos != GlushkovBuildState::POS_EPSILON) {
continue;
}
assert(starts.size() == 2); /* start, startds */
// Work on a copy so that the flags do not leak into `starts` itself.
vector<PositionInfo> starts_temp = starts;
starts_temp[0].flags = s.flags;
starts_temp[1].flags = s.flags;
bs.connectAccepts(starts_temp);
}
if (!initials.empty()) {
bs.connectRegions(starts, initials);
}
}
/** \brief Pass the last positions of the expression's component tree to
 * GlushkovBuildState::connectAccepts(). */
static
void connectFinalStates(GlushkovBuildState &bs, const ParsedExpression &expr) {
    vector<PositionInfo> lasts = expr.component->last();

    DEBUG_PRINTF("wiring finals = %s\n",
                 dumpPositions(lasts.begin(), lasts.end()).c_str());

    bs.connectAccepts(lasts);
}
#ifndef NDEBUG
/** \brief Non-throwing wrapper around checkUnsupported().
 *
 * \return false if checkUnsupported() throws a ParseError for \a c, true
 *         otherwise. Only compiled when NDEBUG is not defined.
 */
static
bool isSupported(const Component &c) {
    bool supported = true;
    try {
        checkUnsupported(c);
    } catch (ParseError &) {
        supported = false;
    }
    return supported;
}
#endif
/** \brief Construct the NFA graph for \a expr and resolve its assert vertices.
 *
 * \param rm Report manager for this compile.
 * \param cc Compile context.
 * \param expr Parsed expression; its component tree must contain no
 *        unsupported constructs (asserted).
 * \return The graph taken from the builder, after removeAssertVertices().
 */
unique_ptr<NGWrapper> buildWrapper(ReportManager &rm, const CompileContext &cc,
const ParsedExpression &expr) {
assert(isSupported(*expr.component));
const unique_ptr<NFABuilder> builder = makeNFABuilder(rm, cc, expr);
assert(builder);
// Set up START and ACCEPT states; retrieve the special states
const auto bs = makeGlushkovBuildState(*builder, expr.prefilter);
// Map position IDs to characters/components
expr.component->notePositions(*bs);
// Wire the start dotstar state to the firsts
connectInitialStates(*bs, expr);
DEBUG_PRINTF("wire up body of expr\n");
// Build the rest of the FOLLOW set
vector<PositionInfo> initials = {builder->getStartDotStar(),
builder->getStart()};
expr.component->buildFollowSet(*bs, initials);
// Wire the lasts to the accept state
connectFinalStates(*bs, expr);
// Create our edges
bs->buildEdges();
auto g = builder->getGraph();
assert(g);
dumpDotWrapper(*g, "00_before_asserts", cc.grey);
// Convert the temporary assert vertices into edge flags before returning.
removeAssertVertices(rm, *g);
return g;
}
} // namespace ue2

152
src/compiler/compiler.h Normal file
View File

@@ -0,0 +1,152 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Compiler front-end interface
*/
#ifndef COMPILER_H
#define COMPILER_H
#include "ue2common.h"
#include "database.h"
#include "parser/Component.h"
#include "som/som.h"
#include <memory>
#include <boost/core/noncopyable.hpp>
struct hs_database;
struct hs_expr_ext;
namespace ue2 {
struct CompileContext;
struct Grey;
struct target_t;
class NG;
class ReportManager;
class NGWrapper;
/**
 * \brief Class gathering together the pieces of a parsed expression.
 *
 * Note: Owns the provided component (it is held in a std::unique_ptr).
 * Instances cannot be copied, as the class derives from boost::noncopyable.
 */
class ParsedExpression : boost::noncopyable {
public:
/**
 * NOTE(review): the constructor is defined outside this header. Its
 * parameters appear to correspond to those documented on addExpression()
 * (expression index, pattern text, flags, user-specified ID and optional
 * extended parameters) -- confirm against the definition.
 *
 * \a ext is optional and defaults to nullptr.
 */
ParsedExpression(unsigned index, const char *expression, unsigned flags,
ReportID actionId, const hs_expr_ext *ext = nullptr);
bool utf8; //!< UTF-8 mode flag specified
/** \brief root node of parsed component tree. */
std::unique_ptr<ue2::Component> component;
const bool allow_vacuous; //!< HS_FLAG_ALLOWEMPTY specified
const bool highlander; //!< HS_FLAG_SINGLEMATCH specified
const bool prefilter; //!< HS_FLAG_PREFILTER specified
som_type som; //!< chosen SOM mode, or SOM_NONE
/** \brief index in expressions array passed to \ref hs_compile_multi */
const unsigned index;
const ReportID id; //!< user-specified pattern ID
// NOTE(review): the three bounds below presumably come from the optional
// hs_expr_ext passed to the constructor -- confirm against the definition.
u64a min_offset; //!< 0 if not used
u64a max_offset; //!< MAX_OFFSET if not used
u64a min_length; //!< 0 if not used
};
/**
 * \brief Add an expression to the compiler.
 *
 * @param ng
 *      The global NG object.
 * @param index
 *      The index of the expression (used for errors).
 * @param expression
 *      NUL-terminated PCRE expression.
 * @param flags
 *      The full set of Hyperscan flags associated with this rule.
 * @param ext
 *      Struct containing extra parameters for this expression, or NULL if
 *      none.
 * @param actionId
 *      The identifier to associate with the expression; returned by engine on
 *      match.
 */
void addExpression(NG &ng, unsigned index, const char *expression,
unsigned flags, const hs_expr_ext *ext, ReportID actionId);
/**
 * \brief Build a Hyperscan database out of the expressions we've been given.
 *
 * A fatal error will result in an exception being thrown.
 *
 * @param ng
 *      The global NG object.
 * @param[out] length
 *      The number of bytes occupied by the compiled structure.
 * @return
 *      The compiled structure. Should be deallocated with the
 *      hs_database_free() function.
 */
struct hs_database *build(NG &ng, unsigned int *length);
/**
 * \brief Constructs an NFA graph from the given expression tree.
 *
 * @param rm
 *      Global ReportManager for this compile.
 * @param cc
 *      Global compile context for this compile.
 * @param expr
 *      ParsedExpression object.
 * @return
 *      The graph, owned by the returned std::unique_ptr; nullptr on error.
 */
std::unique_ptr<NGWrapper> buildWrapper(ReportManager &rm,
const CompileContext &cc,
const ParsedExpression &expr);
/**
 * \brief Build a platform_t out of a target_t.
 *
 * @param target_info
 *      The target description to convert.
 * @return
 *      The platform_t value built from \a target_info.
 */
platform_t target_to_platform(const target_t &target_info);
/**
 * \brief Debugging hook taking a parsed expression, a stage label and a Grey
 * object.
 *
 * When DUMP_SUPPORT or DEBUG is defined, only the declaration appears here and
 * the definition is provided elsewhere. Otherwise the function is an inline
 * stub with an empty body, so calling it has no effect.
 */
#if defined(DUMP_SUPPORT) || defined(DEBUG)
void dumpExpression(const ParsedExpression &expr, const char *stage,
const Grey &grey);
#else
// Empty stub: every parameter is marked UNUSED because the body ignores them.
static really_inline
void dumpExpression(UNUSED const ParsedExpression &expr,
UNUSED const char *stage, UNUSED const Grey &grey) {
}
#endif
} // namespace
#endif // COMPILER_H

95
src/compiler/error.cpp Normal file
View File

@@ -0,0 +1,95 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Compile-time error utils.
*/
#include "allocator.h"
#include "error.h"
#include "ue2common.h"
#include "hs_compile.h"
#include "util/compile_error.h"
#include <cstring>
#include <string>
using std::string;
// Message text for the two statically allocated error objects defined below.
static const char failureNoMemory[] = "Unable to allocate memory.";
static const char failureInternal[] = "Internal error.";
/**
 * Statically allocated "out of memory" error object. Its first initialiser is
 * a pointer to failureNoMemory (with const cast away to match the field type)
 * and the remaining initialiser is 0. freeCompileError() in this file
 * recognises this object by address and does not free it.
 */
extern const hs_compile_error_t hs_enomem = {
const_cast<char *>(failureNoMemory), 0
};
/**
 * Statically allocated "internal error" object, built the same way from
 * failureInternal. freeCompileError() in this file likewise leaves it alone.
 */
extern const hs_compile_error_t hs_einternal = {
const_cast<char *>(failureInternal), 0
};
namespace ue2 {
/**
 * \brief Allocate a compile error object carrying a private copy of \a err.
 *
 * Both the error structure and its message buffer are obtained from
 * hs_misc_alloc(). If either allocation fails, anything already allocated is
 * released with hs_misc_free() and a pointer to the static hs_enomem object is
 * returned instead (with const cast away).
 *
 * \param err         Message text; copied including its terminating NUL.
 * \param expression  Value stored in the error's \c expression field.
 * \return The newly allocated error, or &hs_enomem on allocation failure.
 */
hs_compile_error_t *generateCompileError(const string &err, int expression) {
    hs_compile_error_t *error =
        (struct hs_compile_error *)hs_misc_alloc(sizeof(hs_compile_error_t));
    if (!error) {
        return const_cast<hs_compile_error_t *>(&hs_enomem);
    }

    // Room for the message text plus its terminating NUL.
    const size_t textLen = err.size() + 1;
    char *text = (char *)hs_misc_alloc(textLen);
    if (!text) {
        // Could not copy the message: give back the structure and fall back
        // to the static out-of-memory error.
        hs_misc_free(error);
        return const_cast<hs_compile_error_t *>(&hs_enomem);
    }

    memcpy(text, err.c_str(), textLen);
    error->message = text;
    error->expression = expression;
    return error;
}
/**
 * \brief Build a compile error from a CompileError exception object.
 *
 * The message is taken from \c e.reason. The expression index is \c e.index
 * (converted to int) when \c e.hasIndex is set, and -1 otherwise.
 */
hs_compile_error_t *generateCompileError(const CompileError &e) {
    int exprIndex = -1;
    if (e.hasIndex) {
        exprIndex = (int)e.index;
    }
    return generateCompileError(e.reason, exprIndex);
}
/**
 * \brief Release a compile error object.
 *
 * A null pointer is ignored, as are the static hs_enomem and hs_einternal
 * objects, which are not allocated. For any other error, the message buffer
 * and then the structure itself are released with hs_misc_free().
 */
void freeCompileError(hs_compile_error_t *error) {
    const bool isStaticError = (error == &hs_enomem) || (error == &hs_einternal);
    if (error && !isStaticError) {
        hs_misc_free(error->message);
        hs_misc_free(error);
    }
}
} // namespace ue2

55
src/compiler/error.h Normal file
View File

@@ -0,0 +1,55 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Compile-time error utils.
*/
#ifndef COMPILE_ERROR_H
#define COMPILE_ERROR_H
#include <string>
struct hs_compile_error;
// Special, statically allocated errors: these are not allocated with
// hs_alloc/hs_free, and only their declarations appear in this header.
extern const hs_compile_error hs_enomem;
extern const hs_compile_error hs_einternal;
namespace ue2 {
class CompileError;
/**
 * Allocate an error object holding a copy of \a err and the given expression
 * index. Returns a pointer to the static hs_enomem object if memory cannot be
 * allocated.
 */
hs_compile_error *generateCompileError(const std::string &err, int expression);
/**
 * As above, taking the message and (if present) the expression index from a
 * CompileError; an index of -1 is used when the CompileError has none.
 */
hs_compile_error *generateCompileError(const CompileError &e);
/**
 * Release an error object. Null pointers and the static hs_enomem and
 * hs_einternal objects are ignored.
 */
void freeCompileError(hs_compile_error *error);
} // namespace ue2
#endif

652
src/crc32.c Normal file
View File

@@ -0,0 +1,652 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "crc32.h"
#include "config.h"
#include "ue2common.h"
#if defined(HAVE_C_X86INTRIN_H)
#include <x86intrin.h>
#elif defined(HAVE_C_INTRIN_H)
#include <intrin.h>
#endif
#ifndef __SSE4_2__
/***
*** What follows is derived from Intel's Slicing-by-8 CRC32 impl, which is BSD
*** licensed and available from http://sourceforge.net/projects/slicing-by-8/
***/
/*
* Copyright (c) 2004-2006 Intel Corporation - All Rights Reserved
*
*
* This software program is licensed subject to the BSD License,
* available at http://www.opensource.org/licenses/bsd-license.html.
*
* Abstract:
*
* Tables for software CRC generation
*/
/*
* The following CRC lookup table was generated automagically
* using the following model parameters:
*
* Generator Polynomial = ................. 0x1EDC6F41
* Generator Polynomial Length = .......... 32 bits
* Reflected Bits = ....................... TRUE
* Table Generation Offset = .............. 32 bits
* Number of Slices = ..................... 8 slices
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
* Directory Name = ....................... .\
* File Name = ............................ 8x256_tables.c
*/
static
u32 crc_tableil8_o32[256] =
{
0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
};
/*
* end of the CRC lookup table crc_tableil8_o32
*/
/*
* The following CRC lookup table was generated automagically
* using the following model parameters:
*
* Generator Polynomial = ................. 0x1EDC6F41
* Generator Polynomial Length = .......... 32 bits
* Reflected Bits = ....................... TRUE
* Table Generation Offset = .............. 32 bits
* Number of Slices = ..................... 8 slices
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
* Directory Name = ....................... .\
* File Name = ............................ 8x256_tables.c
*/
static
u32 crc_tableil8_o40[256] =
{
0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945,
0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD,
0x3FC5F181, 0x2C6769F6, 0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4,
0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C,
0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, 0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47,
0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, 0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF,
0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28, 0x298143B1, 0x3A23DBC6,
0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, 0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E,
0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41,
0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9,
0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C, 0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0,
0x5DC6F43D, 0x4E646C4A, 0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78,
0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, 0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43,
0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27, 0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB,
0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, 0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2,
0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, 0x6CC776E3, 0x7F65EE94, 0x4B82460D, 0x5820DE7A,
0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, 0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC,
0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004,
0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, 0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D,
0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059, 0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185,
0x844819FB, 0x97EA818C, 0xA30D2915, 0xB0AFB162, 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE,
0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, 0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306,
0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3, 0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F,
0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, 0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287,
0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, 0x4A5E5D21, 0x59FCC556, 0x6D1B6DCF, 0x7EB9F5B8,
0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, 0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 0xE3AD3600,
0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439,
0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, 0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781,
0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766, 0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA,
0xE64B1C47, 0xF5E98430, 0xC10E2CA9, 0xD2ACB4DE, 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502,
0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B,
0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, 0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483
};
/*
* end of the CRC lookup table crc_tableil8_o40
*/
/*
* The following CRC lookup table was generated automagically
* using the following model parameters:
*
* Generator Polynomial = ................. 0x1EDC6F41
* Generator Polynomial Length = .......... 32 bits
* Reflected Bits = ....................... TRUE
* Table Generation Offset = .............. 32 bits
* Number of Slices = ..................... 8 slices
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
* Directory Name = ....................... .\
* File Name = ............................ 8x256_tables.c
*/
static
u32 crc_tableil8_o48[256] =
{
0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469,
0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC,
0x70A27D8A, 0xD5E3EFF4, 0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3,
0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, 0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726,
0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67, 0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D,
0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, 0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8,
0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, 0x0F382284, 0xAA79B0FA, 0x40577089, 0xE516E2F7,
0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, 0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32,
0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0,
0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, 0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75,
0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, 0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A,
0x8F96C396, 0x2AD751E8, 0xC0F9919B, 0x65B803E5, 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF,
0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, 0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4,
0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B, 0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161,
0x56830647, 0xF3C29439, 0x19EC544A, 0xBCADC634, 0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E,
0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, 0xF00C9C98, 0x554D0EE6, 0xBF63CE95, 0x1A225CEB,
0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, 0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A,
0xB3764986, 0x1637DBF8, 0xFC191B8B, 0x595889F5, 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF,
0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, 0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0,
0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F, 0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065,
0x6A638C57, 0xCF221E29, 0x250CDE5A, 0x804D4C24, 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E,
0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, 0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB,
0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE, 0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4,
0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, 0xBC4E6B02, 0x190FF97C, 0xF321390F, 0x5660AB71,
0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, 0xD29C5380, 0x77DDC1FE, 0x9DF3018D, 0x38B293F3,
0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, 0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36,
0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79,
0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, 0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC,
0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD, 0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7,
0x9557324B, 0x3016A035, 0xDA386046, 0x7F79F238, 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622,
0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D,
0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, 0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8
};
/*
* end of the CRC lookup table crc_tableil8_o48
*/
/*
* The following CRC lookup table was generated automagically
* using the following model parameters:
*
* Generator Polynomial = ................. 0x1EDC6F41
* Generator Polynomial Length = .......... 32 bits
* Reflected Bits = ....................... TRUE
* Table Generation Offset = .............. 32 bits
* Number of Slices = ..................... 8 slices
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
* Directory Name = ....................... .\
* File Name = ............................ 8x256_tables.c
*/
static
u32 crc_tableil8_o56[256] =
{
0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA,
0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF, 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C,
0xE964B13D, 0x34211B85, 0x560392BC, 0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7,
0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, 0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11,
0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2, 0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41,
0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, 0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7,
0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, 0x45639445, 0x98263EFD, 0xFA04B7C4, 0x27411D7C,
0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, 0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A,
0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D,
0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, 0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB,
0x42C2EEDA, 0x9F874462, 0xFDA5CD5B, 0x20E067E3, 0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610,
0xB4868D3C, 0x69C32784, 0x0BE1AEBD, 0xD6A40405, 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6,
0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, 0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6,
0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3, 0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040,
0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, 0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B,
0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, 0x1881A844, 0xC5C402FC, 0xA7E68BC5, 0x7AA3217D,
0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, 0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5,
0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213,
0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, 0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8,
0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD, 0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E,
0x8585DDB4, 0x58C0770C, 0x3AE2FE35, 0xE7A7548D, 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E,
0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, 0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698,
0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0, 0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443,
0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, 0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5,
0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, 0x8224A72B, 0x5F610D93, 0x3D4384AA, 0xE0062E12,
0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, 0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4,
0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F,
0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, 0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9,
0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A, 0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99,
0xD867E1B5, 0x05224B0D, 0x6700C234, 0xBA45688C, 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F,
0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4,
0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, 0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842
};
/*
* end of the CRC lookup table crc_tableil8_o56
*/
/*
* The following CRC lookup table was generated automagically
* using the following model parameters:
*
* Generator Polynomial = ................. 0x1EDC6F41
* Generator Polynomial Length = .......... 32 bits
* Reflected Bits = ....................... TRUE
* Table Generation Offset = .............. 32 bits
* Number of Slices = ..................... 8 slices
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
* Directory Name = ....................... .\
* File Name = ............................ 8x256_tables.c
*/
static
u32 crc_tableil8_o64[256] =
{
0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44,
0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65, 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5,
0x8F2261D3, 0xB7330E7F, 0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97,
0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, 0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406,
0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3, 0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13,
0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, 0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082,
0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, 0x74CF6A34, 0x4CDE0598, 0x04EDB56C, 0x3CFCDAC0,
0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, 0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151,
0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA,
0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, 0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B,
0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89, 0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539,
0x7D1400EC, 0x45056F40, 0x0D36DFB4, 0x3527B018, 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8,
0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, 0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD,
0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C, 0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C,
0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, 0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E,
0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, 0x86F90B0B, 0xBEE864A7, 0xF6DBD453, 0xCECABBFF,
0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, 0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18,
0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089,
0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, 0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB,
0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, 0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A,
0x750A600B, 0x4D1B0FA7, 0x0528BF53, 0x3D39D0FF, 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F,
0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, 0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE,
0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C, 0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C,
0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, 0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 0x9739040D,
0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, 0xB9B60142, 0x81A76EEE, 0xC994DE1A, 0xF185B1B6,
0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, 0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27,
0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065,
0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, 0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4,
0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51, 0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1,
0x873C0134, 0xBF2D6E98, 0xF71EDE6C, 0xCF0FB1C0, 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70,
0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532,
0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013, 0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3
};
/*
* end of the CRC lookup table crc_tableil8_o64
*/
/*
* The following CRC lookup table was generated automagically
* using the following model parameters:
*
* Generator Polynomial = ................. 0x1EDC6F41
* Generator Polynomial Length = .......... 32 bits
* Reflected Bits = ....................... TRUE
* Table Generation Offset = .............. 32 bits
* Number of Slices = ..................... 8 slices
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
* Directory Name = ....................... .\
* File Name = ............................ 8x256_tables.c
*/
static
u32 crc_tableil8_o72[256] =
{
0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD,
0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2,
0xC00C303E, 0x2F3C5B27, 0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93,
0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, 0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C,
0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57, 0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20,
0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, 0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F,
0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, 0xF70D11C4, 0x183D7ADD, 0x2C81B107, 0xC3B1DA1E,
0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, 0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201,
0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746,
0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, 0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59,
0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F, 0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778,
0xAE0E73CA, 0x413E18D3, 0x7582D309, 0x9AB2B810, 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67,
0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, 0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB,
0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3, 0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4,
0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, 0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5,
0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, 0x990F5230, 0x763F3929, 0x4283F2F3, 0xADB399EA,
0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, 0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B,
0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364,
0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, 0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45,
0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D, 0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A,
0x99FCA15B, 0x76CCCA42, 0x42700198, 0xAD406A81, 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6,
0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, 0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9,
0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF, 0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8,
0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, 0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7,
0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, 0xA0F9DB4A, 0x4FC9B053, 0x7B757B89, 0x94451090,
0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, 0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F,
0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE,
0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, 0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1,
0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A, 0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D,
0xF7FEE2AF, 0x18CE89B6, 0x2C72426C, 0xC3422975, 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02,
0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623,
0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, 0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C
};
/*
* end of the CRC lookup table crc_tableil8_o72
*/
/*
* The following CRC lookup table was generated automagically
* using the following model parameters:
*
* Generator Polynomial = ................. 0x1EDC6F41
* Generator Polynomial Length = .......... 32 bits
* Reflected Bits = ....................... TRUE
* Table Generation Offset = .............. 32 bits
* Number of Slices = ..................... 8 slices
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
* Directory Name = ....................... .\
* File Name = ............................ 8x256_tables.c
*/
/* Slicing-by-8 lookup table (256 u32 entries). In crc32c_sb8_64_bit it is
 * indexed with bits 8-15 of the running CRC after the first 32-bit word of
 * each 8-byte block has been XORed in. */
static
u32 crc_tableil8_o80[256] =
{
0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089,
0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B, 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA,
0x9C5BFAA6, 0xF458D66E, 0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F,
0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, 0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C,
0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5, 0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334,
0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, 0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67,
0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, 0x04E0BCCA, 0x6CE39002, 0xD4E6E55A, 0xBCE5C992,
0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, 0x4ACD4199, 0x22CE6D51, 0x9ACB1809, 0xF2C834C1,
0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3,
0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, 0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0,
0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884, 0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55,
0xA8C1008F, 0xC0C22C47, 0x78C7591F, 0x10C475D7, 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006,
0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, 0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E,
0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC, 0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D,
0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, 0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8,
0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, 0x307A46E3, 0x58796A2B, 0xE07C1F73, 0x887F33BB,
0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, 0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D,
0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E,
0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, 0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB,
0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, 0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988,
0xC8358D49, 0xA036A181, 0x1833D4D9, 0x7030F811, 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0,
0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, 0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093,
0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7, 0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766,
0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, 0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35,
0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, 0x2A39CC5F, 0x423AE097, 0xFA3F95CF, 0x923CB907,
0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, 0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454,
0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1,
0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, 0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2,
0xB2828A33, 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B, 0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA,
0xFCAF7760, 0x94AC5BA8, 0x2CA92EF0, 0x44AA0238, 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9,
0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, 0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C,
0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, 0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F
};
/*
* end of the CRC lookup table crc_tableil8_o80
*/
/*
* The following CRC lookup table was generated automagically
* using the following model parameters:
*
* Generator Polynomial = ................. 0x1EDC6F41
* Generator Polynomial Length = .......... 32 bits
* Reflected Bits = ....................... TRUE
* Table Generation Offset = .............. 32 bits
* Number of Slices = ..................... 8 slices
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
* Directory Name = ....................... .\
* File Name = ............................ 8x256_tables.c
*/
/* Slicing-by-8 lookup table (256 u32 entries). In crc32c_sb8_64_bit it is
 * indexed with the low 8 bits of the running CRC after the first 32-bit word
 * of each 8-byte block has been XORed in. */
static
u32 crc_tableil8_o88[256] =
{
0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504,
0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3, 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE,
0x847609B4, 0xCD4A7493, 0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0,
0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, 0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A,
0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0, 0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D,
0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, 0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447,
0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, 0xA86BEE40, 0xE1579367, 0x3A13140E, 0x732F6929,
0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, 0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3,
0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36,
0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, 0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC,
0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF, 0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782,
0xDC4DC65C, 0x9571BB7B, 0x4E353C12, 0x07094135, 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358,
0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, 0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF,
0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18, 0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75,
0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, 0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B,
0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, 0xF05021A8, 0xB96C5C8F, 0x6228DBE6, 0x2B14A6C1,
0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, 0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360,
0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA,
0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, 0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4,
0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63, 0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E,
0x3901F3FD, 0x703D8EDA, 0xAB7909B3, 0xE2457494, 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9,
0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, 0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223,
0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20, 0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D,
0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, 0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97,
0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, 0x0F1CDF3B, 0x4620A21C, 0x9D642575, 0xD4585852,
0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, 0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88,
0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6,
0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, 0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C,
0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6, 0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB,
0x613A3C15, 0x28064132, 0xF342C65B, 0xBA7EBB7C, 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911,
0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, 0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F,
0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, 0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5
};
/*
* end of the CRC lookup table crc_tableil8_o88
*/
//#define VERIFY_ASSERTION
#ifdef VERIFY_ASSERTION
// Trivial byte-by-byte version: you can switch on the assertion in the
// Crc32_ComputeBuf function (by defining VERIFY_ASSERTION) to check this
// against the slicing variant.
/* Reference CRC: folds the buffer into running_crc one byte per step using
 * only the crc_tableil8_o32 table, and returns the updated CRC. */
static really_inline
u32 crc32c(u32 running_crc, const unsigned char* p_buf, size_t length) {
    u32 acc = running_crc;
    for (size_t i = 0; i < length; i++) {
        const u32 idx = (acc ^ p_buf[i]) & 0x000000FF;
        acc = crc_tableil8_o32[idx] ^ (acc >> 8);
    }
    return acc;
}
#endif // VERIFY_ASSERTION
// Slicing-by-8 approach, which is much faster. Derived from Intel's
// BSD-licensed code, with additions to handle unaligned buffers
// automatically.
//
// running_crc: CRC value to continue from.
// p_buf:       start of the data; any alignment is accepted.
// length:      number of bytes at p_buf.
// Returns the CRC after all length bytes have been folded in.
static really_inline
u32 crc32c_sb8_64_bit(u32 running_crc, const unsigned char* p_buf,
                      const size_t length) {
    u32 crc = running_crc;

    // Process byte-by-byte until p_buf is aligned. The lead-in is clamped to
    // length: a short buffer may end before the next 4-byte boundary, and an
    // unclamped value would both over-read and make (length - init_bytes)
    // wrap around to a huge count.
    const unsigned char *aligned_buf = ROUNDUP_PTR(p_buf, 4);
    size_t init_bytes = aligned_buf - p_buf;
    if (init_bytes > length) {
        init_bytes = length;
    }
    size_t running_length = ((length - init_bytes)/8)*8;
    size_t end_bytes = length - init_bytes - running_length;

    for (size_t li = 0; li < init_bytes; li++) {
        crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
    }

    // Main aligned loop, processes eight bytes at a time.
    u32 term1, term2;
    for (size_t li = 0; li < running_length/8; li++) {
        // First word: XOR into the CRC, then look up each of its four bytes.
        u32 block = *(const u32 *)p_buf;
        crc ^= block;
        p_buf += 4;
        term1 = crc_tableil8_o88[crc & 0x000000FF] ^
                crc_tableil8_o80[(crc >> 8) & 0x000000FF];
        term2 = crc >> 16;
        crc = term1 ^
              crc_tableil8_o72[term2 & 0x000000FF] ^
              crc_tableil8_o64[(term2 >> 8) & 0x000000FF];

        // Second word: looked up directly, not mixed with the CRC first.
        block = *(const u32 *)p_buf;
        term1 = crc_tableil8_o56[block & 0x000000FF] ^
                crc_tableil8_o48[(block >> 8) & 0x000000FF];
        term2 = block >> 16;
        crc = crc ^
              term1 ^
              crc_tableil8_o40[term2 & 0x000000FF] ^
              crc_tableil8_o32[(term2 >> 8) & 0x000000FF];
        p_buf += 4;
    }

    // Remaining bytes
    for (size_t li = 0; li < end_bytes; li++) {
        crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
    }

    return crc;
}
#else // __SSE4_2__
#ifdef ARCH_64_BIT
#define CRC_WORD 8
#define CRC_TYPE u64a
#define CRC_FUNC _mm_crc32_u64
#else
#define CRC_WORD 4
#define CRC_TYPE u32
#define CRC_FUNC _mm_crc32_u32
#endif
/*
 * Use the crc32 instruction from SSE4.2 to compute our checksum - same
 * polynomial as the above function.
 *
 * running_crc: CRC value to continue from.
 * p_buf:       start of the data; any alignment is accepted.
 * length:      number of bytes at p_buf.
 * Returns the CRC after all length bytes have been folded in.
 */
static really_inline
u32 crc32c_sse42(u32 running_crc, const unsigned char* p_buf,
                 const size_t length) {
    u32 crc = running_crc;

    // Process byte-by-byte until p_buf is aligned. The lead-in is clamped to
    // length: a short buffer may end before the next CRC_WORD boundary, and
    // an unclamped value would both over-read and make (length - init_bytes)
    // wrap around to a huge count.
    const unsigned char *aligned_buf = ROUNDUP_PTR(p_buf, CRC_WORD);
    size_t init_bytes = aligned_buf - p_buf;
    if (init_bytes > length) {
        init_bytes = length;
    }
    size_t running_length = ((length - init_bytes)/CRC_WORD)*CRC_WORD;
    size_t end_bytes = length - init_bytes - running_length;

    for (size_t li = 0; li < init_bytes; li++) {
        crc = _mm_crc32_u8(crc, *p_buf++);
    }

    // Main aligned loop, processes a word at a time.
    for (size_t li = 0; li < running_length/CRC_WORD; li++) {
        CRC_TYPE block = *(const CRC_TYPE *)p_buf;
        crc = CRC_FUNC(crc, block);
        p_buf += CRC_WORD;
    }

    // Remaining bytes
    for (size_t li = 0; li < end_bytes; li++) {
        crc = _mm_crc32_u8(crc, *p_buf++);
    }

    return crc;
}
#endif
#ifdef VERIFY_ASSERTION
#include <assert.h>
#endif
// Public entry point: continues the CRC inCrc32 over bufLen bytes at buf,
// using the SSE4.2 routine when __SSE4_2__ is defined and the table-driven
// routine otherwise.
u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen) {
    const unsigned char *data = (const unsigned char *)buf;
    u32 result;
#ifdef __SSE4_2__
    result = crc32c_sse42(inCrc32, data, bufLen);
#else
    result = crc32c_sb8_64_bit(inCrc32, data, bufLen);
#endif
#ifdef VERIFY_ASSERTION
    // Cross-check against the byte-at-a-time reference implementation.
    assert(result == crc32c(inCrc32, data, bufLen));
#endif
    return result;
}

46
src/crc32.h Normal file
View File

@@ -0,0 +1,46 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CRC32_H_36A5015B5840C1
#define CRC32_H_36A5015B5840C1
#include "ue2common.h"
#ifdef __cplusplus
extern "C"
{
#endif
/**
 * Compute a CRC over a buffer.
 *
 * inCrc32 is the CRC value to continue from (callers in database.c pass 0),
 * buf is the data and bufLen its length in bytes. Returns the updated CRC.
 */
u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen);
#ifdef __cplusplus
}
#endif
#endif /* CRC32_H_36A5015B5840C1 */

507
src/database.c Normal file
View File

@@ -0,0 +1,507 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Runtime code for hs_database manipulation.
*/
#include <stdio.h>
#include <string.h>
#include "allocator.h"
#include "hs_common.h"
#include "hs_internal.h"
#include "hs_version.h"
#include "ue2common.h"
#include "database.h"
#include "crc32.h"
#include "rose/rose_internal.h"
#include "util/unaligned.h"
/* Reports whether db satisfies the alignment of unsigned long long. */
static really_inline
int db_correctly_aligned(const void *db) {
    const int aligned = ISALIGNED_N(db, alignof(unsigned long long));
    return aligned;
}
/* Releases a database through hs_database_free(). A non-NULL pointer whose
 * magic field is wrong is rejected with HS_INVALID and left untouched; a
 * NULL pointer is handed straight to the free routine. */
HS_PUBLIC_API
hs_error_t hs_free_database(hs_database_t *db) {
    const int bad_magic = (db != NULL) && (db->magic != HS_DB_MAGIC);
    if (bad_magic) {
        return HS_INVALID;
    }

    hs_database_free(db);
    return HS_SUCCESS;
}
/* Serializes db into a buffer obtained from hs_misc_alloc.
 *
 * On success *bytes receives the buffer and *serialized_length its size,
 * which is sizeof(struct hs_database) + db->length. The buffer is zeroed,
 * then filled with the header fields as consecutive 32-bit words (platform
 * occupies two) followed by the bytecode.
 *
 * Returns HS_INVALID for NULL arguments, HS_BAD_ALIGN for a misaligned db,
 * or whatever validDatabase()/hs_check_alloc() report on failure. */
HS_PUBLIC_API
hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes,
                                 size_t *serialized_length) {
    if (!db || !bytes || !serialized_length) {
        return HS_INVALID;
    }
    if (!db_correctly_aligned(db)) {
        return HS_BAD_ALIGN;
    }

    hs_error_t status = validDatabase(db);
    if (status != HS_SUCCESS) {
        return status;
    }

    const size_t total = sizeof(struct hs_database) + db->length;
    char *out = hs_misc_alloc(total);
    status = hs_check_alloc(out);
    if (status != HS_SUCCESS) {
        hs_misc_free(out);
        return status;
    }
    memset(out, 0, total);

    // Header, written word by word; w counts 32-bit words from the start.
    u32 *words = (u32 *)out;
    size_t w = 0;
    words[w++] = db->magic;
    words[w++] = db->version;
    words[w++] = db->length;
    memcpy(&words[w], &db->platform, sizeof(u64a));
    w += 2;
    words[w++] = db->crc32;
    words[w++] = db->reserved0;
    words[w++] = db->reserved1;

    // The bytecode follows the header words directly.
    memcpy(&words[w], hs_get_bytecode(db), db->length);

    *bytes = out;
    *serialized_length = total;
    return HS_SUCCESS;
}
// Accepts a database header platform value only if it equals one of the two
// platform values this runtime knows (hs_current_platform or
// hs_current_platform_no_avx2); anything else is HS_DB_PLATFORM_ERROR.
static
hs_error_t db_check_platform(const u64a p) {
    if (p == hs_current_platform || p == hs_current_platform_no_avx2) {
        return HS_SUCCESS;
    }
    return HS_DB_PLATFORM_ERROR;
}
// Parses the serialized header at *bytes into *header and validates it.
//
// Checks, in order: non-NULL input and minimum length (HS_INVALID), magic
// (HS_INVALID), version (HS_DB_VERSION_ERROR), and that length equals
// sizeof(struct hs_database) plus the recorded bytecode length (HS_INVALID).
// On success *bytes is advanced past the header words and HS_SUCCESS is
// returned. The caller supplies the header storage.
static
hs_error_t db_decode_header(const char **bytes, const size_t length,
                            struct hs_database *header) {
    if (!*bytes || length < sizeof(struct hs_database)) {
        return HS_INVALID;
    }

    // The stream carries no alignment guarantee, so every field is fetched
    // with an unaligned load.
    const u32 *cursor = (const u32 *)*bytes;

    // Start from an all-zero header so padding is never left uninitialized.
    memset(header, 0, sizeof(struct hs_database));

    header->magic = unaligned_load_u32(cursor);
    cursor++;
    if (header->magic != HS_DB_MAGIC) {
        return HS_INVALID;
    }

    header->version = unaligned_load_u32(cursor);
    cursor++;
    if (header->version != HS_DB_VERSION) {
        return HS_DB_VERSION_ERROR;
    }

    header->length = unaligned_load_u32(cursor);
    cursor++;
    const size_t expected = sizeof(struct hs_database) + header->length;
    if (length != expected) {
        DEBUG_PRINTF("bad length %zu, expecting %zu\n", length,
                     expected);
        return HS_INVALID;
    }

    header->platform = unaligned_load_u64a(cursor);
    cursor += 2;
    header->crc32 = unaligned_load_u32(cursor);
    cursor++;
    header->reserved0 = unaligned_load_u32(cursor);
    cursor++;
    header->reserved1 = unaligned_load_u32(cursor);
    cursor++;

    *bytes = (const char *)cursor;
    return HS_SUCCESS;
}
// Recomputes the CRC of the database bytecode (seed 0, db->length bytes) and
// compares it with the stored db->crc32. Returns HS_SUCCESS on a match and
// HS_INVALID otherwise.
static
hs_error_t db_check_crc(const hs_database_t *db) {
    const u32 computed = Crc32c_ComputeBuf(0, hs_get_bytecode(db), db->length);
    if (computed == db->crc32) {
        return HS_SUCCESS;
    }
    DEBUG_PRINTF("crc mismatch! 0x%x != 0x%x\n", computed, db->crc32);
    return HS_INVALID;
}
/* Records where the bytecode lives inside db and copies it there.
 *
 * The start is backed up from db->bytes by the low six bits of that address,
 * so the destination address is a multiple of 64. db->length must already be
 * set; that many bytes are copied from serialized. */
static
void db_copy_bytecode(const char *serialized, hs_database_t *db) {
    const uintptr_t misalign = (uintptr_t)db->bytes & 0x3f;
    db->bytecode = offsetof(struct hs_database, bytes) - misalign;
    memcpy((char *)db + db->bytecode, serialized, db->length);
}
/* Deserializes the stream at bytes into caller-provided storage db.
 *
 * db must be 8-byte aligned (HS_BAD_ALIGN otherwise). The header is decoded
 * and its platform checked before db is touched; db is then zeroed for
 * sizeof(struct hs_database) + the recorded bytecode length, filled in, and
 * its CRC verified. Returns HS_SUCCESS, or the first error encountered
 * (HS_INVALID on CRC mismatch). */
HS_PUBLIC_API
hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length,
                                      hs_database_t *db) {
    if (!bytes || !db) {
        return HS_INVALID;
    }
    if (!ISALIGNED_N(db, 8)) {
        return HS_BAD_ALIGN;
    }

    // Decode the header, then confirm it targets this platform.
    hs_database_t hdr;
    hs_error_t err = db_decode_header(&bytes, length, &hdr);
    if (err == HS_SUCCESS) {
        err = db_check_platform(hdr.platform);
    }
    if (err != HS_SUCCESS) {
        return err;
    }

    // Clear the destination before filling in header and bytecode.
    const size_t total = sizeof(struct hs_database) + hdr.length;
    memset(db, 0, total);
    memcpy(db, &hdr, sizeof(hdr));
    db_copy_bytecode(bytes, db);

    return (db_check_crc(db) == HS_SUCCESS) ? HS_SUCCESS : HS_INVALID;
}
/* Deserializes the stream at bytes into a database newly obtained from
 * hs_database_alloc.
 *
 * *db is set to NULL up front and only receives the new database once the
 * header, platform and CRC checks have all passed. On any failure after
 * allocation the new block is released with hs_database_free. */
HS_PUBLIC_API
hs_error_t hs_deserialize_database(const char *bytes, const size_t length,
                                   hs_database_t **db) {
    if (!bytes || !db) {
        return HS_INVALID;
    }
    *db = NULL;

    // Decode the header, then confirm it targets this platform.
    hs_database_t hdr;
    hs_error_t err = db_decode_header(&bytes, length, &hdr);
    if (err == HS_SUCCESS) {
        err = db_check_platform(hdr.platform);
    }
    if (err != HS_SUCCESS) {
        return err;
    }

    const size_t total = sizeof(struct hs_database) + hdr.length;
    struct hs_database *fresh = hs_database_alloc(total);
    err = hs_check_alloc(fresh);
    if (err != HS_SUCCESS) {
        hs_database_free(fresh);
        return err;
    }

    // Zero the block, then drop in the header and the bytecode.
    memset(fresh, 0, total);
    memcpy(fresh, &hdr, sizeof(hdr));
    db_copy_bytecode(bytes, fresh);

    if (db_check_crc(fresh) == HS_SUCCESS) {
        *db = fresh;
        return HS_SUCCESS;
    }

    hs_database_free(fresh);
    return HS_INVALID;
}
/* Stores in *size the number of bytes the database occupies:
 * sizeof(struct hs_database) + db->length. Returns HS_INVALID when size is
 * NULL, otherwise whatever validDatabase(db) reports. */
HS_PUBLIC_API
hs_error_t hs_database_size(const hs_database_t *db, size_t *size) {
    if (!size) {
        return HS_INVALID;
    }

    const hs_error_t status = validDatabase(db);
    if (unlikely(status != HS_SUCCESS)) {
        return status;
    }

    *size = db->length + sizeof(struct hs_database);
    return HS_SUCCESS;
}
/* Stores in *size the number of bytes needed to hold the database described
 * by the serialized stream: sizeof(struct hs_database) + the length recorded
 * in its header. Header decoding errors are reported before the NULL check
 * on size. */
HS_PUBLIC_API
hs_error_t hs_serialized_database_size(const char *bytes, const size_t length,
                                       size_t *size) {
    hs_database_t hdr;
    const hs_error_t status = db_decode_header(&bytes, length, &hdr);
    if (status != HS_SUCCESS) {
        return status;
    }

    if (size == NULL) {
        return HS_INVALID;
    }

    *size = hdr.length + sizeof(struct hs_database);
    return HS_SUCCESS;
}
/* Full validity check of a database: magic, version, platform, 16-byte
 * alignment of the bytecode, and finally the CRC. Returns the error of the
 * first failing check, or HS_SUCCESS. db must not be NULL. */
hs_error_t dbIsValid(const hs_database_t *db) {
    if (db->magic != HS_DB_MAGIC) {
        DEBUG_PRINTF("bad magic\n");
        return HS_INVALID;
    }

    if (db->version != HS_DB_VERSION) {
        DEBUG_PRINTF("bad version\n");
        return HS_DB_VERSION_ERROR;
    }

    const hs_error_t platform_status = db_check_platform(db->platform);
    if (platform_status != HS_SUCCESS) {
        DEBUG_PRINTF("bad platform\n");
        return HS_DB_PLATFORM_ERROR;
    }

    const void *bytecode = hs_get_bytecode(db);
    if (!ISALIGNED_16(bytecode)) {
        DEBUG_PRINTF("bad alignment\n");
        return HS_INVALID;
    }

    const hs_error_t crc_status = db_check_crc(db);
    if (crc_status != HS_SUCCESS) {
        DEBUG_PRINTF("bad crc\n");
    }
    return crc_status;
}
/** \brief Wrap the given bytecode (RoseEngine) in a newly-allocated
 * \ref hs_database.
 *
 * The block comes from hs_database_alloc and is zeroed. The bytecode is
 * copied to a cacheline-aligned position inside it and its CRC is stored in
 * the header. Returns NULL if the allocation check fails. */
hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
    const size_t total = sizeof(struct hs_database) + len;
    DEBUG_PRINTF("db size %zu\n", total);
    DEBUG_PRINTF("db platform %llx\n", platform);

    struct hs_database *db = (struct hs_database *)hs_database_alloc(total);
    if (hs_check_alloc(db) != HS_SUCCESS) {
        hs_database_free(db);
        return NULL;
    }

    // Leave no byte of the database uninitialized.
    memset(db, 0, total);

    db->magic = HS_DB_MAGIC;
    db->version = HS_DB_VERSION;
    db->length = len;
    db->platform = platform;

    // Manual alignment: back up from db->bytes by the low six address bits.
    size_t shift = (uintptr_t)db->bytes & 0x3f;
    DEBUG_PRINTF("shift is %zu\n", shift);
    db->bytecode = offsetof(struct hs_database, bytes) - shift;
    char *dest = (char *)db + db->bytecode;
    assert(ISALIGNED_CL(dest));

    memcpy(dest, in_bytecode, len);
    db->crc32 = Crc32c_ComputeBuf(0, dest, db->length);
    return db;
}
#if defined(_WIN32)
#define SNPRINTF_COMPAT _snprintf
#else
#define SNPRINTF_COMPAT snprintf
#endif
/** Formats the database info string into a buffer from hs_misc_alloc and
 * stores it in *s.
 *
 * version packs major/minor/release in its top three bytes; plat selects the
 * "NOAVX2" or " AVX2" feature text; raw_mode selects the mode name. Returns
 * HS_SUCCESS, the hs_check_alloc error if allocation fails, or HS_NOMEM if
 * the formatting call reports an error. *s is NULL on failure. */
static
hs_error_t print_database_string(char **s, u32 version, const platform_t plat,
                                 u32 raw_mode) {
    assert(s);
    *s = NULL;

    const u8 major = (version >> 24) & 0xff;
    const u8 minor = (version >> 16) & 0xff;
    const u8 release = (version >> 8) & 0xff;

    const char *avx2 = (plat & HS_PLATFORM_NOAVX2) ? "NOAVX2" : " AVX2";

    const char *mode;
    if (raw_mode == HS_MODE_STREAM) {
        mode = "STREAM";
    } else if (raw_mode == HS_MODE_VECTORED) {
        mode = "VECTORED";
    } else {
        assert(raw_mode == HS_MODE_BLOCK);
        mode = "BLOCK";
    }

    // Start with a size that should fit; if the formatter says it needs
    // more, retry with exactly what it asked for.
    size_t capacity = 256;
    for (;;) {
        char *buf = hs_misc_alloc(capacity);
        hs_error_t ret = hs_check_alloc(buf);
        if (ret != HS_SUCCESS) {
            hs_misc_free(buf);
            return ret;
        }

        // SNPRINTF_COMPAT maps to the platform's snprintf workalike.
        int written = SNPRINTF_COMPAT(
            buf, capacity, "Version: %u.%u.%u Features: %s Mode: %s",
            major, minor, release, avx2, mode);

        if (written < 0) {
            DEBUG_PRINTF("snprintf output error, returned %d\n", written);
            hs_misc_free(buf);
            return HS_NOMEM;
        }

        if ((size_t)written < capacity) { // output fit within buffer.
            assert(buf[written] == '\0');
            *s = buf;
            return HS_SUCCESS;
        }

        // Too small: one extra byte is needed for the null terminator.
        capacity = (size_t)written + 1;
        hs_misc_free(buf);
    }
}
/* Builds the info string for a serialized database without deserializing it.
 *
 * Only the magic is validated. The version and platform are read from their
 * word positions in the header, and the mode is read from the RoseEngine
 * that starts right after the eight header words. *info is NULL on failure. */
HS_PUBLIC_API
hs_error_t hs_serialized_database_info(const char *bytes, size_t length,
                                       char **info) {
    if (!info) {
        return HS_INVALID;
    }
    *info = NULL;

    if (!bytes || length < sizeof(struct hs_database)) {
        return HS_INVALID;
    }

    // Header layout in 32-bit words: 0 magic, 1 version, 2 length,
    // 3-4 platform, 5 crc, 6 reserved0, 7 reserved1, 8.. bytecode.
    const u32 *words = (const u32 *)bytes;

    if (unaligned_load_u32(words) != HS_DB_MAGIC) {
        return HS_INVALID;
    }

    const u32 version = unaligned_load_u32(words + 1);
    const platform_t plat = unaligned_load_u64a(words + 3);

    const char *t_raw = (const char *)(words + 8);
    const u32 mode =
        unaligned_load_u32(t_raw + offsetof(struct RoseEngine, mode));

    return print_database_string(info, version, plat, mode);
}
/* Builds the info string for an in-memory database. *info is set to NULL
 * first; a NULL, misaligned or wrong-magic db yields HS_INVALID. The mode is
 * taken from the RoseEngine at the database's bytecode offset. */
HS_PUBLIC_API
hs_error_t hs_database_info(const hs_database_t *db, char **info) {
    if (!info) {
        return HS_INVALID;
    }
    *info = NULL;

    if (!db) {
        return HS_INVALID;
    }
    if (!db_correctly_aligned(db)) {
        return HS_INVALID;
    }
    if (db->magic != HS_DB_MAGIC) {
        return HS_INVALID;
    }

    const struct RoseEngine *rose = hs_get_bytecode(db);
    const platform_t plat = db->platform;
    return print_database_string(info, db->version, plat, rose->mode);
}

119
src/database.h Normal file
View File

@@ -0,0 +1,119 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Runtime code for hs_database manipulation.
*/
#ifndef DATABASE_H_D467FD6F343DDE
#define DATABASE_H_D467FD6F343DDE
#ifdef __cplusplus
extern "C"
{
#endif
#include "hs_compile.h" // for HS_MODE_ flags
#include "hs_version.h"
#include "ue2common.h"
// Version stamped into hs_database::version; presumably the library's packed
// 32-bit version number from hs_version.h (confirm there).
#define HS_DB_VERSION HS_VERSION_32BIT
// Magic number expected in hs_database::magic (checked by validDatabase).
#define HS_DB_MAGIC (0xdbdbdbdbU)
// Values in here cannot (easily) change - add new ones!
// CPU type is the low 6 bits (we can't need more than 64, surely!)
#define HS_PLATFORM_INTEL 1
#define HS_PLATFORM_CPU_MASK 0x3F
// Platform flag set in hs_current_platform when built without __AVX2__.
#define HS_PLATFORM_NOAVX2 (4<<13)
/** \brief Platform features bitmask. */
typedef u64a platform_t;
/* Platform value of this build: HS_PLATFORM_NOAVX2 is included only when the
 * compiler does not define __AVX2__. */
static UNUSED
const platform_t hs_current_platform = {
#if !defined(__AVX2__)
HS_PLATFORM_NOAVX2 |
#endif
0,
};
/* Platform value with HS_PLATFORM_NOAVX2 always set, regardless of how this
 * build was compiled. */
static UNUSED
const platform_t hs_current_platform_no_avx2 = {
HS_PLATFORM_NOAVX2 |
0,
};
/*
 * a header to enclose the actual bytecode - useful for keeping info about the
 * compiled data.
 */
struct hs_database {
u32 magic;      // HS_DB_MAGIC for a valid database
u32 version;    // compared against HS_DB_VERSION
u32 length;     // bytecode length in bytes (header not included)
u64a platform;  // platform_t feature bits the bytecode was built for
u32 crc32;      // Crc32c_ComputeBuf(0, bytecode, length)
u32 reserved0;
u32 reserved1;
u32 bytecode; // offset relative to db start
u32 padding[16]; // slack before bytes; the aligned bytecode may start in here
char bytes[];
};
/* Returns the address of the bytecode: db plus the byte offset stored in
 * db->bytecode. */
static really_inline
const void *hs_get_bytecode(const struct hs_database *db) {
    const char *base = (const char *)db;
    return base + db->bytecode;
}
/**
 * Cheap database sanity checks used in block mode scan calls and streaming
 * mode open calls: a NULL pointer or wrong magic gives HS_INVALID, a version
 * mismatch gives HS_DB_VERSION_ERROR.
 */
static really_inline
hs_error_t validDatabase(const hs_database_t *db) {
    if (!db) {
        return HS_INVALID;
    }
    if (db->magic != HS_DB_MAGIC) {
        return HS_INVALID;
    }
    if (db->version == HS_DB_VERSION) {
        return HS_SUCCESS;
    }
    return HS_DB_VERSION_ERROR;
}
/* Full validation of a database: magic, version, platform, bytecode
 * alignment and CRC. Returns HS_SUCCESS or the first failing check's error. */
hs_error_t dbIsValid(const struct hs_database *db);
/* Allocates a database via hs_database_alloc, copies len bytes of bytecode
 * into it and fills in the header. Returns NULL if allocation fails. */
struct hs_database *dbCreate(const char *bytecode, size_t len, u64a platform);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* DATABASE_H_D467FD6F343DDE */

39
src/fdr/CMakeLists.txt Normal file
View File

@@ -0,0 +1,39 @@
# The set of rules and other nastiness for generating FDR/Teddy source
# we need to add these as explicit dependencies
set(AUTOGEN_PY_FILES
arch.py
autogen.py
autogen_utils.py
base_autogen.py
fdr_autogen.py
teddy_autogen.py
)
# fdr_autogen(<type> <out>)
# Registers a custom command that runs autogen.py with <type> as its argument
# and redirects its standard output to <out> in the current binary directory.
# The command depends on the scripts listed in AUTOGEN_PY_FILES. A target
# named autogen_<type> is added that depends on the generated file.
function(fdr_autogen type out)
add_custom_command (
COMMENT "AUTOGEN ${out}"
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${out}
COMMAND ${PYTHON} ${CMAKE_CURRENT_SOURCE_DIR}/autogen.py ${type} > ${CMAKE_CURRENT_BINARY_DIR}/${out}
DEPENDS ${AUTOGEN_PY_FILES}
)
add_custom_target(autogen_${type} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${out})
endfunction(fdr_autogen)
#now build the functions
# One generation rule per output file: FDR and Teddy, runtime and compiler.
fdr_autogen(runtime fdr_autogen.c)
fdr_autogen(compiler fdr_autogen_compiler.cpp)
fdr_autogen(teddy_runtime teddy_autogen.c)
fdr_autogen(teddy_compiler teddy_autogen_compiler.cpp)
# List of generated sources, exported to the parent directory scope.
set(fdr_GENERATED_SRC
${CMAKE_BINARY_DIR}/src/fdr/fdr_autogen.c
${CMAKE_BINARY_DIR}/src/fdr/fdr_autogen_compiler.cpp
${CMAKE_BINARY_DIR}/src/fdr/teddy_autogen.c
${CMAKE_BINARY_DIR}/src/fdr/teddy_autogen_compiler.cpp
PARENT_SCOPE)
# Mark the files as GENERATED so CMake does not require them at configure time.
set_source_files_properties(${fdr_GENERATED_SRC} PROPERTIES GENERATED TRUE)
# Add the current binary directory to the include search path.
include_directories(${CMAKE_CURRENT_BINARY_DIR})

58
src/fdr/arch.py Executable file
View File

@@ -0,0 +1,58 @@
#!/usr/bin/python
# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import autogen_utils
# wrapper for architectures
class Arch:
    """Describes a target architecture for the code generators.

    Attributes:
        name: architecture name.
        extensions: list of extension names given at construction.
        target: starts as None; subclasses may assign it.
        guard_list: preprocessor conditions that get_guard() joins.
    """

    def __init__(self, name, extensions=None):
        # A fresh list per instance: a shared default list would leak
        # changes from one Arch object into every other.
        if extensions is None:
            extensions = []
        self.name = name
        self.extensions = extensions
        self.target = None
        # Defined here so get_guard() also works on a plain Arch; subclasses
        # may replace it after calling this constructor.
        self.guard_list = []

    def get_guard(self):
        """Return the '#if ...' line guarding code for this architecture."""
        # these defines definitely fall into the "belt-and-suspenders"
        # category of paranoia
        if not self.guard_list:
            return "#if 1"
        return "#if " + " && ".join(self.guard_list)
class X86Arch(Arch):
    """x86 architecture description.

    target starts as the string "0". When "AVX2" is among the extensions,
    the HS_CPU_FEATURES_AVX2 flag is appended to target and a
    defined(__AVX2__) condition is added to guard_list.
    """

    def __init__(self, name, extensions=None):
        # A fresh list per instance instead of a shared mutable default.
        if extensions is None:
            extensions = []
        Arch.__init__(self, name, extensions)
        self.guard_list = []
        self.target = "0"
        if "AVX2" in extensions:
            self.target += " | HS_CPU_FEATURES_AVX2"
            self.guard_list += ["defined(__AVX2__)"]
# Module-level architecture descriptions: one with no extensions, one with AVX2.
arch_x86_64 = X86Arch("x86_64", extensions = [ ])
arch_x86_64_avx2 = X86Arch("x86_64_avx2", extensions = [ "AVX2" ])

159
src/fdr/autogen.py Executable file
View File

@@ -0,0 +1,159 @@
#!/usr/bin/python
# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
from autogen_utils import *
from fdr_autogen import *
from teddy_autogen import *
from arch import *
# FDR setup

# these are either produced - if the guard succeeds, or #defined to zeroes.
# either the function or the zero is fine in our array of function pointers
def produce_fdr_runtimes(l):
    """Ask every FDR matcher in l, in order, to emit its runtime code."""
    for matcher in l:
        matcher.produce_code()
def produce_fdr_compiles(l):
    """Print the C++ getFdrDescriptions() function to stdout.

    Each matcher in l contributes its own entry to the defns[] table through
    produce_compile_call(); this function only prints the surrounding frame.
    Single-argument print(...) is used so the script parses on Python 3 while
    printing exactly the same text on Python 2.
    """
    print("void getFdrDescriptions(vector<FDREngineDescription> *out) {")
    print(" static const FDREngineDef defns[] = {")
    for m in l:
        m.produce_compile_call()
    print(" };")
    print(" out->clear();")
    print(" for (size_t i = 0; i < ARRAY_LENGTH(defns); i++) {")
    print(" out->push_back(FDREngineDescription(defns[i]));")
    print(" }")
    print("}")
def build_fdr_matchers():
    """Build the list of FDR (M3) matchers in their canonical order.

    Order matters because IDs are later assigned by list position: for each
    regular domain the strides 1, 2 and 4 appear in turn, followed by the big
    domains at stride 1 only.
    """
    shared = {
        "state_width" : 128,
        "num_buckets" : 8,
        "extract_frequency" : 8,
        "arch" : arch_x86_64,
    }
    regular_domains = (8, 10, 11, 12, 13)
    large_domains = (14, 15)
    strides = (1, 2, 4)

    matchers = []
    for dom in regular_domains:
        for step in strides:
            matchers.append(M3(stride = step, domain = dom, **shared))
    for dom in large_domains:
        matchers.append(M3(stride = 1, domain = dom, **shared))
    return matchers
# teddy setup
def build_teddy_matchers():
    """Build the list of Teddy matchers in their canonical order.

    Order matters because IDs are later assigned by list position. Every
    configuration is produced unpacked first, then packed.
    """
    packings = (False, True)
    mask_counts = (1, 2, 3, 4)
    matchers = []

    # AVX2
    for is_packed in packings:
        matchers.append(MTFast(arch = arch_x86_64_avx2, packed = is_packed))
    for n_msk in mask_counts:
        for is_packed in packings:
            matchers.append(MTFat(arch = arch_x86_64_avx2, packed = is_packed,
                                  num_masks = n_msk, num_buckets = 16))

    # SSE/SSE2/SSSE3
    for n_msk in mask_counts:
        for is_packed in packings:
            matchers.append(MT(arch = arch_x86_64, packed = is_packed,
                               num_masks = n_msk, num_buckets = 8))
    return matchers
def produce_teddy_compiles(l):
    """Print the C++ getTeddyDescriptions() function to stdout.

    Each matcher in l contributes its own entry to the defns[] table through
    produce_compile_call(); this function only prints the surrounding frame.
    Single-argument print(...) is used so the script parses on Python 3 while
    printing exactly the same text on Python 2.
    """
    print("void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {")
    print(" static const TeddyEngineDef defns[] = {")
    for m in l:
        m.produce_compile_call()
    print(" };")
    print(" out->clear();")
    print(" for (size_t i = 0; i < ARRAY_LENGTH(defns); i++) {")
    print(" out->push_back(TeddyEngineDescription(defns[i]));")
    print(" }")
    print("}")
# see below - we don't produce our 'zeros' at the point of the teddy runtimes as they
# are linked. So we either generate the function or we don't - then at the point of the
# header in fdr_autogen.c we either generate the header or we #define the zero.
def produce_teddy_runtimes(l):
    """Print guarded prototypes, then guarded bodies, for every matcher in l.

    Single-argument print(...) is used so the script parses on Python 3 while
    printing exactly the same text on Python 2.
    """
    # Since we're using -Wmissing-prototypes, we need headers first.
    for m in l:
        m.produce_guard()
        print(m.produce_header(visible = True, header_only = True))
        m.close_guard()
    # Second pass: the function bodies, each inside its own guard.
    for m in l:
        m.produce_guard()
        m.produce_code()
        m.close_guard()
# see produce_teddy_runtimes() comment for the rationale
def produce_teddy_headers(l):
    """Print, per matcher, a guarded prototype followed by its zero fallback.

    produce_zero_alternative() supplies the "#else ... #endif" that closes the
    guard opened by produce_guard(). Single-argument print(...) is used so the
    script parses on Python 3 while printing the same text on Python 2.
    """
    for m in l:
        m.produce_guard()
        print(m.produce_header(visible = True, header_only = True))
        m.produce_zero_alternative()
# general utilities
def make_fdr_function_pointers(matcher_list):
    """Print the C table of engine entry points, one name per matcher.

    The table is emitted in matcher_list order, so a matcher's position is its
    index in funcs[]. Single-argument print(...) is used so the script parses
    on Python 3 while printing exactly the same text on Python 2.
    """
    print("""
typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, const struct FDR_Runtime_Args *a);
static FDRFUNCTYPE funcs[] = {
""")
    all_funcs = ",\n".join([ " %s" % m.get_name() for m in matcher_list ])
    print(all_funcs)
    print("""
};
""")
def assign_ids(matcher_list, next_id):
    """Number the matchers consecutively starting at next_id.

    Sets the .id attribute of each matcher in order and returns the first
    unused ID, so calls can be chained across several lists.
    """
    count = 0
    for count, matcher in enumerate(matcher_list, 1):
        matcher.id = next_id + count - 1
    return next_id + count
# Main entry point
# IDs are assigned across both families in one sequence: FDR matchers first,
# Teddy matchers continuing from where FDR stopped.
m = build_fdr_matchers()
next_id = assign_ids(m, 0)
tm = build_teddy_matchers()
next_id = assign_ids(tm, next_id)

# The single command-line argument selects which generated file to print.
mode = sys.argv[1] if len(sys.argv) > 1 else None
if mode == "compiler":
    produce_fdr_compiles(m)
elif mode == "runtime":
    produce_fdr_runtimes(m)
    produce_teddy_headers(tm)
    make_fdr_function_pointers(m+tm)
elif mode == "teddy_runtime":
    produce_teddy_runtimes(tm)
elif mode == "teddy_compiler":
    produce_teddy_compiles(tm)
else:
    # A missing or misspelled mode used to raise IndexError or silently emit
    # an empty file; fail loudly so a broken build rule is noticed.
    fail_out("expected mode compiler|runtime|teddy_runtime|teddy_compiler, got %r" % (mode,))

285
src/fdr/autogen_utils.py Executable file
View File

@@ -0,0 +1,285 @@
#!/usr/bin/python
# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
def fail_out(msg = ""):
    """Report an internal generator error on stderr and exit with status 1.

    Writes through sys.stderr.write rather than the Python-2-only
    "print >>sys.stderr" statement; the emitted text is unchanged.
    """
    sys.stderr.write("Internal failure in autogen.py: " + msg + "\n")
    sys.exit(1)
class IntegerType:
    """A C integer type of a fixed bit width.

    Every method builds a C expression *string* (or a plain Python number);
    nothing is evaluated here. `size` is the width in bits.
    """

    # load code -> suffix of the lv_<type> loader that implements it
    _LOAD_SUFFIX = {
        "normal" : "",
        "aligned" : "_a",
        "cautious_forward" : "_cf",
        "cautious_backward" : "_cb",
        "cautious_everywhere" : "_ce",
    }

    def __init__(self, size):
        self.size = size

    def get_name(self):
        """Return the C type name; raises KeyError for an unlisted width."""
        return { 256: "m256", 128 : "m128", 64 : "u64a", 32 : "u32" , 16 : "u16", 8 : "u8"}[self.size]

    def size_in_bytes(self):
        # Floor division keeps this an int on Python 3 as well.
        return self.size // 8

    def isSIMDOnIntel(self):
        return False

    def zero_expression(self):
        return "0"

    def constant_to_string(self, n):
        """Format n, truncated to this width, as a C hex literal."""
        if self.size == 64:
            suffix = "ULL"
        else:
            suffix = ""
        return "0x%x%s" % (n & ((1 << self.size) - 1), suffix)

    def lowbits(self, n):
        return (1 << n) - 1

    def highbits(self, n):
        # Negative as a Python int; constant_to_string() masks it to width.
        return ~(self.lowbits(self.size - n))

    def lowbit_mask(self, n):
        return self.constant_to_string(self.lowbits(n))

    def highbit_mask(self, n):
        return self.constant_to_string(self.highbits(n))

    def lowbit_extract_expr(self, expr_string, n):
        return "(%s & %s)" % ( expr_string, self.lowbit_mask(n))

    def highbit_extract_expr(self, expr_string, n):
        return "(%s >> %d)" % (expr_string, self.size - n)

    def flip_lowbits_expr(self, expr_string, n):
        return "(%s ^ %s)" % ( expr_string, self.lowbit_mask(n))

    def bit_extract_expr(self, expr_string, low, high):
        lbm = self.lowbit_mask(high - low)
        return "((%s >> %d) & %s)" % (expr_string, low, lbm)

    # shifts are +ve if left and -ve if right
    def shift_expr(self, expr_string, n):
        if n <= -self.size or n >= self.size:
            # Shifting by the full width or more leaves nothing behind.
            return self.zero_expression()
        elif (n > 0):
            return "(%s << %d)" % (expr_string, n)
        elif (n < 0):
            return "(%s >> %d)" % (expr_string, -n)
        else:
            return "(%s)" % (expr_string)

    # code is:
    # "normal" (always between buf and len) - the default
    # "aligned" (means normal + aligned to a natural boundary)
    # "cautious_forward" (means may go off the end of buf+len)
    # "cautious_backward" (means may go off the start of buf)
    # "cautious_everywhere" (means may go off both)
    def load_expr_data(self, offset = 0, code = "normal",
                       base_string = "ptr", bounds_lo = "buf", bounds_hi = "buf + len"):
        """Return a call to the lv_<type>[_suffix] loader for the given code.

        Codes are compared by value; comparing with `is` only worked when the
        caller's string happened to be the same interned object. An
        unrecognised code is reported through fail_out() rather than silently
        returning None.
        """
        if code not in self._LOAD_SUFFIX:
            fail_out("unknown load code '%s'" % code)
        if code == "aligned" and self.size == 8:
            fail_out("no aligned byte loads")
        return "lv_%s%s(%s + %d, %s, %s)" % (self.get_name(), self._LOAD_SUFFIX[code],
                                             base_string, offset, bounds_lo, bounds_hi)
class SIMDIntegerType(IntegerType):
    """Integer type held in a SIMD register (built for 128 bits elsewhere).

    Overrides the scalar expression builders with SSE equivalents; the
    operations with no implementation call fail_out().
    """

    def __init__(self, size):
        IntegerType.__init__(self, size)

    def isSIMDOnIntel(self):
        return True

    def zero_expression(self):
        return "zeroes128()"

    def lowbit_extract_expr(self, expr_string, n):
        """Move the low 32 or 64 bits to a scalar, then mask down to n bits."""
        if (n <= 32):
            tmpType = IntegerType(32)
            tmpExpr = "movd(%s)" % expr_string
        elif (32 < n <= 64):
            tmpType = IntegerType(64)
            tmpExpr = "movq(%s)" % expr_string
        else:
            # Previously fell through to an UnboundLocalError on tmpType.
            fail_out("Unimplemented low bit extract of more than 64 bits on m128")
        return tmpType.lowbit_extract_expr(tmpExpr, n)

    def highbit_extract_expr(self, expr_string, n):
        fail_out("Unimplemented high bit extract on m128")

    # flip is defaulted so the method can also be called with the base class's
    # three-argument signature and still reach the intended error message.
    def bit_extract_expr(self, expr_string, low, high, flip = False):
        fail_out("Unimplemented bit extract on m128")

    def shift_expr(self, expr_string, n):
        """Byte-granular shift; n is in bits and must be a multiple of 8."""
        if n % 8 != 0:
            fail_out("Trying to shift a m128 by a bit granular value")
        if n <= -self.size or n >= self.size:
            return self.zero_expression()
        elif (n > 0):
            # Floor division keeps the byte count an int on Python 3.
            return "_mm_slli_si128(%s, %s)" % (expr_string, n // 8)
        elif (n < 0):
            return "_mm_srli_si128(%s, %s)" % (expr_string, -n // 8)
        else:
            return "(%s)" % (expr_string)

    def lowbit_mask(self, n):
        if n % 8 != 0:
            fail_out("Trying to make a lowbit mask in a m128 by a bit granular value")
        # All-ones shifted right so only the low n bits remain set.
        return self.shift_expr("ones128()", -(128 - n))
def getRequiredType(bits):
    """Pick the narrowest type able to hold `bits` bits.

    Exactly 128 bits yields a SIMDIntegerType; up to 64 bits yields the
    smallest of the 8/16/32/64-bit scalar types that fits. Any other width
    returns None.
    """
    if bits == 128:
        return SIMDIntegerType(bits)
    candidates = [ width for width in (8, 16, 32, 64) if bits <= width ]
    if not candidates:
        return None
    return IntegerType(candidates[0])
class IntegerVariable:
    """A named C variable paired with its type object."""

    def __init__(self, name, type):
        self.name = name
        self.type = type

    def gen_initializer_stmt(self, initialization_string = None):
        """Return the C declaration, with an initializer when one is given.

        A missing or empty initialization_string yields a bare declaration.
        """
        declaration = "%s %s" % (self.type.get_name(), self.name)
        if not initialization_string:
            return declaration + ";"
        return "%s = %s;" % (declaration, initialization_string)
class Step:
    """One emitted statement of generated code plus its scheduling data.

    Subclasses set self.val to the statement text. Constructing a Step
    registers it with its context, which also assigns debug_step.
    """

    def __init__(self, context, offset = 0):
        self.context = context
        self.matcher = context.matcher
        self.offset = offset
        self.latency = 1
        self.dependency_list = []
        self.latest = None
        self.context.add_step(self)

    # return a string, complete with indentation
    def emit(self):
        pad = " " * (self.offset*2 + self.matcher.default_body_indent)
        text = "\n".join(pad + line for line in self.val.split("\n"))
        if not self.latest:
            return text
        # Scheduling annotations ride along in a trailing C comment.
        text += " // %s L%s LTST:%d" % (self.debug_step, self.latency, self.latest)
        if self.dependency_list:
            text += " Derps: " + "".join("%d/%d " % (dep.debug_step, lat)
                                         for (dep, lat) in self.dependency_list)
        return text

    def add_dependency(self, step, anti_dependency = False, output_dependency = False):
        """Record that this step must follow `step`.

        Anti- and output-dependencies cost one cycle; a true dependency costs
        the producer's latency.
        """
        if anti_dependency or output_dependency:
            cost = 1
        else:
            cost = step.latency
        self.dependency_list.append((step, cost))

    def nv(self, type, var_name):
        """Declare a new variable written by this step."""
        return self.context.new_var(self, type, var_name)

    def gv(self, var_name, reader = True, writer = False):
        """Look up a variable, registering this step as reader and/or writer."""
        return self.context.get_var(self, var_name, reader = reader, writer = writer)
# utility steps, generic
class LabelStep(Step):
    """Emits a C label named <label_prefix><offset>, marked UNUSED."""

    def __init__(self, context, offset = 0, label_prefix = "off"):
        Step.__init__(self, context, offset)
        label = "%s%d" % (label_prefix, offset)
        self.val = label + ": UNUSED;"
class OpenScopeStep(Step):
    """Emits the opening brace of a C scope."""

    def __init__(self, context, offset = 0):
        Step.__init__(self, context, offset)
        opening_brace = "{"
        self.val = opening_brace
class CloseScopeStep(Step):
    """Emits the closing brace of a C scope."""

    def __init__(self, context, offset = 0):
        Step.__init__(self, context, offset)
        closing_brace = "}"
        self.val = closing_brace
class CodeGenContext:
    """Collects the steps of one generated function and their data flow.

    Tracks, per variable, the last writer and the readers of that value so
    that steps can record true, anti- and output dependencies on each other.
    """

    def __init__(self, matcher):
        self.vars = {}
        self.steps = []
        self.ctr = 0
        self.matcher = matcher
        self.var_writer = {} # var to a single writer
        self.var_readers = {} # var to a list of all the readers that read the last value

    def new_var(self, step, type, var_name):
        """Create a variable and record `step` as its writer."""
        var = IntegerVariable(var_name, type)
        self.vars[var_name] = var
        self.var_writer[var_name] = step
        return var

    def get_var(self, step, var_name, reader = True, writer = False):
        """Return the variable, wiring `step` into its dependency chain.

        Raises KeyError if var_name was never created with new_var().
        """
        if reader:
            # True dependency: a reader must follow the current writer.
            writer_step = self.var_writer[var_name]
            if writer_step:
                step.add_dependency(writer_step)
            self.var_readers.setdefault(var_name, []).append(step)
        if writer and not reader:
            # Output dependency: a pure overwrite must follow the old writer.
            if self.var_writer[var_name]:
                step.add_dependency(self.var_writer[var_name], output_dependency = True)
        if writer:
            # Anti-dependency: readers of the old value must run first.
            # "in" replaces the Python-2-only dict.has_key(); the loop name no
            # longer shadows the `reader` parameter.
            if var_name in self.var_readers:
                for prior_reader in [ r for r in self.var_readers[var_name] if r is not step ]:
                    step.add_dependency(prior_reader, anti_dependency = True)
            self.var_readers[var_name] = []
            self.var_writer[var_name] = step
        return self.vars[var_name]

    def add_step(self, step):
        """Append a step and give it its sequence number (debug_step)."""
        self.steps += [ step ]
        step.debug_step = self.ctr
        self.ctr += 1

    def dontschedule(self, finals):
        """Emit all steps in creation order; `finals` is ignored."""
        return "\n".join( [ s.emit() for s in self.steps ] )

    def schedule(self, finals):
        """Emit all steps ordered by their distance from the final steps.

        Walks dependencies backwards from `finals`, assigns each reached step
        a `latest` value, then sorts by it in descending order.
        """
        for f in finals:
            f.latest = f.latency
        worklist = finals
        while worklist:
            current = worklist[0]
            worklist = worklist[1:]
            for (dep, lat) in current.dependency_list:
                # NOTE(review): the test uses dep.latency but the update uses
                # the edge latency `lat`; left untouched because changing it
                # would reorder generated code - confirm which is intended.
                if dep.latest is None or dep.latest < (current.latest + dep.latency):
                    dep.latest = current.latest + lat
                    if dep not in worklist:
                        worklist += [ dep ]
        # Steps never reached from `finals` keep latest == None. Python 2
        # ordered None below every number; spell that out so the sort gives
        # the same order on Python 3 instead of raising TypeError.
        self.steps.sort(reverse = True,
                        key = lambda s : float("-inf") if s.latest is None else s.latest)
        return "\n".join( [ s.emit() for s in self.steps ] )

167
src/fdr/base_autogen.py Normal file
View File

@@ -0,0 +1,167 @@
#!/usr/bin/python
# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
from autogen_utils import *
from base_autogen import *
from string import Template
class MatcherBase:
    """Shared C-text templates for the generated literal-matcher engines.

    Subclasses are expected to provide the attributes read here: id, arch,
    num_buckets, conf_top_level_split and conf_pull_back. The produce_*
    methods either return a fragment of C source or print one to stdout.
    Printing uses single-argument print(...) so the module parses on Python 3
    while emitting exactly the same text on Python 2.
    """

    def __init__(self):
        pass

    def get_name(self):
        """Return the C function name for this engine, from its numeric id."""
        return "fdr_exec_%03d" % self.id

    def produce_header(self, visible, header_only = False):
        """Return the engine's C signature.

        visible=False prefixes "static never_inline". header_only=True ends
        the text with ";" (a prototype), otherwise with "{" (a definition).
        """
        s = ""
        if not visible:
            s += "static never_inline"
        s += """
hwlm_error_t %s(UNUSED const struct FDR *fdr,
UNUSED const struct FDR_Runtime_Args * a)""" % self.get_name()
        if header_only:
            s += ";"
        else:
            s += "{"
        s += "\n"
        return s

    def produce_guard(self):
        """Print the "#if ..." line that opens this engine's arch guard."""
        print(self.arch.get_guard())

    def produce_zero_alternative(self):
        """Print the #else branch defining the engine name as 0, and #endif."""
        print("""
#else
#define %s 0
#endif
""" % self.get_name())

    # trivial function for documentation/modularity
    def close_guard(self):
        print("#endif")

    def produce_common_declarations(self):
        """Return the local declarations placed at the top of an engine body."""
        return """
const u8 * buf = a->buf;
const size_t len = a->len;
const u8 * ptr = buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t * control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 * tryFloodDetect = a->firstFloodDetect;
UNUSED u32 bit, bitRem, confSplit, idx;
u32 byte, cf;
const struct FDRConfirm *fdrc;
u32 last_match = (u32)-1;
"""

    def produce_continue_check(self):
        """Return C that stores controlVal and returns if matching was terminated."""
        return """if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
*a->groups = controlVal;
return HWLM_TERMINATED;
}
"""

    def produce_flood_check(self):
        """Return C that calls floodDetect() once ptr passes tryFloodDetect."""
        return """
if (P0(ptr > tryFloodDetect)) {
tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, &floodBackoff, &controlVal, iterBytes);
if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
*a->groups = controlVal;
return HWLM_TERMINATED;
}
}
"""

    def produce_footer(self):
        """Return the normal-exit tail of an engine body."""
        return """
*a->groups = controlVal;
return HWLM_SUCCESS;
}
"""

    def produce_confirm_base(self, conf_var_name, conf_var_size, offset, cautious, enable_confirmless, do_bailout = False):
        """Return the C loop that confirms every set bit in conf_var_name.

        conf_var_size selects the findAndClearLSB_<size> helper, offset is
        added to the byte position derived from each bit, cautious picks
        VECTORING over NOT_CAUTIOUS, enable_confirmless adds the fast path
        taken when fdrc->mult is zero, and do_bailout adds a bounds check
        that skips positions outside [start_offset, len).
        """
        if cautious:
            caution_string = "VECTORING"
        else:
            caution_string = "NOT_CAUTIOUS"
        # conf_top_level_split - 1 is used as a bit mask on the input byte.
        conf_split_mask = IntegerType(32).constant_to_string(
            self.conf_top_level_split - 1)
        if enable_confirmless:
            quick_check_string = """
if (!fdrc->mult) {
u32 id = fdrc->nBitsOrSoleID;
if ((last_match == id) && (fdrc->flags & NoRepeat))
continue;
last_match = id;
controlVal = a->cb(ptr+byte-buf, ptr+byte-buf, id, a->ctxt);
continue;
} """
        else:
            quick_check_string = ""
        if do_bailout:
            bailout_string = """
if ((ptr + byte < buf + a->start_offset) || (ptr + byte >= buf + len)) continue;"""
        else:
            bailout_string = ""
        return Template("""
if (P0(!!$CONFVAR)) {
do {
bit = findAndClearLSB_$CONFVAR_SIZE(&$CONFVAR);
byte = bit / $NUM_BUCKETS + $OFFSET;
bitRem = bit % $NUM_BUCKETS;
$BAILOUT_STRING
confSplit = *(ptr+byte) & $SPLIT_MASK;
idx = confSplit * $NUM_BUCKETS + bitRem;
cf = confBase[idx];
if (!cf)
continue;
fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);
if (!(fdrc->groups & *control))
continue;
$QUICK_CHECK_STRING
confWithBit(fdrc, a, ptr - buf + byte, $CAUTION_STRING, $CONF_PULL_BACK, control, &last_match);
} while(P0(!!$CONFVAR));
if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
*a->groups = controlVal;
return HWLM_TERMINATED;
}
}""").substitute(CONFVAR = conf_var_name,
                 CONFVAR_SIZE = conf_var_size,
                 NUM_BUCKETS = self.num_buckets,
                 OFFSET = offset,
                 SPLIT_MASK = conf_split_mask,
                 QUICK_CHECK_STRING = quick_check_string,
                 BAILOUT_STRING = bailout_string,
                 CAUTION_STRING = caution_string,
                 CONF_PULL_BACK = self.conf_pull_back)
def indent(block, depth):
    """Indent every line of block by depth levels of four spaces.

    Lines are split with splitlines(), so a trailing newline is not kept.
    """
    pad = " " * (4*depth)
    shifted = []
    for text in block.splitlines():
        shifted.append(pad + text)
    return "\n".join(shifted)

View File

@@ -0,0 +1,49 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "engine_description.h"
#include "hs_compile.h" // for hs_platform_info
#include "util/target_info.h"
namespace ue2 {
// Out-of-line definition of the virtual destructor declared in the class;
// the body is intentionally empty.
EngineDescription::~EngineDescription() {}
/** Returns true if code built for this engine's target can run on
 * \p target_in. */
bool EngineDescription::isValidOnTarget(const target_t &target_in) const {
    const bool runnable = target_in.can_run_on_code_built_for(code_target);
    return runnable;
}
/** Builds a target with generic tuning and the given CPU feature mask. */
target_t targetByArchFeatures(u64a cpu_features) {
    hs_platform_info platform;
    platform.cpu_features = cpu_features;
    platform.tune = HS_TUNE_FAMILY_GENERIC;
    return target_t(platform);
}
} // namespace ue2

View File

@@ -0,0 +1,70 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ENGINE_DESCRIPTION_H
#define ENGINE_DESCRIPTION_H
#include "ue2common.h"
#include "util/target_info.h"
namespace ue2 {
/**
 * \brief Compile-time description of one generated matcher engine.
 *
 * Holds the engine's ID, the target its code was built for, and its
 * confirm-stage parameters. Concrete engine families derive from this and
 * supply getDefaultFloodSuffixLength().
 */
class EngineDescription {
    u32 id;
    target_t code_target; // the target that we built this code for
    u32 numBuckets;
    u32 confirmPullBackDistance;
    u32 confirmTopLevelSplit;

public:
    EngineDescription(u32 id_in, const target_t &code_target_in,
                      u32 numBuckets_in, u32 confirmPullBackDistance_in,
                      u32 confirmTopLevelSplit_in)
        : id(id_in),
          code_target(code_target_in),
          numBuckets(numBuckets_in),
          confirmPullBackDistance(confirmPullBackDistance_in),
          confirmTopLevelSplit(confirmTopLevelSplit_in) {}

    virtual ~EngineDescription();

    // Plain accessors for the values fixed at construction.
    u32 getID() const { return id; }
    u32 getNumBuckets() const { return numBuckets; }
    u32 getConfirmPullBackDistance() const { return confirmPullBackDistance; }
    u32 getConfirmTopLevelSplit() const { return confirmTopLevelSplit; }

    /** True if code built for this engine's target can run on \p target_in. */
    bool isValidOnTarget(const target_t &target_in) const;

    virtual u32 getDefaultFloodSuffixLength() const = 0;

    /** Defaults to true; derived engines may override. */
    virtual bool typicallyHoldsOneCharLits() const { return true; }
};
/** Returns a target given a CPU feature set value. The target's tuning is
 * always the generic family; only the feature mask comes from the caller. */
target_t targetByArchFeatures(u64a cpu_features);
} // namespace ue2
#endif

126
src/fdr/fdr.c Normal file
View File

@@ -0,0 +1,126 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "util/simd_utils.h"
/* P0(cnd): shorthand for unlikely(cnd). Defined ahead of the includes below;
 * the engine code templates wrap their rarely-taken conditions in it. */
#define P0(cnd) unlikely(cnd)
#include "fdr.h"
#include "fdr_internal.h"
#include "teddy_internal.h"
#include "flood_runtime.h"
#include "fdr_confirm.h"
#include "fdr_confirm_runtime.h"
#include "fdr_streaming_runtime.h"
#include "fdr_loadval.h"
/**
 * \brief Fetch the low \p numBits bits of the data starting one byte before
 * the scan start.
 *
 * When scanning starts at offset 0 that preceding byte lives in the history
 * buffer; otherwise it is read from the main buffer. For \p numBits > 8 the
 * following byte supplies bits 8 and up.
 *
 * If there is no history (len_history == 0, which is what block mode passes)
 * the preceding byte is taken as zero. Indexing buf_history[len_history - 1]
 * in that case wrapped the index around and read outside the buffer.
 *
 * \param a       Runtime arguments for the current scan.
 * \param numBits Number of low bits to keep; must be less than 32.
 * \return The masked value.
 */
static really_inline UNUSED
u32 getPreStartVal(const struct FDR_Runtime_Args *a, u32 numBits) {
    u32 r = 0;
    if (a->start_offset == 0) {
        if (a->len_history) {
            r = a->buf_history[a->len_history - 1];
        }
        if (numBits > 8) {
            r |= ((u32)a->buf[0] << 8);
        }
    } else {
        if (numBits <= 8) {
            r = a->buf[a->start_offset - 1];
        } else {
            r = lv_u16(a->buf + a->start_offset - 1, a->buf, a->buf + a->len);
        }
    }
    /* Unsigned one: keeps the mask computation out of signed-shift territory. */
    return r & ((1U << numBits) - 1);
}
#include "fdr_autogen.c"
/* Placeholder history buffer for block-mode scans: fdrExec() passes it as
 * both history pointers together with a history length of 0. It has no
 * initializer, so as a static object it is zero-filled. */
#define FAKE_HISTORY_SIZE 16
static const u8 fake_history[FAKE_HISTORY_SIZE];
/* Block-mode scan: runs the engine selected by fdr->engineID over buf, using
 * the zero-length fake history. Returns HWLM_SUCCESS without scanning when
 * start is at or beyond len. */
hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, size_t start,
                     HWLMCallback cb, void *ctxt, hwlm_group_t groups) {
    const struct FDR_Runtime_Args a = {
        buf,
        len,
        fake_history, /* history buffer */
        0,            /* history length */
        fake_history, // nocase
        0,
        start,
        cb,
        ctxt,
        &groups,
        nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
        0
    };

    /* Nothing to scan. */
    if (unlikely(a.start_offset >= a.len)) {
        return HWLM_SUCCESS;
    }

    assert(funcs[fdr->engineID]);
    return funcs[fdr->engineID](fdr, &a);
}
/* Streaming-mode scan: unpacks the stream state into the runtime arguments,
 * runs the engine selected by fdr->engineID (skipped when start is at or
 * beyond len), then packs the state back before returning. */
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
                              size_t hlen, const u8 *buf, size_t len,
                              size_t start, HWLMCallback cb, void *ctxt,
                              hwlm_group_t groups, u8 * stream_state) {
    struct FDR_Runtime_Args a = {
        buf,
        len,
        hbuf,
        hlen,
        hbuf, // nocase - start same as caseful, override later if needed
        hlen, // nocase
        start,
        cb,
        ctxt,
        &groups,
        nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
        /* Loaded from the 8 bytes ending at the end of history, if any. */
        hbuf ? CONF_LOADVAL_CALL_CAUTIOUS(hbuf + hlen - 8, hbuf, hbuf + hlen)
             : (u64a)0
    };

    fdrUnpackState(fdr, &a, stream_state);

    hwlm_error_t ret = HWLM_SUCCESS;
    if (!unlikely(a.start_offset >= a.len)) {
        assert(funcs[fdr->engineID]);
        ret = funcs[fdr->engineID](fdr, &a);
    }

    /* State is written back whether or not the engine ran. */
    fdrPackState(fdr, &a, stream_state);
    return ret;
}

91
src/fdr/fdr.h Normal file
View File

@@ -0,0 +1,91 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief FDR literal matcher: runtime API.
*/
#ifndef FDR_H
#define FDR_H
#include "ue2common.h"
#include "hwlm/hwlm.h"
// C linkage in the API
#ifdef __cplusplus
extern "C" {
#endif
struct FDR;
/** \brief Returns size in bytes of the given FDR engine. */
size_t fdrSize(const struct FDR *fdr);
/** \brief Returns non-zero if the contents of the stream state indicate that
* there is active FDR history beyond the regularly used history. */
u32 fdrStreamStateActive(const struct FDR *fdr, const u8 *stream_state);
/**
* \brief Block-mode scan.
*
* \param fdr FDR matcher engine.
* \param buf Buffer to scan.
* \param len Length of buffer to scan.
* \param start First offset in buf at which a match may end.
* \param cb Callback to call when a match is found.
* \param ctxt Caller-provided context pointer supplied to callback on match.
* \param groups Initial groups mask.
*
* \return HWLM_SUCCESS immediately if \p start is at or beyond \p len;
* otherwise the value returned by the engine's scan function.
*/
hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, void *ctxt,
hwlm_group_t groups);
/**
* \brief Streaming-mode scan.
*
* \param fdr FDR matcher engine.
* \param hbuf History buffer.
* \param hlen Length of history buffer (hbuf).
* \param buf Buffer to scan.
* \param len Length of buffer to scan (buf).
* \param start First offset in buf at which a match may end.
* \param cb Callback to call when a match is found.
* \param ctxt Caller-provided context pointer supplied to callback on match.
* \param groups Initial groups mask.
* \param stream_state Persistent stream state for use by FDR.
*
* \return HWLM_SUCCESS if \p start is at or beyond \p len (no scan is run);
* otherwise the value returned by the engine's scan function. The stream
* state is unpacked before and packed after in both cases.
*/
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
size_t hlen, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, void *ctxt,
hwlm_group_t groups, u8 *stream_state);
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // FDR_H

574
src/fdr/fdr_autogen.py Executable file
View File

@@ -0,0 +1,574 @@
#!/usr/bin/python
# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
from autogen_utils import *
from base_autogen import *
from string import Template
class OrStep(Step):
    """Step emitting the statement that ORs the looked-up value "st<offset>"
    into the state variable "s"."""

    def __init__(self, context, offset, width):
        Step.__init__(self, context, offset)
        lookup_var = self.gv("st%d" % offset)
        # Widths of 128 and above go through an or<width>() helper; narrower
        # widths use the plain |= operator.
        if width >= 128:
            self.val = "s = or%d(s, %s);" % (width, lookup_var.name)
        else:
            self.val = "s |= %s;" % lookup_var.name
class ShiftStateStep(Step):
    """Step emitting the statement that shifts the matcher's state variable
    by stride_used * num_buckets (passed to shift_expr as a negative
    distance)."""

    def __init__(self, context, offset = 0, stride_used = 1):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        state_var = matcher.state_variable
        distance = -(stride_used * matcher.num_buckets)
        shifted = state_var.type.shift_expr(state_var.name, distance)
        self.val = "%s = %s;" % (state_var.name, shifted)
class BulkLoadStep(Step):
    """Step emitting the load of "current_data_<offset>" using the matcher's
    bulk load type.

    define_var -- when True the variable is declared and initialised; when
                  False an assignment to the existing variable is emitted
    aligned    -- when True the load expression is requested with
                  code = "aligned"
    The size argument is accepted but not used by this step.
    """

    def __init__(self, context, offset, size, define_var = True, aligned = True):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        self.latency = 4
        load_type = matcher.bulk_load_type

        if aligned:
            load_expr = load_type.load_expr_data(self.offset, code = "aligned")
        else:
            load_expr = load_type.load_expr_data(self.offset)

        name = "current_data_%d" % offset
        if not define_var:
            # register this step as a writer of the already-declared variable
            lb_var = self.gv(name, reader = False, writer = True)
            self.val = "%s = %s;" % (name, load_expr)
            return

        lb_var = self.nv(load_type, name)
        self.val = lb_var.gen_initializer_stmt(load_expr)
class ValueExtractStep(Step):
    """Step emitting the declaration of "v<offset>": the value at this offset,
    shifted into place and masked with the matcher's reach_mask."""

    def __init__(self, context, offset, sub_load_cautious = False):
        Step.__init__(self, context, offset)
        m = self.matcher
        self.latency = 2
        bulk_bytes = m.datasize_bytes
        pos_in_load = offset % bulk_bytes

        if m.domain > 8 and pos_in_load == bulk_bytes - 1:
            # Case 1: reading more than one byte over the end of the bulk load
            self.latency = 4
            load_mode = "cautious_forward" if sub_load_cautious else "normal"
            single_load = m.single_load_type.load_expr_data(self.offset, load_mode)
            shifted = "(%s << %d)" % (single_load, m.reach_shift_adjust)
        elif m.fdr2_force_naive_load:
            # Case 2 (naive): the value lies within the current register but
            # is loaded directly instead of being taken from the bulk load
            single_load = m.single_load_type.load_expr_data(self.offset, "normal")
            shifted = "(%s << %d)" % (single_load, m.reach_shift_adjust)
        else:
            bulk_var = self.gv("current_data_%d" % (offset - pos_in_load))
            if pos_in_load == 0:
                # Case 2a: value is at LSB end of the register and must be
                # left-shifted into place if a "reach_shift_adjust" is required
                shifted = "(%s << %d)" % (bulk_var.name, m.reach_shift_adjust)
            else:
                # Case 2b: value is in the middle of the register and is
                # right-shifted into place (adjusted by "reach_shift_adjust")
                shifted = "(%s >> %d)" % (bulk_var.name,
                                          pos_in_load*8 - m.reach_shift_adjust)

        masked = "(%s) & 0x%x" % (shifted, m.reach_mask)
        value_var = self.nv(m.value_extract_type, "v%d" % offset)
        self.val = value_var.gen_initializer_stmt(masked)
class TableLookupStep(Step):
    """Step emitting the declaration of "st<offset>", read from the table
    "ft" at byte offset v<offset> * reach_multiplier."""

    def __init__(self, context, reach_multiplier, offset = 0):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        self.latency = 4
        index_var = self.gv("v%d" % offset)
        result_var = self.nv(matcher.state_type, "st%d" % offset)
        type_name = matcher.state_type.get_name()
        lookup = "*(const %s *)(ft + %s*%dU)" % (type_name, index_var.name,
                                                  reach_multiplier)
        self.val = result_var.gen_initializer_stmt(lookup)
class ShiftReachMaskStep(Step):
    """Step emitting the statement that shifts "st<offset>" by
    (offset % extract_frequency) * num_buckets."""

    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        pos_in_extract = offset % matcher.extract_frequency
        lookup_var = self.gv("st%d" % offset, writer = True)
        shifted = lookup_var.type.shift_expr(lookup_var.name,
                                             pos_in_extract * matcher.num_buckets)
        self.val = "%s = %s;" % (lookup_var.name, shifted)
class ConfExtractStep(Step):
    """Step emitting the declaration of "extr<offset>", holding the low
    extract_size bits of the state "s"."""

    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        state_type = matcher.state_type
        if state_type.isSIMDOnIntel():
            self.latency = 2
        low_bits = state_type.lowbit_extract_expr("s", matcher.extract_size)
        extracted = self.nv(matcher.extr_type, "extr%d" % offset)
        self.val = extracted.gen_initializer_stmt(low_bits)
# Step emitting the statement that combines the extracted value
# "extr<extract_offset>" into the confirm accumulator "conf<conf_offset>".
# When the two offsets are equal the accumulator is set to the (cast)
# extracted value; otherwise the extracted value is shifted by
# (extract_offset - conf_offset) * num_buckets and ORed in.
class ConfAccumulateStep(Step):
def __init__(self, context, extract_offset, conf_offset, define_var = True):
Step.__init__(self, context, extract_offset)
m = self.matcher
extr_var = self.gv("extr%d" % extract_offset)
# the extracted value is cast to the confirm type before being combined
extr_var_cast = "((%s)%s)" % (m.conf_type.get_name(), extr_var.name)
if extract_offset == conf_offset:
# create conf_var as a straight copy of extr
if define_var:
# declare the accumulator and initialise it in one statement
conf_var = self.nv(m.conf_type, "conf%d" % conf_offset)
self.val = conf_var.gen_initializer_stmt(extr_var_cast)
else:
# accumulator already declared elsewhere: emit a plain assignment
conf_var = self.gv("conf%d" % conf_offset, writer = True, reader = True)
self.val = "%s = %s;" % (conf_var.name, extr_var_cast)
else:
# shift extr_var and insert/OR it in conf_var
conf_var = self.gv("conf%d" % conf_offset, writer = True, reader = True)
shift_dist = (extract_offset - conf_offset) * m.num_buckets
self.val = "%s |= %s;" % (conf_var.name, m.conf_type.shift_expr(extr_var_cast, shift_dist))
# NOTE(review): indentation is missing in this copy of the file, so it is not
# clear whether the latency below applies to the shift/OR branch only or to
# every path -- confirm against the upstream file.
self.latency = 2
class ConfirmFlipStep(Step):
    """Step emitting the statement that flips the low
    confirm_frequency * num_buckets bits of "conf<offset>"."""

    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        m = self.matcher
        conf = self.gv("conf%d" % self.offset, writer = True)
        bit_count = self.matcher.confirm_frequency * m.num_buckets
        flipped = conf.type.flip_lowbits_expr(conf.name, bit_count)
        self.val = "%s = %s;" % (conf.name, flipped)
class ConfirmStep(Step):
    """Step whose text is the matcher's confirm code for "conf<offset>",
    as produced by produce_confirm_base()."""

    def __init__(self, context, offset, cautious = False):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        conf = self.gv("conf%d" % offset, writer = True)
        # confirmless handling is only enabled for stride 1
        confirmless = (matcher.stride == 1)
        self.val = matcher.produce_confirm_base(conf.name, conf.type.size,
                                                offset, cautious,
                                                enable_confirmless = confirmless,
                                                do_bailout = False)
class M3(MatcherBase):
def get_hash_safety_parameters(self):
    """Return the pair (0, single-load size in bytes minus one); the caller
    in produce_code() unpacks it as (behind, ahead)."""
    load_bytes = self.single_load_type.size_in_bytes()
    behind = 0
    ahead = load_bytes - 1
    return (behind, ahead)
def produce_compile_call(self):
print " { %d, %d, %d, %d, %d, %s, %d, %d }," % (
self.id, self.state_width, self.num_buckets,
self.stride, self.domain,
self.arch.target, self.conf_pull_back, self.conf_top_level_split)
# Print the C code for one pass over loop_bytes bytes of input.
# With switch_variant = False this prints the "if (doMainLoop) { for (...) {"
# form; with switch_variant = True it prints the switch-based form that is
# entered through "goto off<i>" labels and uses cautious loads/confirms.
# The per-offset work is described by Step objects registered on a
# CodeGenContext and printed via ctxt.schedule() / ctxt.dontschedule().
def produce_main_loop(self, switch_variant = False):
# offsets (within the block) at which a table lookup is made
stride_offsets = xrange(0, self.loop_bytes, self.stride)
stride_offsetSet = set(stride_offsets)
# OR steps emitted since the last extract; the next extract depends on them
so_steps_last_block = []
# most recent ShiftStateStep / ConfirmStep, used to chain dependencies
sh = None
last_confirm = None
ctxt = CodeGenContext(self)
if switch_variant:
print " ptr -= (iterBytes - dist);"
print " { " # need an extra scope around switch variant to stop its globals escaping
else:
print " if (doMainLoop) {"
print " for (; ptr + LOOP_READ_AHEAD < buf + len; ptr += iterBytes) {"
print self.produce_flood_check()
print " __builtin_prefetch(ptr + (iterBytes*4));"
print " assert(((size_t)ptr % START_MOD) == 0);"
# just do globally for now
if switch_variant:
subsidiary_load_cautious = True
confirm_cautious = True
else:
subsidiary_load_cautious = False
confirm_cautious = False
# bulk loads are placed at datasize_bytes-aligned offsets whose range
# (as computed below) intersects the stride offsets
if not self.fdr2_force_naive_load:
bulk_load_steps = [ off for off in range(self.loop_bytes)
if off % self.datasize_bytes == 0 and
(set(range(off, off + self.datasize_bytes - 1)) & stride_offsetSet)]
else:
bulk_load_steps = []
# one confirm accumulator per confirm_frequency bytes
confirm_steps = [ off for off in range(self.loop_bytes) if off % self.confirm_frequency == 0 ]
# declare the bulk-load variables up front
for off in bulk_load_steps:
lb_var = ctxt.new_var(None, self.bulk_load_type, "current_data_%d" % off)
print " " + lb_var.gen_initializer_stmt()
# declare the confirm accumulators; the switch variant initialises them to
# all-ones ("(type)-1"), the main-loop variant passes an empty initialiser
for off in confirm_steps:
var_name = "conf%d" % off
conf_def_var = ctxt.new_var(None, self.conf_type, var_name)
if switch_variant:
init_string = "(%s)-1" % self.conf_type.get_name()
else:
init_string = ""
print " " + conf_def_var.gen_initializer_stmt(init_string)
if switch_variant:
# one case per possible entry offset; each case fixes up conf, state and
# current_data for entering mid-block and then jumps to label off<i>
print " switch(iterBytes - dist) {"
for i in range(0, self.loop_bytes):
print " case %d:" % i
# init and poison conf; over-precise but harmless
# (the "/" below relies on Python 2 integer division)
conf_id = (i / self.confirm_frequency) * self.confirm_frequency
if i % self.confirm_frequency:
conf_fixup_bits = self.conf_type.size - (self.num_buckets * (i % self.confirm_frequency))
print " conf%d >>= %d;" % (conf_id, conf_fixup_bits)
else:
print " conf%d = 0;" % conf_id
# init state
state_fixup = i % self.extract_frequency
state = self.state_variable
shift_distance = self.num_buckets * state_fixup
if state_fixup:
# shift the state and OR in a low-bit mask of shift_distance bits
print " %s = %s;" % (state.name, state.type.shift_expr(state.name, shift_distance))
if self.state_width < 128:
print " %s |= %s;" % (state.name, state.type.lowbit_mask(shift_distance))
else:
print " %s = or%d(%s, %s);" % (state.name, self.state_width, state.name, state.type.lowbit_mask(shift_distance))
if not self.fdr2_force_naive_load:
# init current_data (could poison it in some cases)
load_mod = i % self.datasize_bytes
load_offset = i - load_mod
if load_mod:
# not coming in on an even boundary means having to do a load var
# actually, there are a bunch of things we can do on this bulk load
# to avoid having to be 'cautious_backwards' but I'm not completely
# sure they are good ideas
init_string = self.bulk_load_type.load_expr_data(load_offset,
code = "cautious_backward")
var_name = "current_data_%d" % load_offset
lb_var = ctxt.get_var(None, var_name, reader = False, writer = True)
print " %s = %s;" % (lb_var.name, init_string)
print " goto off%d;" % i
# entering with a full block's distance skips the switch body entirely
print " case %d: goto skipSwitch;" % self.loop_bytes
print " }"
print " {"
# build the steps for every byte offset in the block
for off in range(self.loop_bytes):
# X_mod is the offset we're up to relative to the last X operation
# X_offset is which of the last X operations matches this iteration
if (switch_variant):
LabelStep(ctxt, off)
if off in bulk_load_steps:
if not self.fdr2_force_naive_load:
BulkLoadStep(ctxt, off, self.datasize, define_var = False, aligned = not switch_variant)
if off in stride_offsets:
# extract value -> table lookup -> (optional shift) -> OR into state
if switch_variant:
OpenScopeStep(ctxt, off)
ValueExtractStep(ctxt, off, sub_load_cautious = subsidiary_load_cautious)
TableLookupStep(ctxt, self.reach_mult, off)
if off % self.extract_frequency:
ShiftReachMaskStep(ctxt, off)
so = OrStep(ctxt, off, self.state_width)
if switch_variant:
CloseScopeStep(ctxt, off)
# an OR must come after the previous state shift
if sh != None:
so.add_dependency(sh)
so_steps_last_block += [ so ]
extract_mod = off % self.extract_frequency
extract_offset = off - extract_mod
# extract on the last byte of each extract_frequency-sized group
extract_ready = extract_mod == self.extract_frequency - 1
if extract_ready:
if switch_variant:
OpenScopeStep(ctxt, off)
ex = ConfExtractStep(ctxt, extract_offset)
# NOTE(review): confirm_offset is only assigned further down in this loop
# body, so the call below sees the value left by the previous iteration (and
# the name would be unbound if this were reached at off == 0, which needs
# extract_frequency == 1). Confirm this ordering is intended.
ConfAccumulateStep(ctxt, extract_offset, confirm_offset, define_var = False)
# the extract must come after every OR step of this group
for so_step in so_steps_last_block:
ex.add_dependency(so_step)
if switch_variant:
CloseScopeStep(ctxt, off)
so_steps_last_block = []
sh = ShiftStateStep(ctxt, extract_offset, stride_used = self.extract_frequency)
sh.add_dependency(ex)
confirm_mod = off % self.confirm_frequency
confirm_offset = off - confirm_mod
# confirm on the last byte of each confirm_frequency-sized group
confirm_ready = confirm_mod == self.confirm_frequency - 1
if confirm_ready:
cflip = ConfirmFlipStep(ctxt, confirm_offset)
cf = ConfirmStep(ctxt, confirm_offset, cautious = confirm_cautious )
# confirms are chained so that each depends on the previous one
if last_confirm:
cf.add_dependency(last_confirm)
last_confirm = cf
# the main-loop variant goes through schedule(); the switch variant goes
# through dontschedule()
if not switch_variant:
print ctxt.schedule([ last_confirm, sh ])
else:
print ctxt.dontschedule([ last_confirm, sh ])
if switch_variant:
print "skipSwitch:;"
print " ptr += iterBytes;"
print " }" # close extra scope around switch variant
print " }"
def produce_init_state(self):
    """Return the C text that declares the state variable "s" and loads its
    starting value: from the table entry selected by getPreStartVal() (then
    shifted) when history is present, otherwise from fdr->start."""
    state_var = self.state_variable
    state_type = self.state_type
    shifted = state_var.type.shift_expr(state_var.name, -1 * self.num_buckets)
    shift_stmt = "%s = %s" % (state_var.name, shifted)
    template = Template("""
$TYPENAME s;
if (a->len_history) {
u32 tmp = getPreStartVal(a, $DOMAIN);
s = *((const $TYPENAME *)ft + tmp);
$SHIFT_EXPR;
} else {
s = *(const $TYPENAME *)&fdr->start;
}
""")
    # ZERO_EXPR is supplied although the template does not reference it
    values = {
        "TYPENAME": state_type.get_name(),
        "ZERO_EXPR": state_type.zero_expression(),
        "DOMAIN": self.domain,
        "SHIFT_EXPR": shift_stmt,
    }
    return template.substitute(values)
def produce_code(self):
(behind, ahead) = self.get_hash_safety_parameters()
loop_read_behind = behind
loop_read_ahead = self.loop_bytes + ahead
# we set up mask and shift stuff for extracting our masks from registers
#
# we have a choice as to whether to mask out the value early or
# extract the value (shift first) then mask it
#
# Intel has a free scaling factor from 1/2/4/8 so we want to combine
# the extra needed shift for SSE registers with the mask operation
ssb = self.state_type.size / 8 # state size in bytes
# Intel path
if ssb == 16 and self.domain == 16:
# obscure corner - we don't have the room in the register to
# do this for all values so we don't. domain==16 is pretty
# bad anyhow, of course
self.reach_mult = 8
else:
self.reach_mult = ssb
shift_amts = { 1 : 0, 2 : 1, 4 : 2, 8 : 3, 16: 4 }
self.reach_shift_adjust = shift_amts[ ssb/self.reach_mult ]
self.reach_mask = ((1 << self.domain) - 1) << self.reach_shift_adjust
print self.produce_header(visible = False)
print "// ",
print " Arch: " + self.arch.name,
print " State type: " + self.state_type.get_name(),
print " Num buckets: %d" % self.num_buckets,
print " Domain: %d" % self.domain,
print " Stride: %d" % self.stride
print self.produce_common_declarations()
print
print "\tconst size_t tabSize = %d;" % self.table_size
print """
const u8 * ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));
const u32 * confBase = (const u32 *)(ft + tabSize);
"""
print self.produce_init_state()
print "\tconst size_t iterBytes = %d;" % self.loop_bytes
print "\tconst size_t START_MOD = %d;" % self.datasize_bytes
print "\tconst size_t LOOP_READ_AHEAD = %d;" % loop_read_ahead
print """
while (ptr < buf + len) {
u8 doMainLoop = 1;
size_t remaining = len - (ptr - buf);
size_t dist;
if (remaining <= iterBytes) {
dist = remaining; // once through the switch and we're done
} else if (remaining < 2 * iterBytes) {
// nibble some stuff off the front, skip the main loop,
// then come back here
dist = iterBytes; // maybe could be cleverer
} else {
// now, we need to see if we can make it to a main loop iteration
// if so, we need to ensure that the main loop iteration is aligned
// to a START_MOD boundary and i >= 8 so we can read ptr + i - 8
// see if we can do it - if not, just switch the main loop off,
// eat iterBytes in cautious mode, and come back to this loop
const u8 * target = MAX(buf + 8, ptr);
target = ROUNDUP_PTR(target, START_MOD);
dist = target - ptr;
if (dist > iterBytes) {
doMainLoop = 0;
dist = iterBytes;
}
}
"""
self.produce_main_loop(switch_variant = True)
self.produce_main_loop(switch_variant = False)
print """
}
"""
print self.produce_footer()
def get_name(self):
    """Return the generated function's name, built from the arch name,
    domain, stride and state width."""
    parts = (self.arch.name, self.domain, self.stride, self.state_width)
    return "fdr_exec_%s_d%d_s%d_w%d" % parts
def __init__(self, state_width, domain, stride,
             arch,
             table_state_width = None,
             num_buckets = 8,
             extract_frequency = None,
             confirm_frequency = None):
    """Validate the matcher parameters and derive the code-generation
    settings stored on the instance.

    state_width       -- width in bits of the internal state; only 128 is
                         accepted
    domain            -- number of input bits used to index the reach table;
                         must be between 8 and 16 inclusive
    stride            -- how often the reach table is accessed; one of
                         1, 2, 4, 8
    arch              -- architecture description, stored as self.arch
    table_state_width -- accepted but ignored: it is overwritten with
                         state_width below
    num_buckets       -- only 8 is accepted
    extract_frequency -- bytes between destructive state shifts; None selects
                         datasize_bytes
    confirm_frequency -- None selects extract_frequency; must be a multiple
                         of extract_frequency

    Unsupported values are reported through fail_out().
    """
    # First - set up the values that are fundamental to how this matcher will operate
    self.arch = arch

    # get the width of the state width on which we operate internally
    if state_width not in [ 128 ]:
        fail_out("Unknown state width: %d" % state_width)
    self.state_width = state_width
    self.state_type = getRequiredType(self.state_width)
    self.state_variable = IntegerVariable("s", self.state_type)

    table_state_width = state_width
    self.table_state_width = state_width
    self.table_state_type = getRequiredType(self.table_state_width)

    # domain is the number of bits that we draw from our input to
    # index our 'reach' table
    if not 8 <= domain <= 16:
        fail_out("Unsupported domain: %d" % domain)
    self.domain = domain
    # this is the load type required for this domain if we want to
    # load it one at a time
    self.single_load_type = getRequiredType(self.domain)

    # table size
    self.table_size = 2**domain * table_state_width // 8

    # stride is the frequency with which we make data-driven
    # accesses to our reach table
    if stride not in [ 1, 2, 4, 8]:
        fail_out("Unsupported stride: %d" % stride)
    if stride * num_buckets > state_width:
        fail_out("Stride %d is too big for the number of buckets %d given state width %d\n" % (stride, num_buckets, state_width))
    self.stride = stride

    if num_buckets != 8:
        fail_out("Unsupported number of buckets: %d" % num_buckets)
    if state_width % num_buckets and state_width == 128:
        fail_out("Bucket scheme requires bit-shifts on m128 (failing)")
    self.num_buckets = num_buckets

    # Second - set up derived or optimization values - these can be
    # overridden by arguments that are passed in

    self.datasize = 64
    self.bulk_load_type = IntegerType(self.datasize)
    self.datasize_bytes = self.datasize/8

    self.value_extract_type = IntegerType(self.datasize)

    self.fdr2_force_naive_load = False # disable everywhere for trunk

    # extract frequency is how frequently (in bytes) we destructively shift
    # our state value after having pulled out that many bytes into a
    # confirm register (of one sort or another).
    # none means a default value - datasize, our biggest easily available GPR
    if extract_frequency is None:
        extract_frequency = self.datasize_bytes
    self.extract_frequency = extract_frequency
    self.extract_size = self.extract_frequency*self.num_buckets
    if extract_frequency < stride:
        fail_out("Can't extract at extract frequency %d with stride %d" % (extract_frequency, stride))
    if extract_frequency not in [ None, 1, 2, 4, 8, 16]:
        fail_out("Weird extract frequency: %d" % extract_frequency)

    if self.extract_size <= 32:
        self.extr_type = IntegerType(32)
    elif self.extract_size <= 64:
        self.extr_type = IntegerType(64)
    else:
        # report the offending size; the previous code formatted an undefined
        # name here and raised NameError instead of reaching fail_out()
        fail_out("Implausible size %d required for confirm extract step" % self.extract_size)

    # extract_frequency is how often we pull out our state and place
    # it somewhere in a lossless fashion
    # confirm_frequency, on the other hand, is how frequently we
    # take the state extracted by extract_frequency and cobble it
    # together into a matching loop
    # confirm_frequency must be a multiple of extract_frequency
    # and must fit into a fast register; for now; we're going to
    # stay in the GPR domain
    if confirm_frequency is None:
        confirm_frequency = self.extract_frequency
    self.confirm_frequency = confirm_frequency
    if confirm_frequency % self.extract_frequency:
        fail_out("Confirm frequency %d must be evenly divisible by extract_frequency %d" % (confirm_frequency, self.extract_frequency))

    self.conf_size = self.confirm_frequency * self.num_buckets
    if self.conf_size <= 32:
        self.conf_type = IntegerType(32)
    elif self.conf_size <= 64:
        self.conf_type = IntegerType(64)
    else:
        fail_out("Implausible size %d required for confirm accumulate step" % self.conf_size)

    # how many bytes in flight at once
    self.loop_bytes = 16

    # confirm configuration

    # how many entries in the top-level confirm table - 256 means
    # complete split on the last character
    self.conf_top_level_split = 256

    # how much we 'pull back' in confirm - this is obviously related
    # to the first level conf but we will keep two separate parameters
    # for this to avoid the risk of conflating these
    self.conf_pull_back = 1

    if self.conf_pull_back > 0 and self.conf_top_level_split < 256:
        fail_out("Pull back distance %d not supported by top level split %d" % (self.conf_pull_back, self.conf_top_level_split))

    # minor stuff
    self.default_body_indent = 8

562
src/fdr/fdr_compile.cpp Normal file
View File

@@ -0,0 +1,562 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief FDR literal matcher: build API.
*/
#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_compile.h"
#include "fdr_confirm.h"
#include "fdr_compile_internal.h"
#include "fdr_engine_description.h"
#include "teddy_compile.h"
#include "teddy_engine_description.h"
#include "grey.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/compare.h"
#include "util/dump_mask.h"
#include "util/target_info.h"
#include "util/ue2string.h"
#include "util/verify_types.h"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include <boost/core/noncopyable.hpp>
using namespace std;
namespace ue2 {
namespace {
/** \brief Builds an FDR structure from a literal set and an engine
 * description: literals are assigned to buckets, the lookup table is filled
 * in, and the final allocation is laid out by setupFDR(). */
class FDRCompiler : boost::noncopyable {
private:
// engine description consulted for table size, bucket count and bit layout
const FDREngineDescription &eng;
// lookup table bytes; sized to eng.getTabSizeBytes() by the constructor
vector<u8> tab;
// literal set being compiled (referenced, not copied)
const vector<hwlmLiteral> &lits;
// for each bucket, the indices (into lits) of the literals assigned to it
map<BucketIndex, std::vector<LiteralIndex> > bucketToLits;
// forwarded to setupFullMultiConfs() when the confirm structures are built
bool make_small;
// pointer to the table entry with the given index
u8 *tabIndexToMask(u32 indexInTable);
// append literal l to bucket b
void assignStringToBucket(LiteralIndex l, BucketIndex b);
// partition all literals into buckets
void assignStringsToBuckets();
#ifdef DEBUG
// print the default mask and every table entry that differs from it
void dumpMasks(const u8 *defaultMask);
#endif
// fill in tab from the bucket assignment
void setupTab();
// allocate the FDR and copy table, confirm, flood-control and link data in
aligned_unique_ptr<FDR> setupFDR(pair<u8 *, size_t> link);
// write the initial state bits into fdr->start
void createInitialState(FDR *fdr);
public:
FDRCompiler(const vector<hwlmLiteral> &lits_in,
const FDREngineDescription &eng_in, bool make_small_in)
: eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(lits_in),
make_small(make_small_in) {}
// run bucket assignment, table setup and FDR layout; returns the built FDR
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link);
};
/** \brief Return a pointer to the table entry with index \a indexInTable.
 *
 * Each entry occupies eng.getSchemeWidth() / 8 bytes of \a tab.
 */
u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) {
    const size_t entry_bytes = eng.getSchemeWidth() / 8;
    // The whole entry must lie inside the table. Comparing the entry index
    // against the table's byte count (as was done before) lets indices past
    // the last entry through.
    assert(((size_t)indexInTable + 1) * entry_bytes <= tab.size());
    return &tab[0] + (indexInTable * entry_bytes);
}
/** \brief Set bit number \a bit in the byte array \a msk (bit 0 is the least
 * significant bit of msk[0]). */
static
void setbit(u8 *msk, u32 bit) {
    const u32 byte_idx = bit / 8;
    const u32 bit_in_byte = bit % 8;
    msk[byte_idx] |= 1U << bit_in_byte;
}
/** \brief Clear bit number \a bit in the byte array \a msk (bit 0 is the
 * least significant bit of msk[0]). */
static
void clearbit(u8 *msk, u32 bit) {
    const u32 byte_idx = bit / 8;
    const u32 bit_in_byte = bit % 8;
    msk[byte_idx] &= ~(1U << bit_in_byte);
}
/** \brief Byte-wise AND of \a a and \a b over \a num_bytes bytes, written to
 * \a dest. */
static
void andMask(u8 *dest, const u8 *a, const u8 *b, u32 num_bytes) {
    u32 idx = 0;
    while (idx < num_bytes) {
        dest[idx] = a[idx] & b[idx];
        ++idx;
    }
}
/** \brief Write the initial state bits into fdr->start.
 *
 * For every bucket, the bits for positions below (minimum literal length in
 * that bucket) - 1 are set; the remaining bits are left untouched.
 */
void FDRCompiler::createInitialState(FDR *fdr) {
    u8 *start = (u8 *)&fdr->start;

    /* initial state should be 1 in each slot in the bucket up to bucket
     * minlen - 1, and 0 thereafter */
    for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
        // Find the minimum length for the literals in this bucket.
        const vector<LiteralIndex> &bucket_lits = bucketToLits[b];
        u32 min_len = ~0U;
        for (const LiteralIndex &lit_idx : bucket_lits) {
            min_len = min(min_len, verify_u32(lits[lit_idx].s.length()));
        }

        DEBUG_PRINTF("bucket %u has min_len=%u\n", b, min_len);
        assert(min_len);

        for (PositionInBucket i = 0; i < eng.getBucketWidth(b); i++) {
            if (!(i < min_len - 1)) {
                continue;
            }
            setbit(start, eng.getSchemeBit(b, i));
        }
    }
}
/** \brief Allocate the final FDR structure and copy every component into it.
 *
 * Layout, in order: header (rounded up to 16 bytes), lookup table, confirm
 * data, flood-control data, and the optional link block. The confirm and
 * flood-control buffers built here, and the link buffer when present, are
 * released with aligned_free() after being copied.
 *
 * \param link (pointer, size) of the link block; pointer may be null.
 */
aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
    const size_t tabSize = eng.getTabSizeBytes();

    pair<u8 *, size_t> flood = setupFDRFloodControl(lits, eng);
    pair<u8 *, size_t> conf =
        setupFullMultiConfs(lits, eng, bucketToLits, make_small);

    const size_t confSize = conf.second;
    const size_t floodSize = flood.second;
    const size_t linkSize = link.second;

    assert(ISALIGNED_16(tabSize));
    assert(ISALIGNED_16(confSize));
    assert(ISALIGNED_16(floodSize));
    assert(ISALIGNED_16(linkSize));

    const size_t headerSize = ROUNDUP_16(sizeof(FDR));
    const size_t size = ROUNDUP_16(headerSize + tabSize + confSize +
                                   floodSize + linkSize);

    DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu "
                 "total=%zu\n",
                 headerSize, tabSize, confSize, floodSize, size);

    aligned_unique_ptr<FDR> fdr = aligned_zmalloc_unique<FDR>(size);
    assert(fdr); // otherwise would have thrown std::bad_alloc

    // header fields
    fdr->size = size;
    fdr->engineID = eng.getID();
    fdr->maxStringLen = verify_u32(maxLen(lits));
    createInitialState(fdr.get());

    u8 *const fdr_base = (u8 *)fdr.get();
    u8 *cursor = fdr_base + ROUNDUP_16(sizeof(FDR));

    // lookup table
    copy(tab.begin(), tab.end(), cursor);
    cursor += tabSize;

    // confirm data
    memcpy(cursor, conf.first, confSize);
    cursor += confSize;
    aligned_free(conf.first);

    // flood-control data; its offset is recorded in the header
    fdr->floodOffset = verify_u32(cursor - fdr_base);
    memcpy(cursor, flood.first, floodSize);
    cursor += floodSize;
    aligned_free(flood.first);

    // optional link block; a link offset of 0 means "none"
    if (!link.first) {
        fdr->link = 0;
    } else {
        fdr->link = verify_u32(cursor - fdr_base);
        memcpy(cursor, link.first, linkSize);
        aligned_free(link.first);
    }

    return fdr;
}
/** \brief Append literal index \a l to the list for bucket \a b (creating the
 * list if the bucket has none yet). */
void FDRCompiler::assignStringToBucket(LiteralIndex l, BucketIndex b) {
    vector<LiteralIndex> &bucket = bucketToLits[b];
    bucket.push_back(l);
}
struct LitOrder {
explicit LitOrder(const vector<hwlmLiteral> &vl_) : vl(vl_) {}
bool operator()(const u32 &i1, const u32 &i2) const {
const string &i1s = vl[i1].s;
const string &i2s = vl[i2].s;
size_t len1 = i1s.size(), len2 = i2s.size();
if (len1 != len2) {
return len1 < len2;
} else {
string::const_reverse_iterator it1, it2;
tie(it1, it2) =
std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin());
if (it1 == i1s.rend()) {
return false;
}
return *it1 < *it2;
}
}
private:
const vector<hwlmLiteral> &vl;
};
/** \brief Score for a group of \a count literals of length \a len; lower is
 * better.
 *
 * The score is count * (128^3 / min(len, 128)^3), using integer division.
 * A length of zero yields the maximum u64a value.
 */
static u64a getScoreUtil(u32 len, u32 count) {
    if (!len) {
        return (u64a)-1;
    }

    const u32 LEN_THRESH = 128;
    u32 elen = len;
    if (elen > LEN_THRESH) {
        elen = LEN_THRESH;
    }

    const u32 thresh_cubed = LEN_THRESH * LEN_THRESH * LEN_THRESH;
    const u32 elen_cubed = elen * elen * elen;
    const u64a lenScore = thresh_cubed / elen_cubed;

    // deemphasize count - possibly more than needed
    // this might be overkill in the other direction
    return count * lenScore;
}
//#define DEBUG_ASSIGNMENT
/** \brief Partition the literals into the engine's buckets.
 *
 * Literals are sorted with LitOrder and grouped into chunks (a new chunk
 * starts when the literal length changes, for lengths below
 * MAX_CONSIDERED_LENGTH, or when a chunk reaches maxPerChunk entries). A
 * table of (score, next-chunk) pairs is then filled in using getScoreUtil(),
 * and the links in that table are followed to decide which run of chunks
 * goes into each bucket. Results are recorded via assignStringToBucket().
 *
 * If no chunk is produced (no literals, or only zero-length literals) the
 * function returns without assigning anything.
 */
void FDRCompiler::assignStringsToBuckets() {
    typedef u64a SCORE; // 'Score' type
    const SCORE MAX_SCORE = (SCORE)-1;
    const u32 CHUNK_MAX = 512;
    const u32 BUCKET_MAX = 16;
    typedef pair<SCORE, u32> SCORE_INDEX_PAIR;

    u32 ls = verify_u32(lits.size());
    // make a vector that contains our literals as pointers or u32 LiteralIndex values
    vector<LiteralIndex> vli;
    vli.resize(ls);
    map<u32, u32> lenCounts;
    for (LiteralIndex l = 0; l < ls; l++) {
        vli[l] = l;
        lenCounts[lits[l].s.size()]++;
    }
    // sort vector by literal length + if tied on length, 'magic' criteria of some kind (tbd)
    stable_sort(vli.begin(), vli.end(), LitOrder(lits));

#ifdef DEBUG_ASSIGNMENT
    for (map<u32, u32>::iterator i = lenCounts.begin(), e = lenCounts.end();
         i != e; ++i) {
        printf("l<%d>:%d ", i->first, i->second);
    }
    printf("\n");
#endif

    // TODO: detailed early stage literal analysis for v. small cases (actually look at lits)
    // yes - after we factor this out and merge in the Teddy style of building we can look
    // at this, although the teddy merge modelling is quite different. It's still probably
    // adaptable to some extent for this class of problem

    u32 firstIds[CHUNK_MAX]; // index into vli of the first literal of each chunk
                             // (the row after the last chunk holds 'ls' as the closing bound)
    u32 count[CHUNK_MAX]; // how many are in this chunk
    u32 length[CHUNK_MAX]; // how long things in the chunk are

    const u32 MAX_CONSIDERED_LENGTH = 16;
    u32 currentChunk = 0;
    u32 currentSize = 0;
    u32 chunkStartID = 0;
    u32 maxPerChunk = ls/(CHUNK_MAX - MIN(MAX_CONSIDERED_LENGTH, lenCounts.size())) + 1;

    for (u32 i = 0; i < ls && currentChunk < CHUNK_MAX - 1; i++) {
        LiteralIndex l = vli[i];
        if ((currentSize < MAX_CONSIDERED_LENGTH && (lits[l].s.size() != currentSize)) ||
            (currentSize != 1 && ((i - chunkStartID) >= maxPerChunk))) {
            currentSize = lits[l].s.size();
            if (currentChunk) {
                count[currentChunk - 1 ] = i - chunkStartID;
            }
            chunkStartID = firstIds[currentChunk] = i;
            length[currentChunk] = currentSize;
            currentChunk++;
        }
    }

    // No chunk was opened (no literals, or only zero-length ones): there is
    // nothing to place in any bucket, and the count[currentChunk - 1] write
    // below would land outside the array.
    if (currentChunk == 0) {
        return;
    }

    count[currentChunk - 1] = ls - chunkStartID;
    // close off chunks with an empty row
    firstIds[currentChunk] = ls;
    length[currentChunk] = 0;
    count[currentChunk] = 0;
    u32 nChunks = currentChunk + 1;

#ifdef DEBUG_ASSIGNMENT
    for (u32 j = 0; j < nChunks; j++) {
        printf("%d %d %d %d\n", j, firstIds[j], count[j], length[j]);
    }
#endif

    SCORE_INDEX_PAIR t[CHUNK_MAX][BUCKET_MAX]; // pair of score, index
    u32 nb = eng.getNumBuckets();
    // the second dimension of t is indexed by bucket number
    assert(nb <= BUCKET_MAX);

    // column 0: everything from chunk j onwards goes into a single bucket
    for (u32 j = 0; j < nChunks; j++) {
        u32 cnt = 0;
        for (u32 k = j; k < nChunks; ++k) {
            cnt += count[k];
        }
        t[j][0] = make_pair(getScoreUtil(length[j], cnt), 0);
    }

    // column i: best split of chunks j.. using one more bucket than column i-1
    for (u32 i = 1; i < nb; i++) {
        for (u32 j = 0; j < nChunks - 1; j++) { // don't process last, empty row
            SCORE_INDEX_PAIR best = make_pair(MAX_SCORE, 0);
            u32 cnt = count[j];
            for (u32 k = j + 1; k < nChunks - 1; k++, cnt += count[k]) {
                SCORE score = getScoreUtil(length[j], cnt);
                if (score > best.first) {
                    break; // if we're now worse locally than our best score, give up
                }
                score += t[k][i-1].first;
                if (score < best.first) {
                    best = make_pair(score, k);
                }
            }
            t[j][i] = best;
        }
        t[nChunks - 1][i] = make_pair(0,0); // fill in empty final row for next iteration
    }

#ifdef DEBUG_ASSIGNMENT
    for (u32 j = 0; j < nChunks; j++) {
        for (u32 i = 0; i < nb; i++) {
            SCORE_INDEX_PAIR v = t[j][i];
            printf("<%7lld,%3d>", v.first, v.second);
        }
        printf("\n");
    }
#endif

    // our best score is in t[0][nb - 1] and we can follow the links
    // to find where our buckets should start and what goes into them
    for (u32 i = 0, n = nb; n && (i != nChunks - 1); n--) {
        u32 j = t[i][n - 1].second;
        if (j == 0) {
            // a link of 0 means "everything up to the closing row"
            j = nChunks - 1;
        }
        // put chunks i .. j-1 into bucket nb - n
#ifdef DEBUG_ASSIGNMENT
        printf("placing from %d to %d in bucket %d\n", firstIds[i], firstIds[j],
               nb - n);
#endif
        for (u32 k = firstIds[i]; k < firstIds[j]; k++) {
            assignStringToBucket((LiteralIndex)vli[k], nb - n);
        }
        i = j;
    }
}
#ifdef DEBUG
void FDRCompiler::dumpMasks(const u8 *defaultMask) {
const size_t width = eng.getSchemeWidth();
printf("default mask: %s\n", dumpMask(defaultMask, width).c_str());
for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
u8 *m = tabIndexToMask(i);
if (memcmp(m, defaultMask, width / 8)) {
printf("tab %04x: %s\n", i, dumpMask(m, width).c_str());
}
}
}
#endif
/** \brief For suffix position \a pos, collect the (don't-care mask, value
 * mask) pairs of every literal in \a vl into \a m2.
 *
 * \param eng  engine description; eng.bits gives the number of index bits.
 * \param vl   indices (into \a lits) of the literals to examine.
 * \param lits full literal set.
 * \param pos  position counted back from the last character of the literal.
 * \param m2   output: maps a don't-care mask to the set of value masks seen
 *             with it.
 * \return true as soon as a literal is found whose eng.bits index bits are
 *         all don't-care at this position (m2 may then be incomplete);
 *         false otherwise.
 */
static
bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
const vector<LiteralIndex> &vl,
const vector<hwlmLiteral> &lits,
SuffixPositionInString pos,
std::map<u32, ue2::unordered_set<u32> > &m2) {
// number of bytes needed to cover eng.bits index bits
u32 distance = 0;
if (eng.bits <= 8) {
distance = 1;
} else if (eng.bits <= 16) {
distance = 2;
} else if (eng.bits <= 32) {
distance = 4;
}
for (vector<LiteralIndex>::const_iterator i = vl.begin(), e = vl.end();
i != e; ++i) {
// prefetch the literal five entries ahead, when there is one
if (e - i > 5) {
__builtin_prefetch(&lits[*(i + 5)]);
}
const hwlmLiteral &lit = lits[*i];
const size_t sz = lit.s.size();
u32 mask = 0;
u32 dontCares = 0;
// assemble the index value one byte at a time; byte cnt comes from the
// character cnt places earlier in suffix position than pos
for (u32 cnt = 0; cnt < distance; cnt++) {
int newPos = pos - cnt;
u8 dontCareByte = 0x0;
u8 maskByte = 0x0;
if (newPos < 0 || ((u32)newPos >= sz)) {
// outside the literal: the whole byte is don't-care
dontCareByte = 0xff;
} else {
// positions are counted back from the end of the literal
u8 c = lit.s[sz - newPos - 1];
maskByte = c;
u32 remainder = eng.bits - cnt * 8;
assert(remainder != 0);
if (remainder < 8) {
// only the low 'remainder' bits of this byte are index bits
u8 cmask = (1U << remainder) - 1;
maskByte &= cmask;
dontCareByte |= ~cmask;
}
if (lit.nocase && ourisalpha(c)) {
// caseless alphabetic character: bit 0x20 becomes don't-care
maskByte &= 0xdf;
dontCareByte |= 0x20;
}
}
u32 loc = cnt * 8;
mask |= maskByte << loc;
dontCares |= dontCareByte << loc;
}
// truncate mask and dontCares down to eng.bits bits
mask &= (1U << eng.bits) - 1;
dontCares &= (1U << eng.bits) - 1;
// every index bit is don't-care for this literal: report and stop early
if (dontCares == ((1U << eng.bits) - 1)) {
return true;
}
m2[dontCares].insert(mask);
}
return false;
}
/** \brief Fill in the lookup table \a tab from the bucket assignment.
 *
 * Every entry starts as all-ones. For each bucket and suffix position, the
 * corresponding scheme bit is cleared in every table entry whose index is
 * compatible with some literal of that bucket at that position. Positions at
 * which getMultiEntriesAtPosition() reports an all-don't-care literal clear
 * the bit in the default mask instead, which is ANDed into every entry at
 * the end.
 */
void FDRCompiler::setupTab() {
const size_t mask_size = eng.getSchemeWidth() / 8;
assert(mask_size);
// start with every bit set in the default mask and in every table entry
vector<u8> defaultMask(mask_size, 0xff);
for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
memcpy(tabIndexToMask(i), &defaultMask[0], mask_size);
}
typedef std::map<u32, ue2::unordered_set<u32> > M2SET;
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
const vector<LiteralIndex> &vl = bucketToLits[b];
SuffixPositionInString pLimit = eng.getBucketWidth(b);
for (SuffixPositionInString pos = 0; pos < pLimit; pos++) {
u32 bit = eng.getSchemeBit(b, pos);
M2SET m2;
bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2);
if (done) {
// some literal is all don't-care here: clear the bit for every entry
// via the default mask
clearbit(&defaultMask[0], bit);
continue;
}
for (M2SET::const_iterator i = m2.begin(), e = m2.end(); i != e;
++i) {
u32 dc = i->first;
const ue2::unordered_set<u32> &mskSet = i->second;
// v walks through every combination of the bits set in dc; the bits
// outside dc are kept at 1 so that the addition below carries across
// them from one dc bit to the next
u32 v = ~dc;
do {
// b2: the current combination of don't-care bits
u32 b2 = v & dc;
for (ue2::unordered_set<u32>::const_iterator
i2 = mskSet.begin(),
e2 = mskSet.end();
i2 != e2; ++i2) {
// table index = fixed bits of the value mask plus this combination
u32 val = (*i2 & ~dc) | b2;
clearbit(tabIndexToMask(val), bit);
}
// advance to the next combination: add the lowest dc bit, then restore
// the bits outside dc to 1
v = (v + (dc & -dc)) | ~dc;
} while (v != ~dc);
}
}
}
// apply the default mask to every entry
for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
u8 *m = tabIndexToMask(i);
andMask(m, m, &defaultMask[0], mask_size);
}
#ifdef DEBUG
dumpMasks(&defaultMask[0]);
#endif
}
/**
 * Run the build pipeline: assign literals to buckets, populate the table
 * masks, then lay out the final FDR structure using the supplied link data.
 */
aligned_unique_ptr<FDR> FDRCompiler::build(pair<u8 *, size_t> link) {
    // The table setup reads the bucket assignment, so the order matters.
    assignStringsToBuckets();
    setupTab();

    aligned_unique_ptr<FDR> fdr = setupFDR(link);
    return fdr;
}
} // namespace
/**
 * Shared implementation behind fdrBuildTable() and fdrBuildTableHinted().
 *
 * Builds the streaming link data if a stream_control is supplied, then tries
 * a Teddy build (when grey.fdrAllowTeddy is set) and falls back to an FDR
 * build. With hint == HINT_INVALID the FDR engine is picked by chooseEngine();
 * otherwise the hinted engine description is used. Returns a null pointer if
 * no FDR engine description could be obtained.
 */
static
aligned_unique_ptr<FDR>
fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
                      const target_t &target, const Grey &grey, u32 hint,
                      hwlmStreamingControl *stream_control) {
    pair<u8 *, size_t> link(nullptr, 0);
    if (stream_control != nullptr) {
        link = fdrBuildTableStreaming(lits, stream_control);
    }

    DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");

    // Teddy gets the first attempt when it is permitted.
    if (grey.fdrAllowTeddy) {
        aligned_unique_ptr<FDR> teddy =
            teddyBuildTableHinted(lits, make_small, hint, target, link);
        if (teddy) {
            DEBUG_PRINTF("build with teddy succeeded\n");
            return teddy;
        }
        DEBUG_PRINTF("build with teddy failed, will try with FDR\n");
    }

    // Fall back to FDR proper: either choose an engine or honour the hint.
    unique_ptr<FDREngineDescription> engine;
    if (hint == HINT_INVALID) {
        engine = chooseEngine(target, lits, make_small);
    } else {
        engine = getFdrDescription(hint);
    }
    if (!engine) {
        return nullptr;
    }

    FDRCompiler compiler(lits, *engine, make_small);
    return compiler.build(link);
}
/**
 * Public build entry point: builds a matcher for lits without any engine
 * hint, leaving the engine choice to the internal builder.
 */
aligned_unique_ptr<FDR> fdrBuildTable(const vector<hwlmLiteral> &lits,
                                      bool make_small, const target_t &target,
                                      const Grey &grey,
                                      hwlmStreamingControl *stream_control) {
    const u32 no_hint = HINT_INVALID;
    return fdrBuildTableInternal(lits, make_small, target, grey, no_hint,
                                 stream_control);
}
#if !defined(RELEASE_BUILD)
/**
 * Build a matcher using a caller-specified engine hint (non-release builds
 * only). All work, including construction of the streaming link data, is
 * done by fdrBuildTableInternal().
 */
aligned_unique_ptr<FDR>
fdrBuildTableHinted(const vector<hwlmLiteral> &lits, bool make_small, u32 hint,
                    const target_t &target, const Grey &grey,
                    hwlmStreamingControl *stream_control) {
    // No local link pair is needed here: the internal builder creates its own.
    return fdrBuildTableInternal(lits, make_small, target, grey, hint,
                                 stream_control);
}
#endif
} // namespace ue2
// FIXME: should be compile-time only
/** Return the total size recorded in the FDR header; fdr must be non-null. */
size_t fdrSize(const FDR *fdr) {
    assert(fdr != nullptr);
    const size_t total = fdr->size;
    return total;
}

66
src/fdr/fdr_compile.h Normal file
View File

@@ -0,0 +1,66 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief FDR literal matcher: build API.
*/
#ifndef FDR_COMPILE_H
#define FDR_COMPILE_H
#include "ue2common.h"
#include "util/alloc.h"
#include <vector>
struct FDR;
namespace ue2 {
struct hwlmLiteral;
struct hwlmStreamingControl;
struct Grey;
struct target_t;
/**
 * \brief Build a literal matcher for the given set of literals.
 *
 * No engine hint is supplied; the implementation tries a Teddy build first
 * when grey.fdrAllowTeddy is set and otherwise (or if Teddy fails) builds an
 * FDR engine chosen for the target.
 *
 * \param lits           literals to match
 * \param make_small     passed through to the engine selection and builders
 * \param target         target description used for engine selection
 * \param grey           grey-box settings (fdrAllowTeddy is consulted)
 * \param stream_control if non-null, used to build streaming link data first
 * \return the built matcher, or a null pointer if no FDR engine description
 *         could be obtained
 */
ue2::aligned_unique_ptr<FDR>
fdrBuildTable(const std::vector<hwlmLiteral> &lits, bool make_small,
const target_t &target, const Grey &grey,
hwlmStreamingControl *stream_control = nullptr);
#if !defined(RELEASE_BUILD)
/**
 * \brief As fdrBuildTable(), but with an explicit engine hint.
 *
 * Only available in non-release builds. The hint is forwarded to both the
 * Teddy and the FDR builders.
 */
ue2::aligned_unique_ptr<FDR>
fdrBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
u32 hint, const target_t &target, const Grey &grey,
hwlmStreamingControl *stream_control = nullptr);
#endif
} // namespace ue2
#endif

View File

@@ -0,0 +1,88 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_COMPILE_INTERNAL_H
#define FDR_COMPILE_INTERNAL_H
#include "ue2common.h"
#include "hwlm/hwlm_literal.h"
#include <map>
#include <utility>
#include <vector>
struct FDRConfirm;
struct LitInfo;
namespace ue2 {
// A pile of decorative typedefs: all are plain u32 and exist for
// documentation purposes more than anything else.
typedef u32 LiteralIndex;
typedef u32 ConfirmIndex;
typedef u32 SuffixPositionInString; // zero is last byte, counting back
// into the string
typedef u32 BucketIndex;
typedef u32 SchemeBitIndex;
typedef u32 PositionInBucket; // zero is "we are matching right now!",
// counting towards future matches
class EngineDescription;
class FDREngineDescription;
struct hwlmStreamingControl;
// NOTE(review): the only definition of getFDRConfirm visible alongside this
// header is a file-static function taking five parameters (with additional
// applyOneCharOpt and make_confirm flags). This three-parameter declaration
// appears to have no matching definition - confirm whether it is stale.
size_t getFDRConfirm(const std::vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
bool make_small);
// Builds the complete confirm region for an engine: a switch table of u32
// offsets (one per confirm split value and bucket) followed by the packed
// FDRConfirm structures those offsets refer to. Returns the buffer and its
// total size in bytes. The buffer comes from aligned_zmalloc(); ownership
// passes to the caller (presumably released with aligned_free - confirm).
std::pair<u8 *, size_t> setupFullMultiConfs(
const std::vector<hwlmLiteral> &lits, const EngineDescription &eng,
std::map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits,
bool make_small);
// all suffixes include an implicit max_bucket_width suffix to ensure that
// we always read a full-scale flood "behind" us in terms of what's in our
// state; if we don't have a flood that's long enough we won't be in the
// right state yet to allow blindly advancing
std::pair<u8 *, size_t>
setupFDRFloodControl(const std::vector<hwlmLiteral> &lits,
const EngineDescription &eng);
std::pair<u8 *, size_t>
fdrBuildTableStreaming(const std::vector<hwlmLiteral> &lits,
hwlmStreamingControl *stream_control);
static constexpr u32 HINT_INVALID = 0xffffffff;
// fdr_compile_util.cpp utilities

// Length of the longest literal string in lits (0 if lits is empty).
size_t maxLen(const std::vector<hwlmLiteral> &lits);
// Length of the shortest literal string in lits; *count receives the number
// of literals of that length. For empty lits returns (size_t)-1, *count == 0.
size_t minLenCount(const std::vector<hwlmLiteral> &lits, size_t *count);
// Absolute difference |i - j| computed without unsigned wrap-around.
u32 absdiff(u32 i, u32 j);
} // namespace ue2
#endif

View File

@@ -0,0 +1,65 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fdr_compile_internal.h"
#include "hwlm/hwlm_literal.h"
#include <algorithm>
#include <vector>
using namespace std;
namespace ue2 {
size_t maxLen(const vector<hwlmLiteral> &lits) {
size_t rv = 0;
for (const auto &lit : lits) {
rv = max(rv, lit.s.size());
}
return rv;
}
size_t minLenCount(const vector<hwlmLiteral> &lits, size_t *count) {
size_t rv = (size_t)-1;
*count = 0;
for (const auto &lit : lits) {
if (lit.s.size() < rv) {
rv = lit.s.size();
*count = 1;
} else if (lit.s.size() == rv) {
(*count)++;
}
}
return rv;
}
/** Return |i - j|, always subtracting the smaller value from the larger. */
u32 absdiff(u32 i, u32 j) {
    if (i > j) {
        return i - j;
    }
    return j - i;
}
} // namespace ue2

100
src/fdr/fdr_confirm.h Normal file
View File

@@ -0,0 +1,100 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_CONFIRM_H
#define FDR_CONFIRM_H
#include "ue2common.h"
#include "hwlm/hwlm.h"
/*
 * Multiplicative hash: mask lv with andmsk, multiply by mult and keep the
 * top nBits bits of the 64-bit product.
 */
static really_inline
u32 mul_hash_64(u64a lv, u64a andmsk, u64a mult, u32 nBits) {
    const u64a masked = lv & andmsk;
    const u64a product = masked * mult;
    return (u32)(product >> (sizeof(u64a) * 8 - nBits));
}
// data structures
// TODO: fix this hard-coding
#define CONF_TYPE u64a
#define CONF_HASH_CALL mul_hash_64
/* Bit flags stored in LitInfo::flags (NoRepeat is also ORed into
 * FDRConfirm::flags by the confirm compiler in the no-confirm case). */
typedef enum LitInfoFlags {
NoFlags = 0,
/* set by the confirm compiler when the literal is marked nocase */
Caseless = 1,
/* set when the literal is marked noruns; the runtime then suppresses a match
 * whose id equals the previously reported id */
NoRepeat = 2,
/* set when the literal's mask is longer than its string; LitInfo::extended_size
 * then holds the mask length */
ComplexConfirm = 4
} LitInfoFlags;
/**
 * \brief Structure describing a literal, linked to by FDRConfirm.
 *
 * This structure is followed in memory by a variable-sized string prefix at
 * LitInfo::s, for strings that are longer than CONF_TYPE.
 *
 * LitInfo structures that share a hash value are laid out consecutively and
 * chained through LitInfo::next.
 */
struct LitInfo {
/* expected value of the last sizeof(CONF_TYPE) bytes after masking with msk */
CONF_TYPE v;
/* mask applied to the loaded bytes before comparing against v */
CONF_TYPE msk;
/* groups this literal belongs to; checked against the runtime control word */
hwlm_group_t groups;
/* length of the literal string in bytes */
u32 size;
u32 id; // literal ID as passed in
u8 flags; /* LitInfoFlags */
/* 0 for the last entry of a chain; otherwise the next LitInfo is found at
 * (address of this one) + next + size */
u8 next;
/* length of the literal mask; only filled in when ComplexConfirm is set */
u8 extended_size;
u8 s[1]; // literal prefix, which continues "beyond" this struct.
};
#define FDRC_FLAG_NO_CONFIRM 1
/**
 * \brief FDR confirm header.
 *
 * This structure is followed in memory by:
 *
 * -# lit index mapping (array of u32)
 * -# list of LitInfo structures
 *
 * The lit index mapping holds, per hash value, the byte offset from the start
 * of this structure to the first LitInfo of that hash chain.
 */
struct FDRConfirm {
/* AND of all literal masks; applied to the loaded bytes before hashing */
CONF_TYPE andmsk;
/* hash multiplier; written as zero in the no-confirm case, which the Teddy
 * confirm helpers use to select the fast path */
CONF_TYPE mult;
u32 nBitsOrSoleID; // if flags is NO_CONFIRM then this is soleID
u32 flags; // sole meaning is 'non-zero means no-confirm' (that is all)
/* OR of the groups of all literals in this structure */
hwlm_group_t groups;
/* no-confirm case only: literal length minus one */
u32 soleLitSize;
/* no-confirm case only: literal bytes packed with the last byte in the most
 * significant position (case bit cleared for caseless alphabetic bytes) */
u32 soleLitCmp;
/* no-confirm case only: mask to apply before comparing against soleLitCmp */
u32 soleLitMsk;
};
static really_inline
const u32 *getConfirmLitIndex(const struct FDRConfirm *fdrc) {
const u8 *base = (const u8 *)fdrc;
const u32 *litIndex =
(const u32 *)(base + ROUNDUP_N(sizeof(*fdrc), alignof(u32)));
assert(ISALIGNED(litIndex));
return litIndex;
}
#endif // FDR_CONFIRM_H

View File

@@ -0,0 +1,479 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_compile_internal.h"
#include "fdr_confirm.h"
#include "engine_description.h"
#include "teddy_engine_description.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/compare.h"
#include "util/verify_types.h"
#include <algorithm>
#include <cstring>
#include <set>
using namespace std;
namespace ue2 {
typedef u8 ConfSplitType;
typedef pair<BucketIndex, ConfSplitType> BucketSplitPair;
typedef map<BucketSplitPair, pair<FDRConfirm *, size_t> > BC2CONF;
// return the number of bytes beyond a length threshold in all strings in lits
static
size_t thresholdedSize(const vector<hwlmLiteral> &lits, size_t threshold) {
size_t tot = 0;
for (const auto &lit : lits) {
size_t sz = lit.s.size();
if (sz > threshold) {
tot += ROUNDUP_N(sz - threshold, 8);
}
}
return tot;
}
/**
 * Pack a byte vector of at most sizeof(u64a) bytes into a u64a so that the
 * last byte of v occupies the highest-addressed byte of the result; unused
 * leading bytes are zero. An empty vector yields 0.
 *
 * Throws std::exception if v is longer than sizeof(u64a).
 */
static
u64a make_u64a_mask(const vector<u8> &v) {
    assert(v.size() <= sizeof(u64a));
    if (v.size() > sizeof(u64a)) {
        throw std::exception();
    }

    u64a mask = 0;
    // The guard above ensures the whole vector fits in the mask.
    const size_t len = v.size();
    unsigned char *m = (unsigned char *)&mask;
    // Iterator-based copy: unlike &v[0], this is well defined for an empty
    // vector, where the range is simply empty.
    std::copy(v.begin(), v.end(), m + sizeof(mask) - len);
    return mask;
}
/**
* Build a temporary vector of LitInfo structures (without the corresponding
* pointers to the actual strings; these cannot be laid out yet). These
* stay in 1:1 correspondence with the lits[] vector as that's the only
* place we have to obtain our full strings.
*/
static
void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
CONF_TYPE &andmsk) {
const CONF_TYPE all_ones = ~(u64a)0;
andmsk = all_ones; // fill in with 'and' of all literal masks
for (LiteralIndex i = 0; i < lits.size(); i++) {
const hwlmLiteral &lit = lits[i];
LitInfo &info = tmpLitInfo[i];
memset(&info, 0, sizeof(info));
info.id = lit.id;
u8 flags = NoFlags;
if (lit.nocase) {
flags |= Caseless;
}
if (lit.noruns) {
flags |= NoRepeat;
}
if (lit.msk.size() > lit.s.size()) {
flags |= ComplexConfirm;
info.extended_size = verify_u8(lit.msk.size());
}
info.flags = flags;
info.size = verify_u32(lit.s.size());
info.groups = lit.groups;
// these are built up assuming a LE machine
CONF_TYPE msk = all_ones;
CONF_TYPE val = 0;
for (u32 j = 0; j < sizeof(CONF_TYPE); j++) {
u32 shiftLoc = (sizeof(CONF_TYPE) - j - 1) * 8;
if (j >= lit.s.size()) {
msk &= ~((CONF_TYPE)0xff << shiftLoc);
} else {
u8 c = lit.s[lit.s.size() - j - 1];
if (lit.nocase && ourisalpha(c)) {
msk &= ~((CONF_TYPE)CASE_BIT << shiftLoc);
val |= (CONF_TYPE)(c & CASE_CLEAR) << shiftLoc;
} else {
val |= (CONF_TYPE)c << shiftLoc;
}
}
}
info.v = val;
info.msk = msk;
if (!lit.msk.empty()) {
u64a l_msk = make_u64a_mask(lit.msk);
u64a l_cmp = make_u64a_mask(lit.cmp);
// test for consistency - if there's intersection, then v and msk
// values must line up
UNUSED u64a intersection = l_msk & info.msk;
assert((info.v & intersection) == (l_cmp & intersection));
// incorporate lit.msk, lit.cmp into v and msk
info.msk |= l_msk;
info.v |= l_cmp;
}
andmsk &= info.msk;
}
}
//#define FDR_CONFIRM_DUMP 1
/**
 * Build a single FDRConfirm structure (header, lit index array and LitInfo
 * chain data) for the given literals.
 *
 * \param lits            literals to confirm; lits[0] is read in the
 *                        no-confirm case, so the vector must not be empty then
 * \param fdrc_p          receives the structure, allocated with
 *                        aligned_zmalloc(); the caller takes ownership
 * \param applyOneCharOpt allow the no-confirm form when there is exactly one
 *                        literal with an empty string and no mask
 * \param make_small      use the smaller hash-bit budget
 * \param make_confirm    when false, the no-confirm form is always produced
 * \return number of bytes actually used, rounded up to alignof(FDRConfirm)
 */
static
size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
bool applyOneCharOpt, bool make_small, bool make_confirm) {
vector<LitInfo> tmpLitInfo(lits.size());
CONF_TYPE andmsk;
fillLitInfo(lits, tmpLitInfo, andmsk);
#ifdef FDR_CONFIRM_DUMP
printf("-------------------\n");
#endif
// just magic numbers and crude measures for now
u32 nBits;
if (make_small) {
nBits = min(10U, lg2(lits.size()) + 1);
} else {
nBits = min(13U, lg2(lits.size()) + 4);
}
CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL;
u32 flags = 0;
// we use next three variables for 'confirmless' case to speed-up
// confirmation process
u32 soleLitSize = 0;
u32 soleLitCmp = 0;
u32 soleLitMsk = 0;
if ((applyOneCharOpt && lits.size() == 1 && lits[0].s.size() == 0 &&
lits[0].msk.empty()) || make_confirm == false) {
flags = FDRC_FLAG_NO_CONFIRM;
if (lits[0].noruns) {
flags |= NoRepeat; // messy - need to clean this up later as flags is sorta kinda obsoleted
}
// a zero multiplier is what the Teddy confirm helpers test for to take
// their fast path
mult = 0;
// NOTE(review): for an empty literal string this wraps to 0xffffffff -
// confirm that consumers of soleLitSize never see that case
soleLitSize = lits[0].s.size() - 1;
// we can get to this point only in confirmless case;
// it means that we have only one literal per FDRConfirm (no packing),
// with no literal mask and size of literal is less or equal
// to the number of masks of Teddy engine;
// maximum number of masks for Teddy is 4, so the size of
// literal is definitely less or equal to size of u32
assert(lits[0].s.size() <= sizeof(u32));
// pack the literal into a u32, last byte in the most significant position
for (u32 i = 0; i < lits[0].s.size(); i++) {
u32 shiftLoc = (sizeof(u32) - i - 1) * 8;
u8 c = lits[0].s[lits[0].s.size() - i - 1];
if (lits[0].nocase && ourisalpha(c)) {
soleLitCmp |= (u32)(c & CASE_CLEAR) << shiftLoc;
soleLitMsk |= (u32)CASE_CLEAR << shiftLoc;
}
else {
soleLitCmp |= (u32)c << shiftLoc;
soleLitMsk |= (u32)0xff << shiftLoc;
}
}
}
// we can walk the vector and assign elements from the vectors to a
// map by hash value
map<u32, vector<LiteralIndex> > res2lits;
hwlm_group_t gm = 0;
for (LiteralIndex i = 0; i < lits.size(); i++) {
LitInfo & li = tmpLitInfo[i];
u32 hash = CONF_HASH_CALL(li.v, andmsk, mult, nBits);
DEBUG_PRINTF("%016llx --> %u\n", li.v, hash);
res2lits[hash].push_back(i);
gm |= li.groups;
}
#ifdef FDR_CONFIRM_DUMP
// print out the literals reversed - makes it easier to line up analyses
// that are end-offset based
for (map<u32, vector<LiteralIndex> >::iterator i = res2lits.begin(),
e = res2lits.end(); i != e; ++i) {
u32 hash = i->first;
vector<LiteralIndex> & vlidx = i->second;
if (vlidx.size() > 1) {
printf("%x -> %zu literals\n", hash, vlidx.size());
u32 min_len = lits[vlidx.front()].s.size();
vector<set<u8> > vsl; // contains the set of chars at each location
// reversed from the end
vsl.resize(1024);
u32 total_string_size = 0;
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
e2 = vlidx.end(); i2 != e2; ++i2) {
LiteralIndex litIdx = *i2;
total_string_size += lits[litIdx].s.size();
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
vsl[lits[litIdx].s.size()-j].insert(lits[litIdx].s.c_str()[j - 1]);
}
min_len = MIN(min_len, lits[litIdx].s.size());
}
printf("common ");
for (u32 j = 0; j < min_len; j++) {
if (vsl[j].size() == 1) {
printf("%02x", (u32)*vsl[j].begin());
} else {
printf("__");
}
}
printf("\n");
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
e2 = vlidx.end(); i2 != e2; ++i2) {
LiteralIndex litIdx = *i2;
printf("%8x %c", lits[litIdx].id, lits[litIdx].nocase ? '!' : ' ');
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
u32 dist_from_end = lits[litIdx].s.size() - j;
if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) {
printf("__");
} else {
printf("%02x", (u32)lits[litIdx].s.c_str()[j-1]);
}
}
printf("\n");
}
u32 total_compares = 0;
for (u32 j = 0; j < 1024; j++) { // naughty
total_compares += vsl[j].size();
}
printf("Total compare load: %d Total string size: %d\n\n", total_compares, total_string_size);
}
}
#endif
const size_t bitsToLitIndexSize = (1U << nBits) * sizeof(u32);
const size_t totalLitSize = thresholdedSize(lits, sizeof(CONF_TYPE));
// this size can now be a worst-case as we can always be a bit smaller
size_t size = ROUNDUP_N(sizeof(FDRConfirm), alignof(u32)) +
ROUNDUP_N(bitsToLitIndexSize, alignof(LitInfo)) +
sizeof(LitInfo) * lits.size() + totalLitSize;
size = ROUNDUP_N(size, alignof(FDRConfirm));
FDRConfirm *fdrc = (FDRConfirm *)aligned_zmalloc(size);
assert(fdrc); // otherwise would have thrown std::bad_alloc
fdrc->andmsk = andmsk;
fdrc->mult = mult;
fdrc->nBitsOrSoleID = (flags & FDRC_FLAG_NO_CONFIRM) ? lits[0].id : nBits;
fdrc->flags = flags;
fdrc->soleLitSize = soleLitSize;
fdrc->soleLitCmp = soleLitCmp;
fdrc->soleLitMsk = soleLitMsk;
fdrc->groups = gm;
// After the FDRConfirm, we have the lit index array.
u8 *fdrc_base = (u8 *)fdrc;
u8 *ptr = fdrc_base + sizeof(*fdrc);
ptr = ROUNDUP_PTR(ptr, alignof(u32));
u32 *bitsToLitIndex = (u32 *)ptr;
ptr += bitsToLitIndexSize;
// After the lit index array, we have the LitInfo structures themselves,
// which vary in size (as each may have a variable-length string after it).
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
// Walk the map by hash value assigning indexes and laying out the
// elements (and their associated string confirm material) in memory.
for (std::map<u32, vector<LiteralIndex> >::const_iterator
i = res2lits.begin(), e = res2lits.end(); i != e; ++i) {
const u32 hash = i->first;
const vector<LiteralIndex> &vlidx = i->second;
// index entries hold byte offsets from the start of the FDRConfirm;
// hashes with no literals keep the zero left by aligned_zmalloc
bitsToLitIndex[hash] = verify_u32(ptr - (u8 *)fdrc);
for (vector<LiteralIndex>::const_iterator i2 = vlidx.begin(),
e2 = vlidx.end(); i2 != e2; ++i2) {
LiteralIndex litIdx = *i2;
// Write LitInfo header.
u8 *oldPtr = ptr;
LitInfo &finalLI = *(LitInfo *)ptr;
finalLI = tmpLitInfo[litIdx];
ptr += sizeof(LitInfo); // String starts directly after LitInfo.
// Write literal prefix (everything before the last N characters,
// as the last N are already confirmed).
const string &t = lits[litIdx].s;
if (t.size() > sizeof(CONF_TYPE)) {
size_t prefix_len = t.size() - sizeof(CONF_TYPE);
memcpy(&finalLI.s[0], t.c_str(), prefix_len);
ptr = &finalLI.s[0] + prefix_len;
}
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
if (i2 + 1 == e2) {
// last literal for this hash value: terminate the chain
finalLI.next = 0x0;
} else {
// our next field represents an adjustment on top of
// current address + the actual size of the literal
// so we track any rounding up done for alignment and
// add this in - that way we don't have to use bigger
// than a u8 (for now)
assert((size_t)(ptr - oldPtr) > t.size());
finalLI.next = verify_u8(ptr - oldPtr - t.size());
}
}
assert((size_t)(ptr - fdrc_base) <= size);
}
*fdrc_p = fdrc;
// Return actual used size, not worst-case size. Must be rounded up to
// FDRConfirm alignment so that the caller can lay out a sequence of these.
size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base),
alignof(FDRConfirm));
assert(actual_size <= size);
return actual_size;
}
/**
 * Build one FDRConfirm per (bucket, split value) pair that has literals and
 * record it, with its size, in bc2Conf. Literals are shortened by the
 * engine's confirm pull-back distance before being handed to getFDRConfirm().
 *
 * Returns the total size in bytes of all FDRConfirm structures built.
 */
static
u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
                       const EngineDescription &eng, BC2CONF &bc2Conf,
                       map<BucketIndex, vector<LiteralIndex> > &bucketToLits,
                       bool make_small) {
    const u32 pullBack = eng.getConfirmPullBackDistance();
    const u32 splitMask = eng.getConfirmTopLevelSplit() - 1;
    const bool splitHasCase = splitMask & 0x20;

    // Only a Teddy engine can decide that confirmation is unnecessary.
    bool makeConfirm = true;
    unique_ptr<TeddyEngineDescription> teddyDescr =
        getTeddyDescription(eng.getID());
    if (teddyDescr) {
        makeConfirm = teddyDescr->needConfirm(lits);
    }

    u32 totalConfirmSize = 0;
    for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
        const vector<LiteralIndex> &bucketLits = bucketToLits[b];
        if (bucketLits.empty()) {
            continue;
        }

        // Distribute this bucket's literals over the split values.
        vector<vector<hwlmLiteral> > bySplit(eng.getConfirmTopLevelSplit());
        for (const LiteralIndex litIdx : bucketLits) {
            hwlmLiteral lit = lits[litIdx]; // copy
            // splitChar starts as the last char of this literal
            u8 splitChar = *(lit.s.rbegin());
            bool suppressSplit = false;

            if (pullBack) {
                // make a shorter string to work over if we're pulling back
                // getFDRConfirm doesn't know about that stuff
                assert(lit.s.size() >= pullBack);
                lit.s.resize(lit.s.size() - pullBack);

                u8 lastCmp = 0;
                u8 lastMsk = 0;
                if (!lit.msk.empty()) {
                    lastCmp = *(lit.cmp.rbegin());
                    lastMsk = *(lit.msk.rbegin());
                    const size_t keep =
                        lit.msk.size() - min(lit.msk.size(), (size_t)pullBack);
                    lit.msk.resize(keep);
                    lit.cmp.resize(keep);
                }

                // a full mask byte on a caseless literal pins the final
                // character to an exact value, so no case split is needed
                if ((lastMsk == 0xff) && (lit.nocase)) {
                    suppressSplit = true;
                    splitChar = lastCmp;
                }
            }

            const bool splitBothCases = !suppressSplit && splitHasCase &&
                                        lit.nocase && ourisalpha(splitChar);
            if (splitBothCases) {
                bySplit[(u8)(mytoupper(splitChar) & splitMask)].push_back(lit);
                bySplit[(u8)(mytolower(splitChar) & splitMask)].push_back(lit);
            } else {
                bySplit[splitChar & splitMask].push_back(lit);
            }
        }

        // One FDRConfirm for each split value that received literals.
        for (u32 c = 0; c < eng.getConfirmTopLevelSplit(); c++) {
            if (bySplit[c].empty()) {
                continue;
            }
            DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, bySplit[c].size());
            FDRConfirm *fdrc;
            size_t size = getFDRConfirm(bySplit[c], &fdrc,
                                        eng.typicallyHoldsOneCharLits(),
                                        make_small, makeConfirm);
            BucketSplitPair key = make_pair(b, c);
            bc2Conf[key] = make_pair(fdrc, size);
            totalConfirmSize += size;
        }
    }
    return totalConfirmSize;
}
/**
 * Build the complete confirm region: a switch table of u32 offsets indexed
 * by (split value * number of buckets + bucket), followed by the FDRConfirm
 * structures the offsets point at. The temporary per-pair FDRConfirm buffers
 * are freed once copied in. Returns the buffer (from aligned_zmalloc) and
 * its size.
 */
pair<u8 *, size_t> setupFullMultiConfs(const vector<hwlmLiteral> &lits,
        const EngineDescription &eng,
        map<BucketIndex, vector<LiteralIndex> > &bucketToLits,
        bool make_small) {
    BC2CONF bc2Conf;
    const u32 totalConfirmSize =
        setupMultiConfirms(lits, eng, bc2Conf, bucketToLits, make_small);

    const u32 primarySwitch = eng.getConfirmTopLevelSplit();
    const u32 nBuckets = eng.getNumBuckets();
    const u32 totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32);
    const u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize);

    u8 *buf = (u8 *)aligned_zmalloc(totalSize);
    assert(buf); // otherwise would have thrown std::bad_alloc

    u32 *confBase = (u32 *)buf;      // switch table lives at the front
    u8 *ptr = buf + totalConfSwitchSize; // confirm structures follow it

    for (const auto &entry : bc2Conf) {
        FDRConfirm *const fdrc = entry.second.first;
        const size_t fdrcSize = entry.second.second;

        // confirm offset is relative to the base of this structure, now
        const u32 confirm_offset = verify_u32(ptr - (u8 *)buf);
        memcpy(ptr, fdrc, fdrcSize);
        ptr += fdrcSize;
        aligned_free(fdrc);

        const BucketIndex b = entry.first.first;
        const u8 c = entry.first.second;
        const u32 idx = c * nBuckets + b;
        confBase[idx] = confirm_offset;
    }
    return pair<u8 *, size_t>(buf, totalSize);
}
} // namespace ue2

View File

@@ -0,0 +1,244 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_CONFIRM_RUNTIME_H
#define FDR_CONFIRM_RUNTIME_H
#include "fdr_internal.h"
#include "fdr_loadval.h"
#include "hwlm/hwlm.h"
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/compare.h"
#define CONF_LOADVAL_CALL lv_u64a
#define CONF_LOADVAL_CALL_CAUTIOUS lv_u64a_ce
// this is ordinary confirmation function which runs through
// the whole confirmation procedure
//
// It loads a CONF_TYPE worth of bytes ending at buf[i - pullBackAmount],
// hashes them to pick a chain of LitInfo structures and, for every entry in
// that chain that fully matches, records the id in *last_match and stores the
// callback's return value in *control.
static really_inline
void confWithBit(const struct FDRConfirm * fdrc,
const struct FDR_Runtime_Args * a,
size_t i,
CautionReason r,
u32 pullBackAmount,
hwlmcb_rv_t *control,
u32 * last_match) {
assert(i < a->len);
assert(ISALIGNED(fdrc));
const u8 * buf = a->buf;
const size_t len = a->len;
CONF_TYPE v;
// first of the 8 bytes whose last byte is buf[i - pullBackAmount]
const u8 * confirm_loc = buf + i - pullBackAmount - 7;
if (likely(r == NOT_CAUTIOUS || confirm_loc >= buf)) {
v = CONF_LOADVAL_CALL(confirm_loc, buf, buf + len);
} else { // r == VECTORING, confirm_loc < buf
u64a histBytes = a->histBytes;
v = CONF_LOADVAL_CALL_CAUTIOUS(confirm_loc, buf, buf + len);
// stitch together v (which doesn't move) and history (which does)
u32 overhang = buf - confirm_loc;
histBytes >>= 64 - (overhang * 8);
v |= histBytes;
}
u32 c = CONF_HASH_CALL(v, fdrc->andmsk, fdrc->mult, fdrc->nBitsOrSoleID);
// byte offset of the first LitInfo for this hash; zero means no literals
u32 start = getConfirmLitIndex(fdrc)[c];
if (P0(start)) {
const struct LitInfo *l =
(const struct LitInfo *)((const u8 *)fdrc + start);
u8 oldNext; // initialized in loop
do {
assert(ISALIGNED(l));
// cheap check on the last bytes of the literal
if (P0( (v & l->msk) != l->v)) {
goto out;
}
// suppress a repeat of the literal that was reported last
if ((*last_match == l->id) && (l->flags & NoRepeat)) {
goto out;
}
// where the literal would start if it ends at i - pullBackAmount
const u8 * loc = buf + i - l->size + 1 - pullBackAmount;
u8 caseless = l->flags & Caseless;
if (loc < buf) {
// the literal would start before the buffer, i.e. in history
u32 full_overhang = buf - loc;
const u8 * history = (caseless) ?
a->buf_history_nocase : a->buf_history;
size_t len_history = (caseless) ?
a->len_history_nocase : a->len_history;
// can't do a vectored confirm either if we don't have
// the bytes
if (full_overhang > len_history) {
goto out;
}
// as for the regular case, no need to do a full confirm if
// we're a short literal
if (unlikely(l->size > sizeof(CONF_TYPE))) {
// compare the stored prefix in two pieces: s1 against history
// and s2 against the start of the buffer
const u8 * s1 = l->s;
const u8 * s2 = s1 + full_overhang;
const u8 * loc1 = history + len_history - full_overhang;
const u8 * loc2 = buf;
size_t size1 = MIN(full_overhang,
l->size - sizeof(CONF_TYPE));
size_t wind_size2_back = sizeof(CONF_TYPE) +
full_overhang;
size_t size2 = wind_size2_back > l->size ?
0 : l->size - wind_size2_back;
if (cmpForward(loc1, s1, size1, caseless)) {
goto out;
}
if (cmpForward(loc2, s2, size2, caseless)) {
goto out;
}
}
} else { // NON-VECTORING PATH
// if string < conf_type we don't need regular string cmp
if (unlikely(l->size > sizeof(CONF_TYPE))) {
if (cmpForward(loc, l->s, l->size - sizeof(CONF_TYPE), caseless)) {
goto out;
}
}
}
// skip literals none of whose groups are set in the control word
if (P0(!(l->groups & *control))) {
goto out;
}
if (unlikely(l->flags & ComplexConfirm)) {
// only checks that the bytes covered by the extended mask exist
// (in the buffer or in history); no mask comparison is done here
const u8 * loc2 = buf + i - l->extended_size + 1 - pullBackAmount;
if (loc2 < buf) {
u32 full_overhang = buf - loc2;
size_t len_history = (caseless) ?
a->len_history_nocase : a->len_history;
if (full_overhang > len_history) {
goto out;
}
}
}
*last_match = l->id;
*control = a->cb(loc - buf, i, l->id, a->ctxt);
out:
oldNext = l->next; // oldNext is either 0 or an 'adjust' value
l = (const struct LitInfo*)((const u8 *)l + oldNext + l->size);
} while (oldNext);
}
}
// Light-weight confirm used by 1-mask Teddy.
// If the confirm structure is flagged 'mult', the full confWithBit procedure
// is run; otherwise the single literal id stored in nBitsOrSoleID is reported
// straight to the callback without any string comparison.
static really_inline
void confWithBit1(const struct FDRConfirm * fdrc,
                  const struct FDR_Runtime_Args * a,
                  size_t i,
                  CautionReason r,
                  hwlmcb_rv_t *control,
                  u32 * last_match) {
    assert(i < a->len);
    assert(ISALIGNED(fdrc));

    if (unlikely(fdrc->mult)) {
        // full confirmation path
        confWithBit(fdrc, a, i, r, 0, control, last_match);
        return;
    }

    const u32 sole_id = fdrc->nBitsOrSoleID;

    // a NoRepeat literal is not reported twice in a row
    if ((*last_match != sole_id) || !(fdrc->flags & NoRepeat)) {
        *last_match = sole_id;
        *control = a->cb(i, i, sole_id, a->ctxt);
    }
}
// This is the 'light-weight' confirmation function used by 2-3-4-mask Teddy.
// In the 'confirmless' case it makes a fast 32-bit comparison,
// otherwise it calls the 'confWithBit' function for the full confirmation
// procedure.
//
// fdrc       - confirm structure; 'mult' selects the full confirm path
// a          - runtime arguments (buffer, history, callback, context)
// i          - offset in a->buf at which the candidate match ends
// r          - VECTORING enables the extra masked comparison below
// control    - receives the value returned by the match callback
// last_match - id of the previously reported literal (for NoRepeat)
static really_inline
void confWithBitMany(const struct FDRConfirm * fdrc,
const struct FDR_Runtime_Args * a,
size_t i,
CautionReason r,
hwlmcb_rv_t *control,
u32 * last_match) {
assert(i < a->len);
assert(ISALIGNED(fdrc));
// nothing ending before start_offset is ever reported
if (i < a->start_offset) {
return;
}
if (unlikely(fdrc->mult)) {
confWithBit(fdrc, a, i, r, 0, control, last_match);
return;
} else {
const u32 id = fdrc->nBitsOrSoleID;
const u32 len = fdrc->soleLitSize;
// a NoRepeat literal is not reported twice in a row
if ((*last_match == id) && (fdrc->flags & NoRepeat)) {
return;
}
// Only when vectoring, and only when fewer than 'len' bytes lie between
// start_offset and i, are the bytes re-checked against the sole literal.
if (r == VECTORING && len > i - a->start_offset) {
// not enough bytes in buffer plus history to hold the literal
if (len > (i + a->len_history)) {
return;
}
// Pack the byte at i into the top byte of cmp and each preceding byte
// into the next lower byte.
// NOTE(review): the shift (24 - j * 8) is only well-defined for j <= 3,
// so this presumably relies on soleLitSize <= 3 - confirm.
u32 cmp = (u32)a->buf[i] << 24;
if (len <= i) {
// all required bytes are in the current buffer
for (u32 j = 1; j <= len; j++) {
cmp |= (u32)a->buf[i - j] << (24 - (j * 8));
}
} else {
// take what the buffer has, then fill the remaining low bytes from
// histBytes
// NOTE(review): assumes histBytes holds the bytes preceding the buffer
// in its most significant bytes - confirm against the caller.
for (u32 j = 1; j <= i; j++) {
cmp |= (u32)a->buf[i - j] << (24 - (j * 8));
}
cmp |= (u32)(a->histBytes >> (40 + i * 8));
}
if ((fdrc->soleLitMsk & cmp) != fdrc->soleLitCmp) {
return;
}
}
*last_match = id;
// NOTE(review): when len > i the size_t subtraction (i - len) wraps;
// presumably the callback copes with a start before the buffer - confirm.
*control = a->cb(i - len, i, id, a->ctxt);
}
}
#endif

98
src/fdr/fdr_dump.cpp Normal file
View File

@@ -0,0 +1,98 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_compile_internal.h"
#include "fdr_dump.h"
#include "fdr_engine_description.h"
#include "teddy_engine_description.h"
#include "ue2common.h"
#include <cstdio>
#include <memory>
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
using std::unique_ptr;
namespace ue2 {
static
bool fdrIsTeddy(const FDR *fdr) {
assert(fdr);
u32 engine = fdr->engineID;
/* teddys don't have an fdr engine description (which is why the dump code
* is so broken). */
return !getFdrDescription(engine);
}
void fdrPrintStats(const FDR *fdr, FILE *f) {
const bool isTeddy = fdrIsTeddy(fdr);
if (isTeddy) {
fprintf(f, "TEDDY: %u\n", fdr->engineID);
} else {
fprintf(f, "FDR: %u\n", fdr->engineID);
}
if (isTeddy) {
unique_ptr<TeddyEngineDescription> des =
getTeddyDescription(fdr->engineID);
if (des) {
fprintf(f, " masks %u\n", des->numMasks);
fprintf(f, " buckets %u\n", des->getNumBuckets());
fprintf(f, " packed %s\n", des->packed ? "true" : "false");
} else {
fprintf(f, " <unknown engine>\n");
}
} else {
unique_ptr<FDREngineDescription> des =
getFdrDescription(fdr->engineID);
if (des) {
fprintf(f, " stride %u\n", des->stride);
fprintf(f, " buckets %u\n", des->getNumBuckets());
fprintf(f, " width %u\n", des->schemeWidth);
} else {
fprintf(f, " <unknown engine>\n");
}
}
fprintf(f, " strings ???\n");
fprintf(f, " size %zu bytes\n", fdrSize(fdr));
fprintf(f, " max length %u\n", fdr->maxStringLen);
fprintf(f, " floodoff %u (%x)\n", fdr->floodOffset, fdr->floodOffset);
}
} // namespace ue2

49
src/fdr/fdr_dump.h Normal file
View File

@@ -0,0 +1,49 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief FDR literal matcher: dump API.
*/
#ifndef FDR_DUMP_H
#define FDR_DUMP_H
#if defined(DUMP_SUPPORT)
#include <cstdio>
struct FDR;
namespace ue2 {
void fdrPrintStats(const struct FDR *fdr, FILE *f);
} // namespace ue2
#endif // DUMP_SUPPORT
#endif // FDR_DUMP_H

View File

@@ -0,0 +1,216 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fdr_compile_internal.h"
#include "fdr_engine_description.h"
#include "hs_compile.h"
#include "util/target_info.h"
#include "util/compare.h" // for ourisalpha()
#include "util/make_unique.h"
#include <cassert>
#include <cstdlib>
#include <map>
#include <string>
using namespace std;
namespace ue2 {
#include "fdr_autogen_compiler.cpp"
/* Builds an engine description from a static definition record.
 * id, bucket count, confirm pull-back distance and confirm top-level split are
 * forwarded to the EngineDescription base; the cpu_features mask is first
 * converted with targetByArchFeatures(). schemeWidth, stride and bits are
 * copied into this class's own members. */
FDREngineDescription::FDREngineDescription(const FDREngineDef &def)
: EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
def.numBuckets, def.confirmPullBackDistance,
def.confirmTopLevelSplit),
schemeWidth(def.schemeWidth), stride(def.stride), bits(def.bits) {}
u32 FDREngineDescription::getDefaultFloodSuffixLength() const {
    const u32 width = getSchemeWidth();
    const u32 buckets = getNumBuckets();

    // ceiling division, so that scheme width 32 with 6 buckets gives 6, not 5
    const u32 perBucket = (width + buckets - 1) / buckets;

    // the extra 1 avoids pain due to various reach choices
    return perBucket + 1;
}
/* Picks the stride we would like the engine to use, from the number of
 * literals, the minimum literal length, and how many literals have that
 * minimum length. Fewer literals allow a stride closer to min_len. */
static
u32 findDesiredStride(size_t num_lits, size_t min_len, size_t min_len_count) {
    u32 stride = 1; // safe fallback: min_len <= 1 or 5000+ literals

    if (min_len > 1 && num_lits < 5000) {
        if (num_lits >= 800) {
            // larger but not huge: stride 2 only if min_len is at least 3
            stride = MIN(min_len - 1, 2);
        } else if (num_lits >= 250) {
            // intermediate cases
            stride = min_len - 1;
        } else {
            // small cases: just go for it
            stride = min_len;
        }
    }

    // patch if count is quite large - a ton of length 2 literals can
    // break things
#ifdef TRY_THIS_LATER
    if ((min_len == 2) && (stride == 2) && (min_len_count > 20)) {
        stride = 1;
    }
#endif

    // stride 4 is far more fragile: even a few length 4 literals can break
    // things, so don't let min_len=4 with stride=4 through
    const bool fragileStride4 =
        (min_len == 4) && (stride == 4) && (min_len_count > 2);
    if (fragileStride4) {
        stride = 2;
    }

    return stride;
}
/* Selects the FDR engine description best suited to the literal set 'vl'.
 *
 * Every engine returned by getFdrDescriptions() that is valid on 'target' and
 * whose stride does not exceed the minimum literal length is given a score;
 * the highest-scoring engine wins (on a tie the earliest one is kept, as the
 * comparison below is strict). 'make_small' biases towards smaller tables.
 *
 * Returns a copy of the chosen description, or nullptr if no engine
 * qualified. */
unique_ptr<FDREngineDescription> chooseEngine(const target_t &target,
const vector<hwlmLiteral> &vl,
bool make_small) {
vector<FDREngineDescription> allDescs;
getFdrDescriptions(&allDescs);
// find desired stride
// NOTE(review): minLenCount presumably returns the minimum literal length and
// stores in 'count' how many literals have that length - confirm.
size_t count;
size_t msl = minLenCount(vl, &count);
u32 desiredStride = findDesiredStride(vl.size(), msl, count);
DEBUG_PRINTF("%zu lits, msl=%zu, desiredStride=%u\n", vl.size(), msl,
desiredStride);
const FDREngineDescription *best = nullptr;
u32 best_score = 0;
for (size_t engineID = 0; engineID < allDescs.size(); engineID++) {
const FDREngineDescription &eng = allDescs[engineID];
if (!eng.isValidOnTarget(target)) {
continue;
}
// engines whose stride exceeds the minimum literal length are not considered
if (msl < eng.stride) {
continue;
}
// start from a base score, penalise distance from the desired stride and
// reward strides that do not exceed it
u32 score = 100;
score -= absdiff(desiredStride, eng.stride);
if (eng.stride <= desiredStride) {
score += eng.stride;
}
// 'ideal' is the preferred value of eng.bits for this number of literals
u32 effLits = vl.size(); /* * desiredStride;*/
u32 ideal;
if (effLits < eng.getNumBuckets()) {
if (eng.stride == 1) {
ideal = 8;
} else {
ideal = 10;
}
} else if (effLits < 20) {
ideal = 10;
} else if (effLits < 100) {
ideal = 11;
} else if (effLits < 1000) {
ideal = 12;
} else if (effLits < 10000) {
ideal = 13;
} else {
ideal = 15;
}
// adjustments to the ideal: scheme width 32, small-table request, stride > 1
if (ideal != 8 && eng.schemeWidth == 32) {
ideal += 1;
}
if (make_small) {
ideal -= 2;
}
if (eng.stride > 1) {
ideal++;
}
DEBUG_PRINTF("effLits %u\n", effLits);
if (target.is_atom_class() && !make_small && effLits < 4000) {
/* Unless it is a very heavy case, we want to build smaller tables
* on lightweight machines due to their small caches. */
ideal -= 2;
}
// penalise distance between the engine's table bits and the ideal
score -= absdiff(ideal, eng.bits);
DEBUG_PRINTF("fdr %u: width=%u, bits=%u, buckets=%u, stride=%u "
"-> score=%u\n",
eng.getID(), eng.schemeWidth, eng.bits,
eng.getNumBuckets(), eng.stride, score);
// strict '>' keeps the first engine among equal scores
if (!best || score > best_score) {
best = &eng;
best_score = score;
}
}
if (!best) {
DEBUG_PRINTF("failed to find engine\n");
return nullptr;
}
DEBUG_PRINTF("using engine %u\n", best->getID());
// 'best' points into allDescs, so hand back a copy
return ue2::make_unique<FDREngineDescription>(*best);
}
/* Maps (bucket, position within bucket) to a bit index in the scheme word.
 * Consecutive positions of one bucket are getNumBuckets() bits apart. */
SchemeBitIndex FDREngineDescription::getSchemeBit(BucketIndex b,
                                                  PositionInBucket p) const {
    assert(p < getBucketWidth(b));

    const SchemeBitIndex bit = p * getNumBuckets() + b;
    assert(bit < getSchemeWidth());

    return bit;
}
/* Number of scheme bits per bucket; identical for every bucket, so the
 * bucket index argument is ignored. */
u32 FDREngineDescription::getBucketWidth(BucketIndex) const {
    const u32 width = getSchemeWidth();
    const u32 nb = getNumBuckets();

    // the scheme must divide evenly between the buckets
    assert(width % nb == 0);
    return width / nb;
}
/* Returns a copy of the FDR engine description at index engineID in the list
 * produced by getFdrDescriptions(), or nullptr if the index is out of range. */
unique_ptr<FDREngineDescription> getFdrDescription(u32 engineID) {
    vector<FDREngineDescription> descs;
    getFdrDescriptions(&descs);

    if (engineID < descs.size()) {
        return ue2::make_unique<FDREngineDescription>(descs[engineID]);
    }
    return nullptr;
}
} // namespace ue2

View File

@@ -0,0 +1,80 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_ENGINE_DESCRIPTION_H
#define FDR_ENGINE_DESCRIPTION_H
#include "engine_description.h"
#include "util/ue2_containers.h"
#include <map>
#include <memory>
#include <vector>
namespace ue2 {
/* Plain definition record from which an FDREngineDescription is constructed.
 * Field meanings below are inferred from how the FDREngineDescription
 * constructor and class consume them. */
struct FDREngineDef {
// engine id, forwarded to the EngineDescription base
u32 id;
// width of the scheme; divided by 8 to size table entries in bytes
u32 schemeWidth;
// number of buckets, forwarded to the EngineDescription base
u32 numBuckets;
// engine stride
u32 stride;
// table has (1 << bits) entries
u32 bits;
// feature mask, converted with targetByArchFeatures() on construction
u64a cpu_features;
// forwarded unchanged to the EngineDescription base
u32 confirmPullBackDistance;
// forwarded unchanged to the EngineDescription base
u32 confirmTopLevelSplit;
};
/** \brief Description of one FDR engine variant: scheme width, stride and
 * table size, on top of the common EngineDescription properties. */
class FDREngineDescription : public EngineDescription {
public:
    u32 schemeWidth; //!< width of the scheme
    u32 stride;      //!< engine stride
    u32 bits;        //!< log2 of the number of table entries

    u32 getSchemeWidth() const { return schemeWidth; }

    /** \brief Scheme bits available per bucket (same for every bucket). */
    u32 getBucketWidth(BucketIndex b) const;

    /** \brief Scheme bit index for position \a p of bucket \a b. */
    SchemeBitIndex getSchemeBit(BucketIndex b, PositionInBucket p) const;

    /** \brief Number of table entries, 2^bits.
     *
     * The shift is done on an unsigned operand so that it stays well-defined
     * for the whole range in which the result fits a u32 (bits <= 31). */
    u32 getNumTableEntries() const { return 1U << bits; }

    /** \brief Table size in bytes: one scheme-width entry per table slot. */
    u32 getTabSizeBytes() const {
        return schemeWidth / 8 * getNumTableEntries();
    }

    explicit FDREngineDescription(const FDREngineDef &def);

    u32 getDefaultFloodSuffixLength() const override;

    /** \brief True only for stride-1 engines. */
    bool typicallyHoldsOneCharLits() const override { return stride == 1; }
};
std::unique_ptr<FDREngineDescription>
chooseEngine(const target_t &target, const std::vector<hwlmLiteral> &vl,
bool make_small);
std::unique_ptr<FDREngineDescription> getFdrDescription(u32 engineID);
void getFdrDescriptions(std::vector<FDREngineDescription> *out);
} // namespace ue2
#endif

111
src/fdr/fdr_internal.h Normal file
View File

@@ -0,0 +1,111 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief FDR literal matcher: data structures.
*/
#ifndef FDR_INTERNAL_H
#define FDR_INTERNAL_H
#include "ue2common.h"
#include "hwlm/hwlm.h" // for hwlm_group_t, HWLMCallback
/** \brief Flag passed to the confirm functions; the confirm code performs its
 * extra boundary comparison only when it equals VECTORING. */
typedef enum {
NOT_CAUTIOUS, //!< not near a boundary (quantify?)
VECTORING //!< potentially vectoring
} CautionReason;
/** \brief number of different ids that can be triggered by floods of any given
* character. */
#define FDR_FLOOD_MAX_IDS 16
/** \brief Flood handling record: the literal ids (with their groups and
 * lengths) that fire once per character during a flood.
 *
 * NOTE(review): presumably one record exists per flood character - confirm
 * against the flood compile code. */
struct FDRFlood {
hwlm_group_t allGroups; //!< all the groups or'd together
u32 suffix; //!< NOTE(review): presumably the flood suffix length - confirm
/** \brief 0 to FDR_FLOOD_MAX_IDS-1 ids that are generated once per char on
* a flood.
* If larger we won't handle this through the flood path at all. */
u16 idCount;
u32 ids[FDR_FLOOD_MAX_IDS]; //!< the ids
hwlm_group_t groups[FDR_FLOOD_MAX_IDS]; //!< group ids to go with string ids
u32 len[FDR_FLOOD_MAX_IDS]; //!< lengths to go with the string ids
};
/** \brief FDR structure.
*
* 1. struct as-is
* 2. primary matching table
* 3. confirm stuff
*/
struct FDR {
/** engine identifier; the dump code uses it to look up the FDR or Teddy
 * engine description. */
u32 engineID;
/** NOTE(review): presumably the total size in bytes of this structure plus
 * the data that follows it - confirm. */
u32 size;
/** reported by the dump code as the "max length". */
u32 maxStringLen;
/** NOTE(review): presumably the offset of the flood data relative to the
 * start of this structure - confirm. */
u32 floodOffset;
/** link is the relative offset of a secondary included FDR table for
* stream handling if we're a primary FDR table or the subsidiary tertiary
* structures (spillover strings and hash table) if we're a secondary
* structure. */
u32 link;
/* NOTE(review): pad1..pad3 appear to be layout padding only; nothing in the
 * code visible here reads them - confirm before removing. */
u32 pad1;
u32 pad2;
u32 pad3;
/* NOTE(review): the same initial value exposed at several widths; which
 * member is used presumably depends on the engine - confirm. */
union {
u32 s_u32;
u64a s_u64a;
m128 s_m128;
} start;
};
/** \brief FDR runtime arguments.
*
* This structure handles read-only things that are passed extensively around
* the FDR run-time functions. They are set by the API, passed by value into
* the main function, then a pointer is passed around to all the various
* sub-functions (confirm & flood). */
struct FDR_Runtime_Args {
/** buffer being scanned; the confirm code indexes it by match offset. */
const u8 *buf;
/** length of buf; match offsets are asserted to be below this. */
size_t len;
/** history buffer, consulted by the confirm code for caseful literals
 * that begin before the start of buf. */
const u8 *buf_history;
size_t len_history; //!< number of bytes available in buf_history
/** history buffer consulted instead of buf_history for caseless literals. */
const u8 *buf_history_nocase;
size_t len_history_nocase; //!< number of bytes available in buf_history_nocase
/** matches ending before this offset in buf are not reported by the
 * multi-mask Teddy confirm. */
size_t start_offset;
/** match callback; the confirm code calls it with (start, end, id, ctxt)
 * and stores its return value as the new control value. */
HWLMCallback cb;
void *ctxt; //!< opaque context passed back to cb
/* NOTE(review): not referenced by the confirm code visible here; presumably
 * the current group mask - confirm. */
hwlm_group_t *groups;
/* NOTE(review): presumably where flood detection starts - confirm. */
const u8 *firstFloodDetect;
/** packed history bytes; the multi-mask Teddy confirm shifts this right to
 * obtain the bytes immediately preceding buf.
 * NOTE(review): exact byte layout to be confirmed against the code that
 * fills it in. */
const u64a histBytes;
};
#endif

216
src/fdr/fdr_loadval.h Normal file
View File

@@ -0,0 +1,216 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_LOADVAL_H
#define FDR_LOADVAL_H
#include "fdr_internal.h"
#include "ue2common.h"
#include "util/unaligned.h"
#include "util/simd_utils.h"
/* Declares a load function 'name' returning 'type'. Every load function takes
 * the address to read (ptr) and the bounds [lo, hi) of the valid region. */
#define MAKE_LOADVAL(type, name) \
    static really_inline type name (const u8 * ptr, UNUSED const u8 * lo, UNUSED const u8 * hi)

/* Debug checks: the whole read of sizeof(type) bytes lies inside [lo, hi). */
#define NORMAL_SAFE(type) assert(ptr >= lo && (ptr + sizeof(type) - 1) < hi)
/* As NORMAL_SAFE, and ptr is aligned to sizeof(type). Note that this macro
 * carries its own trailing semicolon. */
#define ALIGNED_SAFE(type) NORMAL_SAFE(type); assert(((size_t)ptr % sizeof(type)) == 0);
// these ones need asserts to test the property that we're not handling dynamically
#define CAUTIOUS_FORWARD_SAFE(type) assert(ptr >= lo)
#define CAUTIOUS_BACKWARD_SAFE(type) assert((ptr + sizeof(type) - 1) < hi)

/* Per-byte bounds tests used as the COND argument of MAKE_LOOP; they refer to
 * the loop index 'i'. Each expands to a single parenthesised expression so it
 * can be negated or combined with other operators safely. */
#define CF_INDEX_CHECK (ptr + i < hi)
#define CB_INDEX_CHECK (lo <= ptr + i)
#define CE_INDEX_CHECK ((lo <= ptr + i) && (ptr + i < hi))

/* Function body that assembles a TYPE one byte at a time: byte i is added in
 * at bit offset SHIFT_FIDDLE*8 only when COND holds, otherwise it contributes
 * zero. */
#define MAKE_LOOP(TYPE, COND, SHIFT_FIDDLE) \
    TYPE v = 0; \
    for (TYPE i = 0; i < sizeof(TYPE); i++) { \
        if (COND) { \
            v += (TYPE)ptr[i] << ((SHIFT_FIDDLE)*8); \
        } \
    } \
    return v;

/* big-endian: byte 0 is the most significant */
#define MAKE_LOOP_BE(TYPE, COND) \
    MAKE_LOOP(TYPE, COND, sizeof(TYPE)-i-1)

/* little-endian: byte 0 is the least significant */
#define MAKE_LOOP_LE(TYPE, COND) \
    MAKE_LOOP(TYPE, COND, i)

#define MAKE_LOOP_BE_CF(TYPE) CAUTIOUS_FORWARD_SAFE(TYPE); MAKE_LOOP_BE(TYPE, CF_INDEX_CHECK)
#define MAKE_LOOP_BE_CB(TYPE) CAUTIOUS_BACKWARD_SAFE(TYPE); MAKE_LOOP_BE(TYPE, CB_INDEX_CHECK)
#define MAKE_LOOP_BE_CE(TYPE) MAKE_LOOP_BE(TYPE, CE_INDEX_CHECK)
#define MAKE_LOOP_LE_CF(TYPE) CAUTIOUS_FORWARD_SAFE(TYPE); MAKE_LOOP_LE(TYPE, CF_INDEX_CHECK)
#define MAKE_LOOP_LE_CB(TYPE) CAUTIOUS_BACKWARD_SAFE(TYPE); MAKE_LOOP_LE(TYPE, CB_INDEX_CHECK)
#define MAKE_LOOP_LE_CE(TYPE) MAKE_LOOP_LE(TYPE, CE_INDEX_CHECK)
// no suffix = normal (unaligned)
// _a = aligned
// _cf = cautious forwards, base is always in bounds, but may read over the end of the buffer (test against hi)
// _cb = cautious backwards, final byte is always in bounds, but may read over the start of the buffer (test against lo)
// _ce = cautious everywhere (in both directions); test against hi and lo
// u8 loadvals
MAKE_LOADVAL(u8, lv_u8) {
NORMAL_SAFE(u8);
return *ptr;
}
/* Cautious-forward byte load: yields 0 when ptr is at or beyond hi. */
MAKE_LOADVAL(u8, lv_u8_cf) {
    CAUTIOUS_FORWARD_SAFE(u8);
    return (ptr < hi) ? *ptr : 0;
}
/* Cautious-backward byte load: yields 0 when ptr is below lo. */
MAKE_LOADVAL(u8, lv_u8_cb) {
    CAUTIOUS_BACKWARD_SAFE(u8);
    return (ptr >= lo) ? *ptr : 0;
}
/* Cautious-everywhere byte load: yields 0 whenever ptr is outside [lo, hi). */
MAKE_LOADVAL(u8, lv_u8_ce) {
    if (ptr < lo || ptr >= hi) {
        return 0;
    }
    return *ptr;
}
MAKE_LOADVAL(u16, lv_u16) {
NORMAL_SAFE(u16);
return unaligned_load_u16(ptr);
}
MAKE_LOADVAL(u16, lv_u16_a) {
ALIGNED_SAFE(u16);
return *(const u16 *)ptr;
}
MAKE_LOADVAL(u32, lv_u32) {
NORMAL_SAFE(u32);
return unaligned_load_u32(ptr);
}
MAKE_LOADVAL(u32, lv_u32_a) {
ALIGNED_SAFE(u32);
return *(const u32 *)ptr;
}
/* Unaligned 64-bit load; the whole 8-byte read must lie inside [lo, hi).
 * The bounds assertion is sized by the type actually loaded (u64a), so a read
 * running past hi is caught in debug builds. */
MAKE_LOADVAL(u64a, lv_u64a) {
    NORMAL_SAFE(u64a);
    return unaligned_load_u64a(ptr);
}
MAKE_LOADVAL(u64a, lv_u64a_a) {
ALIGNED_SAFE(u64a);
return *(const u64a *)ptr;
}
/* Cautious 16/32/64-bit loads, generated from the MAKE_LOOP_LE_* macros.
 * Each assembles a little-endian value one byte at a time; a byte that fails
 * the variant's bounds test contributes zero:
 *   _cf - bytes at or beyond hi are skipped
 *   _cb - bytes below lo are skipped
 *   _ce - bytes outside [lo, hi) are skipped */
MAKE_LOADVAL(u16, lv_u16_cf) { MAKE_LOOP_LE_CF(u16); }
MAKE_LOADVAL(u16, lv_u16_cb) { MAKE_LOOP_LE_CB(u16); }
MAKE_LOADVAL(u16, lv_u16_ce) { MAKE_LOOP_LE_CE(u16); }
MAKE_LOADVAL(u32, lv_u32_cf) { MAKE_LOOP_LE_CF(u32); }
MAKE_LOADVAL(u32, lv_u32_cb) { MAKE_LOOP_LE_CB(u32); }
MAKE_LOADVAL(u32, lv_u32_ce) { MAKE_LOOP_LE_CE(u32); }
MAKE_LOADVAL(u64a, lv_u64a_cf) { MAKE_LOOP_LE_CF(u64a); }
MAKE_LOADVAL(u64a, lv_u64a_cb) { MAKE_LOOP_LE_CB(u64a); }
MAKE_LOADVAL(u64a, lv_u64a_ce) { MAKE_LOOP_LE_CE(u64a); }
MAKE_LOADVAL(m128, lv_m128) {
NORMAL_SAFE(m128);
return loadu128(ptr);
}
MAKE_LOADVAL(m128, lv_m128_a) {
ALIGNED_SAFE(m128);
assert((size_t)ptr % sizeof(m128) == 0);
return *(const m128 *)ptr;
}
// m128 cases need to be manually created
MAKE_LOADVAL(m128, lv_m128_cf) {
CAUTIOUS_FORWARD_SAFE(m128);
union {
u8 val8[16];
m128 val128;
} u;
for (u32 i = 0; i < 16; i++) {
if (ptr + i < hi) {
u.val8[i] = ptr[i];
} else {
u.val8[i] = 0;
}
}
return u.val128;
}
/* Cautious-backward 128-bit load: bytes below lo read as zero. */
MAKE_LOADVAL(m128, lv_m128_cb) {
    CAUTIOUS_BACKWARD_SAFE(m128);
    union {
        u8 val8[16];
        m128 val128;
    } out;
    for (u32 k = 0; k < 16; k++) {
        out.val8[k] = (lo <= ptr + k) ? ptr[k] : 0;
    }
    return out.val128;
}
MAKE_LOADVAL(m128, lv_m128_ce) {
union {
u8 val8[16];
m128 val128;
} u;
for (u32 i = 0; i < 16; i++) {
if ((lo <= ptr + i) && (ptr + i < hi)) {
u.val8[i] = ptr[i];
} else {
u.val8[i] = 0;
}
}
return u.val128;
}
#endif

View File

@@ -0,0 +1,445 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_streaming_internal.h"
#include "fdr_compile_internal.h"
#include "hwlm/hwlm_build.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/target_info.h"
#include "util/verify_types.h"
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <deque>
#include <set>
#include <boost/dynamic_bitset.hpp>
using namespace std;
using boost::dynamic_bitset;
namespace ue2 {
namespace {
/* Strict weak ordering for long literals: caseful literals sort before
 * caseless ones; within the same case mode, by string contents. */
struct LongLitOrder {
    bool operator()(const hwlmLiteral &i1, const hwlmLiteral &i2) const {
        if (i1.nocase == i2.nocase) {
            return i1.s < i2.s;
        }
        return i1.nocase < i2.nocase;
    }
};
}
/* Two literals are equal when both their case mode and their string match;
 * other fields are ignored. */
static
bool hwlmLitEqual(const hwlmLiteral &l1, const hwlmLiteral &l2) {
    if (l1.nocase != l2.nocase) {
        return false;
    }
    return l1.s == l2.s;
}
/* Rounds x up to the next power of two; a power of two is returned unchanged.
 * Works by smearing the highest set bit of (x - 1) into every lower bit and
 * then adding one. */
static
u32 roundUpToPowerOfTwo(u32 x) {
    x -= 1;
    for (u32 shift = 1; shift <= 16; shift <<= 1) {
        x |= (x >> shift);
    }
    return x + 1;
}
/**
* \brief Creates a long literals vector containing all literals of length > max_len.
*
* The last char of each literal is trimmed as we're not interested in full
* matches, only partial matches.
*
* Literals are sorted (by caseful/caseless, then lexicographical order) and
* made unique.
*
* The ID of each literal is set to its position in the vector.
*
* \return False if there aren't any long literals.
*/
static
bool setupLongLits(const vector<hwlmLiteral> &lits,
vector<hwlmLiteral> &long_lits, size_t max_len) {
long_lits.reserve(lits.size());
for (vector<hwlmLiteral>::const_iterator it = lits.begin();
it != lits.end(); ++it) {
if (it->s.length() > max_len) {
hwlmLiteral tmp = *it; // copy
tmp.s.erase(tmp.s.size() - 1, 1); // erase last char
tmp.id = 0; // recalc later
tmp.groups = 0; // filled in later by hash bucket(s)
long_lits.push_back(tmp);
}
}
if (long_lits.empty()) {
return false;
}
// sort long_literals by caseful/caseless and in lexicographical order,
// remove duplicates
stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder());
vector<hwlmLiteral>::iterator new_end =
unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
long_lits.erase(new_end, long_lits.end());
// fill in ids; not currently used
for (vector<hwlmLiteral>::iterator i = long_lits.begin(),
e = long_lits.end();
i != e; ++i) {
i->id = i - long_lits.begin();
}
return true;
}
// Gathers per-mode (caseful/caseless) statistics over the sorted long
// literals. All three output arrays are indexed by mode and must have
// MAX_MODES entries.
//
// boundaries are the 'start' boundaries for each 'mode'
// so boundary[CASEFUL] is the index one above the largest caseful index
// (boundary[CASELESS] is the total number of long literals)
// positions[CASEFUL] is the # of positions in caseful strings (stream)
// hashedPositions[CASEFUL] is the # of positions in caseful strings
// (not returned - a temporary)
// hashEntries[CASEFUL] is the # of positions hashed for caseful strings
// (rounded up to the nearest power of two, and at least 4096; zero if
// nothing is hashed)
//
// Every literal in long_lits must be at least max_len characters long.
static
void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
                 u32 *boundaries, u32 *positions, u32 *hashEntries) {
    u32 hashedPositions[MAX_MODES];

    for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
        boundaries[m] = verify_u32(long_lits.size());
        positions[m] = 0;
        hashedPositions[m] = 0;
    }

    // literals are sorted caseful-first, so the first caseless literal marks
    // the end of the caseful range
    for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
                                             e = long_lits.end();
         i != e; ++i) {
        if (i->nocase) {
            boundaries[CASEFUL] = verify_u32(i - long_lits.begin());
            break;
        }
    }

    for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
                                             e = long_lits.end();
         i != e; ++i) {
        MODES m = i->nocase ? CASELESS : CASEFUL;
        const size_t len = i->s.size();
        // offsets 1 .. len - max_len are hashed (the same range fillHashes
        // walks), i.e. len - max_len positions per literal
        assert(len >= max_len);
        hashedPositions[m] += len - max_len;
        positions[m] += len;
    }

    for (u32 m = CASEFUL; m < MAX_MODES; m++) {
        hashEntries[m] = hashedPositions[m]
                       ? roundUpToPowerOfTwo(MAX(4096, hashedPositions[m]))
                       : 0;
    }

#ifdef DEBUG_COMPILE
    printf("analyzeLits:\n");
    // iterate with an integer: an enum cannot be incremented in C++
    for (u32 m = CASEFUL; m < MAX_MODES; m++) {
        printf("mode %s boundary %u positions %u hashedPositions %u "
               "hashEntries %u\n",
               (m == CASEFUL) ? "caseful" : "caseless", boundaries[m],
               positions[m], hashedPositions[m], hashEntries[m]);
    }
    printf("\n");
#endif
}
/* Hashes max_len bytes of literal l starting at the given offset, using the
 * streaming hash for mode m. */
static
u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, MODES m) {
    const u8 *start = (const u8 *)l.s.c_str() + offset;
    return streaming_hash(start, max_len, m);
}
// sort by 'distance from start'
namespace {
/* Orders (literal id, offset) pairs by descending offset, breaking ties by
 * ascending literal id. */
struct OffsetIDFromEndOrder {
    const vector<hwlmLiteral> &lits; // not currently used

    explicit OffsetIDFromEndOrder(const vector<hwlmLiteral> &lits_in)
        : lits(lits_in) {}

    bool operator()(const pair<u32, u32> &i1, const pair<u32, u32> &i2) const {
        if (i1.second == i2.second) {
            return i1.first < i2.first;
        }
        // longest is 'first', so > not <
        return i1.second > i2.second;
    }
};
}
/* Populates the hash table 'tab' (numEntries entries) for mode 'm'.
 *
 * For every literal of the requested mode, each offset from 1 up to
 * (length - max_len) is hashed. The low bits of the hash select the primary
 * bucket and the next 6 bits select a bit in that bucket's bitfield. Entries
 * hashing to the same bucket are chained through 'link' into free buckets
 * found near the primary one.
 *
 * litToOffsetVal maps a literal id to a base value that is added to
 * (offset + max_len) to form each entry's 'state'.
 * NOTE(review): presumably this is the literal's offset in the literal table -
 * confirm against the caller. */
static
void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
FDRSHashEntry *tab, size_t numEntries, MODES m,
map<u32, u32> &litToOffsetVal) {
const u32 nbits = lg2(numEntries);
// primary bucket -> all (literal id, offset) pairs hashing to it
map<u32, deque<pair<u32, u32> > > bucketToLitOffPairs;
// primary bucket -> 64-bit bitfield of secondary hash bits seen
map<u32, u64a> bucketToBitfield;
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
e = long_lits.end();
i != e; ++i) {
const hwlmLiteral &l = *i;
// only literals of the requested case mode are processed
if ((m == CASELESS) != i->nocase) {
continue;
}
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
u32 h = hashLit(l, j, max_len, m);
// low nbits pick the bucket, the following 6 bits pick a bitfield bit
u32 h_ent = h & ((1U << nbits) - 1);
u32 h_low = (h >> nbits) & 63;
bucketToLitOffPairs[h_ent].push_back(make_pair(i->id, j));
bucketToBitfield[h_ent] |= (1ULL << h_low);
}
}
// this used to be a set<u32>, but a bitset is much much faster given that
// we're using it only for membership testing.
dynamic_bitset<> filledBuckets(numEntries); // all bits zero by default.
// sweep out bitfield entries and save the results swapped accordingly
// also, anything with bitfield entries is put in filledBuckets
for (map<u32, u64a>::const_iterator i = bucketToBitfield.begin(),
e = bucketToBitfield.end();
i != e; ++i) {
u32 bucket = i->first;
u64a contents = i->second;
tab[bucket].bitfield = contents;
filledBuckets.set(bucket);
}
// store out all our chains based on free values in our hash table.
// find nearest free locations that are empty (there will always be more
// entries than strings, at present)
for (map<u32, deque<pair<u32, u32> > >::iterator
i = bucketToLitOffPairs.begin(),
e = bucketToLitOffPairs.end();
i != e; ++i) {
u32 bucket = i->first;
deque<pair<u32, u32> > &d = i->second;
// sort d by distance of the residual string (len minus our depth into
// the string). We need to put the 'furthest back' string first...
stable_sort(d.begin(), d.end(), OffsetIDFromEndOrder(long_lits));
while (1) {
// first time through is always at bucket, then we fill in links
filledBuckets.set(bucket);
FDRSHashEntry *ent = &tab[bucket];
u32 lit_id = d.front().first;
u32 offset = d.front().second;
ent->state = verify_u32(litToOffsetVal[lit_id] + offset + max_len);
// end of chain unless a successor is linked in below
ent->link = (u32)LINK_INVALID;
d.pop_front();
if (d.empty()) {
break;
}
// now, if there is another value
// find a bucket for it and put in 'bucket' and repeat
// all we really need to do is find something not in filledBuckets,
// ideally something close to bucket
// we search backward and forward from bucket, trying to stay as
// close as possible.
UNUSED bool found = false;
int bucket_candidate = 0;
// even k probes k/2 below bucket, odd k probes k/2 above it
for (u32 k = 1; k < numEntries * 2; k++) {
bucket_candidate = bucket + (((k & 1) == 0)
? (-(int)k / 2) : (k / 2));
if (bucket_candidate < 0 ||
(size_t)bucket_candidate >= numEntries) {
continue;
}
if (!filledBuckets.test(bucket_candidate)) {
found = true;
break;
}
}
assert(found);
// link the entry just written to the free bucket; the next iteration
// fills that bucket in
bucket = bucket_candidate;
ent->link = bucket;
}
}
}
static
size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
size_t rv = 0;
vector<hwlmLiteral>::const_iterator it, ite;
for (it = lits.begin(), ite = lits.end(); it != ite; ++it) {
rv = max(rv, it->msk.size());
}
return rv;
}
// Builds the streaming long-literal table for the given literals.
//
// Layout of the returned buffer (offsets from its base):
//   - an FDRSTableHeader, rounded up to 16 bytes
//   - long_lits.size() + 1 FDRSLiteral entries (the last one is an end marker)
//   - the literal strings, packed back to back
//   - the caseful hash table followed by the caseless hash table
//
// Writes literal_history_required and literal_stream_state_required into
// stream_control. Returns (nullptr, 0) when setupLongLits() reports that no
// table is needed. Throws std::logic_error if stream_control->history_max is
// smaller than the minimum history of 32.
//
// NOTE(review): the buffer comes from aligned_zmalloc() and is handed back as
// a raw pointer; presumably the caller is responsible for releasing it --
// confirm.
pair<u8 *, size_t>
fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
hwlmStreamingControl *stream_control) {
// refuse to compile if we are forced to have smaller than minimum
// history required for long-literal support, full stop
// otherwise, choose the maximum of the preferred history quantity
// (currently a fairly extravagant 32) or the already used history
// quantity - subject to the limitation of stream_control->history_max
const size_t MIN_HISTORY_REQUIRED = 32;
if (MIN_HISTORY_REQUIRED > stream_control->history_max) {
throw std::logic_error("Cannot set history to minimum history required");
}
size_t max_len =
MIN(stream_control->history_max,
MAX(MIN_HISTORY_REQUIRED, stream_control->history_min));
assert(max_len >= MIN_HISTORY_REQUIRED);
size_t max_mask_len = maxMaskLen(lits);
vector<hwlmLiteral> long_lits;
// NOTE(review): the trailing '|| false' has no effect on the condition.
if (!setupLongLits(lits, long_lits, max_len) || false) {
// "Don't need to do anything" path, not really a fail
DEBUG_PRINTF("Streaming literal path produces no table\n");
// we want enough history to manage the longest literal and the longest
// mask.
stream_control->literal_history_required =
max(maxLen(lits), max_mask_len) - 1;
stream_control->literal_stream_state_required = 0;
return make_pair(nullptr, size_t{0});
}
// Ensure that we have enough room for the longest mask.
if (max_mask_len) {
max_len = max(max_len, max_mask_len - 1);
}
u32 boundary[MAX_MODES];
u32 positions[MAX_MODES];
u32 hashEntries[MAX_MODES];
analyzeLits(long_lits, max_len, boundary, positions, hashEntries);
// first assess the size and find our caseless threshold
// every region below starts on a 16-byte boundary relative to the table base,
// except the caseless hash table, which directly follows the caseful one
size_t headerSize = ROUNDUP_16(sizeof(FDRSTableHeader));
size_t litTabOffset = headerSize;
size_t litTabNumEntries = long_lits.size() + 1;
size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(FDRSLiteral));
size_t wholeLitTabOffset = litTabOffset + litTabSize;
size_t totalWholeLitTabSize = ROUNDUP_16(positions[CASEFUL] +
positions[CASELESS]);
size_t htOffset[MAX_MODES];
size_t htSize[MAX_MODES];
htOffset[CASEFUL] = wholeLitTabOffset + totalWholeLitTabSize;
htSize[CASEFUL] = hashEntries[CASEFUL] * sizeof(FDRSHashEntry);
htOffset[CASELESS] = htOffset[CASEFUL] + htSize[CASEFUL];
htSize[CASELESS] = hashEntries[CASELESS] * sizeof(FDRSHashEntry);
size_t tabSize = ROUNDUP_16(htOffset[CASELESS] + htSize[CASELESS]);
// need to add +2 to both of these to allow space for the actual largest
// value as well as handling the fact that we add one to the space when
// storing out a position to allow zero to mean "no stream state value"
u8 streamBits[MAX_MODES];
streamBits[CASEFUL] = lg2(roundUpToPowerOfTwo(positions[CASEFUL] + 2));
streamBits[CASELESS] = lg2(roundUpToPowerOfTwo(positions[CASELESS] + 2));
// both modes are packed into one value, rounded up to whole bytes
u32 tot_state_bytes = (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8;
u8 * secondaryTable = (u8 *)aligned_zmalloc(tabSize);
assert(secondaryTable); // otherwise would have thrown std::bad_alloc
// then fill it in
u8 * ptr = secondaryTable;
FDRSTableHeader * header = (FDRSTableHeader *)ptr;
// fill in header
header->pseudoEngineID = (u32)0xffffffff;
header->N = verify_u8(max_len); // u8 so doesn't matter; won't go > 255
for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
header->boundary[m] = boundary[m];
header->hashOffset[m] = verify_u32(htOffset[m]);
header->hashNBits[m] = lg2(hashEntries[m]);
header->streamStateBits[m] = streamBits[m];
}
assert(tot_state_bytes < sizeof(u64a));
header->streamStateBytes = verify_u8(tot_state_bytes); // u8
ptr += headerSize;
// now fill in the rest
FDRSLiteral * litTabPtr = (FDRSLiteral *)ptr;
ptr += litTabSize;
// maps a literal's index in long_lits to the table offset of its string
map<u32, u32> litToOffsetVal;
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
e = long_lits.end();
i != e; ++i) {
u32 entry = verify_u32(i - long_lits.begin());
u32 offset = verify_u32(ptr - secondaryTable);
// point the table entry to the string location
litTabPtr[entry].offset = offset;
litToOffsetVal[entry] = offset;
// copy the string into the string location
memcpy(ptr, i->s.c_str(), i->s.size());
ptr += i->s.size(); // and the string location
}
// fill in final lit table entry with current ptr (serves as end value)
litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable);
// fill hash tables
ptr = secondaryTable + htOffset[CASEFUL];
for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
fillHashes(long_lits, max_len, (FDRSHashEntry *)ptr, hashEntries[m],
(MODES)m, litToOffsetVal);
ptr += htSize[m];
}
// tell the world what we did
stream_control->literal_history_required = max_len;
stream_control->literal_stream_state_required = tot_state_bytes;
return make_pair(secondaryTable, tabSize);
}
} // namespace ue2

View File

@@ -0,0 +1,152 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_STREAMING_INTERNAL_H
#define FDR_STREAMING_INTERNAL_H
#include "ue2common.h"
#include "fdr_internal.h"
#include "util/unaligned.h"
// tertiary table:
// a header (FDRSTableHeader)
// long_lits.size()+1 entries holding an offset to the string in the
// 'whole literal table' (FDRSLiteral structure)
// the whole literal table - every string packed in (freeform)
// hash table (caseful) (FDRSHashEntry)
// hash table (caseless) (FDRSHashEntry)
// Case-sensitivity modes for streaming literals. The values double as array
// indices, and MAX_MODES is the number of modes (used as an array size).
typedef enum {
CASEFUL = 0,
CASELESS = 1,
MAX_MODES = 2
} MODES;
// We have one of these structures hanging off the 'link' of our secondary
// FDR table that handles streaming strings. All per-mode arrays are indexed
// by MODES (CASEFUL, CASELESS).
struct FDRSTableHeader {
u32 pseudoEngineID; // set to 0xffffffff to indicate this isn't an FDR
// string id one beyond the maximum entry for this type of literal
// boundary[CASEFUL] is the end of the caseful literals
// boundary[CASELESS] is the end of the caseless literals and one beyond
// the largest literal id (the size of the littab)
u32 boundary[MAX_MODES];
// offsets are 0 if no such table exists
// offset from the base of the tertiary structure to the hash table
u32 hashOffset[MAX_MODES];
u32 hashNBits[MAX_MODES]; // lg2 of the size of the hash table
u8 streamStateBits[MAX_MODES]; // bits of packed stream state used per mode
u8 streamStateBytes; // total size of packed stream state in bytes
u8 N; // prefix lengths
u16 pad; // explicit padding field
};
// One of these structures per literal entry in our secondary FDR table.
struct FDRSLiteral {
u32 offset; // offset of this literal's string from the base of the table
// potentially - another u32 to point to the 'next lesser included literal'
// which would be a literal that overlaps this one in such a way that a
// failure to match _this_ literal can leave us in a state that we might
// still match that literal. Offset information might also be called for,
// in which case we might be wanting to use a FDRSLiteralOffset
};
// An offset into the streaming table.
typedef u32 FDRSLiteralOffset;
// Value of FDRSHashEntry::link that marks the end of a chain.
#define LINK_INVALID 0xffffffff
// One of these structures per hash table entry in our secondary FDR table
struct FDRSHashEntry {
u64a bitfield; // one bit per 6-bit hash residue that maps to this entry
FDRSLiteralOffset state; // state value to confirm for this entry
u32 link; // index of the next entry in the chain, or LINK_INVALID
};
// Index of the first literal table entry for mode m: 0 for caseful literals,
// otherwise the boundary of the preceding mode.
static really_inline
u32 get_start_lit_idx(const struct FDRSTableHeader * h, MODES m) {
    if (m == CASEFUL) {
        return 0;
    }
    return h->boundary[m - 1];
}
// Index one past the last literal table entry for mode m.
static really_inline
u32 get_end_lit_idx(const struct FDRSTableHeader * h, MODES m) {
    const u32 *bounds = h->boundary;
    return bounds[m];
}
// Returns the literal table, which starts at the first 16-byte boundary after
// the header.
static really_inline
const struct FDRSLiteral * getLitTab(const struct FDRSTableHeader * h) {
    const u8 *base = (const u8 *)h;
    const u8 *lit_tab = base + ROUNDUP_16(sizeof(struct FDRSTableHeader));
    return (const struct FDRSLiteral *)lit_tab;
}
// Table offset of the first literal string belonging to mode m.
static really_inline
u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, MODES m) {
    const struct FDRSLiteral *lit_tab = getLitTab(h);
    const u32 first = get_start_lit_idx(h, m);
    return lit_tab[first].offset;
}
// Converts a table offset into a packed stream state value for mode m: the
// offset relative to the mode's first literal, plus one.
static really_inline
u32 packStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) {
    const u32 base = getBaseOffsetOfLits(h, m);
    return (v - base) + 1;
}
// Converts a packed stream state value for mode m back into a table offset:
// adds the mode's base offset and subtracts one.
static really_inline
u32 unpackStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) {
    const u32 base = getBaseOffsetOfLits(h, m);
    return (v + base) - 1;
}
// Returns 1 if the given bit of the entry's bitfield is set, otherwise 0.
static really_inline
u32 has_bit(const struct FDRSHashEntry * ent, u32 bit) {
    const u64a shifted = ent->bitfield >> bit;
    return (u32)(shifted & 0x1);
}
// Hashes the 24 bytes at ptr as three unaligned 64-bit words. In CASELESS
// mode, bit 0x20 of every byte is cleared first. Each word is multiplied by a
// different power of MULTIPLIER, the upper 32 bits of each product are kept,
// and the three results are XORed together.
static really_inline
u32 streaming_hash(const u8 *ptr, UNUSED size_t len, MODES mode) {
    const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
    const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;
    const u64a MULTIPLIER_SQ = MULTIPLIER * MULTIPLIER;
    const u64a MULTIPLIER_CU = MULTIPLIER_SQ * MULTIPLIER;
    assert(len >= 32);

    // an all-ones mask leaves the caseful input untouched
    const u64a keep = (mode == CASELESS) ? CASEMASK : ~0ULL;

    const u64a w0 = unaligned_load_u64a(ptr) & keep;
    const u64a w1 = unaligned_load_u64a(ptr + 8) & keep;
    const u64a w2 = unaligned_load_u64a(ptr + 16) & keep;

    const u64a h0 = (w0 * MULTIPLIER) >> 32;
    const u64a h1 = (w1 * MULTIPLIER_SQ) >> 32;
    const u64a h2 = (w2 * MULTIPLIER_CU) >> 32;

    return (u32)(h0 ^ h1 ^ h2);
}
#endif

View File

@@ -0,0 +1,365 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FDR_STREAMING_RUNTIME_H
#define FDR_STREAMING_RUNTIME_H
#include "fdr_streaming_internal.h"
#include "util/partial_store.h"
// Returns the streaming table header located fdr->link bytes after the start
// of the FDR structure. In debug builds, checks that the header carries the
// pseudo engine id 0xffffffff.
static really_inline
const struct FDRSTableHeader * getSHDR(const struct FDR * fdr) {
    // validate pointers before they are read through
    assert(fdr);
    const u8 * linkPtr = ((const u8 *)fdr) + fdr->link;
    assert(linkPtr);
    // test if it's not really a engineID, but a 'pseudo engine id'
    assert(*(const u32 *)linkPtr == 0xffffffff);
    return (const struct FDRSTableHeader *)linkPtr;
}
// Reads the packed stream state and splits it into table[CASEFUL] (the low
// streamStateBits[CASEFUL] bits) and table[CASELESS] (the remaining bits).
static really_inline
void getStreamStates(const struct FDRSTableHeader * streamingTable,
                     const u8 * stream_state, u32 * table) {
    assert(streamingTable);
    assert(stream_state);
    assert(table);

    const u8 num_bytes = streamingTable->streamStateBytes;
    const u8 caseful_bits = streamingTable->streamStateBits[CASEFUL];
    UNUSED u8 caseless_bits = streamingTable->streamStateBits[CASELESS];
    assert(num_bytes == (caseful_bits + caseless_bits + 7) / 8);

#if defined(ARCH_32_BIT)
    // On 32-bit hosts, a state of at most four bytes can be handled without
    // any u64a arithmetic.
    if (num_bytes <= 4) {
        const u32 packed32 = partial_load_u32(stream_state, num_bytes);
        const u32 low_mask32 = (1U << caseful_bits) - 1;
        table[CASEFUL] = (u32)(packed32 & low_mask32);
        table[CASELESS] = (u32)(packed32 >> caseful_bits);
        return;
    }
#endif

    const u64a packed = partial_load_u64a(stream_state, num_bytes);
    const u64a low_mask = (1ULL << caseful_bits) - 1;
    table[CASEFUL] = (u32)(packed & low_mask);
    table[CASELESS] = (u32)(packed >> (u64a)caseful_bits);
}
#ifndef NDEBUG
// Debug-only check (used in assert): returns 1 if either table value has bits
// set above the number of bits allotted to its mode, otherwise 0.
static really_inline UNUSED
u32 streamingTableOverflow(u32 * table, u8 ssb, u8 ssb_nc) {
    const u32 caseful_mask = (1ULL << (ssb)) - 1;
    if ((table[CASEFUL] & ~caseful_mask) != 0) {
        return 1;
    }

    const u32 caseless_mask = (1ULL << (ssb_nc)) - 1;
    return (table[CASELESS] & ~caseless_mask) != 0 ? 1 : 0;
}
#endif
// Packs table[CASEFUL] and table[CASELESS] into a single value (caseful in
// the low bits, caseless shifted above it) and stores it to stream_state.
static really_inline
void setStreamStates(const struct FDRSTableHeader * streamingTable,
                     u8 * stream_state, u32 * table) {
    assert(streamingTable);
    assert(stream_state);
    assert(table);

    const u8 num_bytes = streamingTable->streamStateBytes;
    const u8 caseful_bits = streamingTable->streamStateBits[CASEFUL];
    UNUSED u8 caseless_bits = streamingTable->streamStateBits[CASELESS];
    assert(num_bytes == (caseful_bits + caseless_bits + 7) / 8);
    assert(!streamingTableOverflow(table, caseful_bits, caseless_bits));

#if defined(ARCH_32_BIT)
    // On 32-bit hosts, a state of at most four bytes can be handled without
    // any u64a arithmetic.
    if (num_bytes <= 4) {
        const u32 packed32 = table[CASEFUL] | (table[CASELESS] << caseful_bits);
        partial_store_u32(stream_state, packed32, num_bytes);
        return;
    }
#endif

    const u64a low = (u64a)table[CASEFUL];
    const u64a high = (u64a)table[CASELESS] << ((u64a)caseful_bits);
    partial_store_u64a(stream_state, low | high, num_bytes);
}
// Returns 1 if any byte of the packed stream state is non-zero, otherwise 0.
// A null stream_state yields 0.
u32 fdrStreamStateActive(const struct FDR * fdr, const u8 * stream_state) {
    if (!stream_state) {
        return 0;
    }

    const struct FDRSTableHeader * streamingTable = getSHDR(fdr);
    const u8 num_bytes = streamingTable->streamStateBytes;

    // Only "is anything set?" matters here, so a plain byte scan is used
    // instead of a partial_load_u64a (cheaper, especially on 32-bit hosts).
    for (u32 idx = 0; idx < num_bytes; idx++) {
        if (stream_state[idx]) {
            return 1;
        }
    }
    return 0;
}
// Binary search over the literal table entries of mode m for the entry whose
// string contains the byte just before stateValue, i.e. the index e with
//   litTab[e].offset <= stateValue - 1 < litTab[e + 1].offset
static really_inline
u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable,
                    u32 stateValue, MODES m) {
    const struct FDRSLiteral * litTab = getLitTab(streamingTable);
    u32 lower = get_start_lit_idx(streamingTable, m);
    u32 upper = get_end_lit_idx(streamingTable, m);

    // stateValue points one past the last byte seen; step back so that the
    // search lands on the literal that contains that byte.
    const u32 target = stateValue - 1;

    assert(lower != upper);
    assert(litTab[lower].offset <= target);
    assert(litTab[upper].offset > target);

    while (lower + 1 < upper) {
        const u32 probe = (lower + upper) / 2;
        if (litTab[probe].offset > target) {
            upper = probe;
        } else {
            lower = probe;
        }
    }

    assert(litTab[lower].offset <= target);
    assert(litTab[upper].offset > target);
    return lower;
}
// If mode m has a non-zero stream state, points the matching history buffer
// in 'a' at the start of the literal that the state refers to and sets the
// history length to the number of bytes of that literal up to the state.
static really_inline
void fdrUnpackStateMode(struct FDR_Runtime_Args *a,
                        const struct FDRSTableHeader *streamingTable,
                        const struct FDRSLiteral * litTab,
                        const u32 *state_table,
                        const MODES m) {
    const u32 packed = state_table[m];
    if (!packed) {
        return; // no state recorded for this mode
    }

    const u32 stateValue = unpackStateVal(streamingTable, m, packed);
    const u32 lit_idx = findLitTabEntry(streamingTable, stateValue, m);
    const size_t lit_offset = litTab[lit_idx].offset;
    const u8 * lit_start = (const u8 *)streamingTable + lit_offset;
    const size_t lit_bytes = stateValue - lit_offset;

    if (m != CASEFUL) {
        a->buf_history_nocase = lit_start;
        a->len_history_nocase = lit_bytes;
    } else {
        a->buf_history = lit_start;
        a->len_history = lit_bytes;
    }
}
// Loads the packed stream state and applies it to the runtime arguments for
// both modes. Does nothing when stream_state is null.
static really_inline
void fdrUnpackState(const struct FDR * fdr, struct FDR_Runtime_Args * a,
                    const u8 * stream_state) {
    if (stream_state) {
        const struct FDRSTableHeader * streamingTable = getSHDR(fdr);
        const struct FDRSLiteral * litTab = getLitTab(streamingTable);

        u32 per_mode_state[MAX_MODES];
        getStreamStates(streamingTable, stream_state, per_mode_state);

        fdrUnpackStateMode(a, streamingTable, litTab, per_mode_state, CASEFUL);
        fdrUnpackStateMode(a, streamingTable, litTab, per_mode_state,
                           CASELESS);
    }
}
// Checks whether the data seen so far ends with the literal prefix that
// hashState refers to. The prefix is compared against the tail of the current
// buffer and, if it is longer than the buffer, against the tail of the
// history for mode m as well. Returns hashState when the comparison succeeds
// and 0 otherwise.
static really_inline
u32 do_single_confirm(const struct FDRSTableHeader * streamingTable,
                      const struct FDR_Runtime_Args * a, u32 hashState, MODES m) {
    const struct FDRSLiteral * litTab = getLitTab(streamingTable);
    const u32 lit_idx = findLitTabEntry(streamingTable, hashState, m);
    const size_t lit_offset = litTab[lit_idx].offset;
    const u8 * expected = (const u8 *)streamingTable + lit_offset;
    assert(hashState > lit_offset);
    size_t remaining = hashState - lit_offset;

    const size_t len = a->len;
    const char nocase = m != CASEFUL;

    if (remaining > len) {
        // the prefix extends back into history
        const u8 * hist;
        size_t hist_len;
        if (nocase) {
            hist = a->buf_history_nocase;
            hist_len = a->len_history_nocase;
        } else {
            hist = a->buf_history;
            hist_len = a->len_history;
        }

        if (remaining > len + hist_len) {
            return 0; // Break out - not enough total history
        }

        const size_t overhang = remaining - len;
        assert(overhang <= hist_len);
        if (cmpForward(hist + hist_len - overhang, expected, overhang,
                       nocase)) {
            return 0;
        }
        expected += overhang;
        remaining -= overhang;
    }

    // whatever is left must match the tail of the current buffer
    assert(remaining <= len);
    if (cmpForward(a->buf + len - remaining, expected, remaining, nocase)) {
        return 0;
    }
    return hashState; // our new state
}
// Computes the streaming hash of the last hash_len bytes of input for each
// mode that has a hash table (hashNBits non-zero) and stores it in hashes[].
// Entries of hashes[] for modes without a table are left untouched.
//
// When the current buffer is shorter than hash_len, the window is assembled
// in a temporary buffer from the tail of a->buf_history followed by a->buf.
// The caller must ensure hash_len <= a->len + a->len_history.
static really_inline
void fdrFindStreamingHash(const struct FDR_Runtime_Args *a,
                          const struct FDRSTableHeader *streamingTable,
                          u8 hash_len, u32 *hashes) {
    // hash_len is a u8, so sizing the buffer for 256 bytes guarantees the
    // window always fits, even in builds where assertions are compiled out.
    u8 tempbuf[256];
    const u8 *base;
    if (hash_len > a->len) {
        size_t overhang = hash_len - a->len;
        assert(overhang <= a->len_history);
        memcpy(tempbuf, a->buf_history + a->len_history - overhang, overhang);
        memcpy(tempbuf + overhang, a->buf, a->len);
        base = tempbuf;
    } else {
        assert(hash_len <= a->len);
        base = a->buf + a->len - hash_len;
    }
    if (streamingTable->hashNBits[CASEFUL]) {
        hashes[CASEFUL] = streaming_hash(base, hash_len, CASEFUL);
    }
    if (streamingTable->hashNBits[CASELESS]) {
        hashes[CASELESS] = streaming_hash(base, hash_len, CASELESS);
    }
}
// Looks up hash h in the hash table for mode m. The low hashNBits[m] bits of
// h select the entry and the next six bits select a bit in the entry's
// bitfield. Returns the entry if that bit is set; returns NULL if it is not
// set or if the mode has no hash table (hashNBits[m] == 0).
static really_inline
const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
                                   u32 h, const MODES m) {
    u32 nbits = streamingTable->hashNBits[m];
    if (!nbits) {
        return NULL;
    }
    // Unsigned shift: a signed '1 << nbits' is undefined for nbits == 31, and
    // the table builder computes the same mask with 1U.
    u32 h_ent = h & ((1U << nbits) - 1);
    u32 h_low = (h >> nbits) & 63;
    const struct FDRSHashEntry *tab =
        (const struct FDRSHashEntry *)((const u8 *)streamingTable
                                       + streamingTable->hashOffset[m]);
    const struct FDRSHashEntry *ent = tab + h_ent;
    if (!has_bit(ent, h_low)) {
        return NULL;
    }
    return ent;
}
// Walks the chain of hash entries starting at ent and records, in
// state_table[m], the packed state of the first entry that confirms against
// the input. state_table[m] is left unchanged if no entry confirms.
static really_inline
void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a,
                      const struct FDRSTableHeader *streamingTable,
                      const struct FDRSHashEntry *ent, const MODES m) {
    assert(ent);
    assert(streamingTable->hashNBits[m]);

    const u8 *table_base = (const u8 *)streamingTable;
    const struct FDRSHashEntry *tab =
        (const struct FDRSHashEntry *)(table_base
                                       + streamingTable->hashOffset[m]);

    for (;;) {
        const u32 confirmed = do_single_confirm(streamingTable, a, ent->state,
                                                m);
        if (confirmed) {
            state_table[m] = packStateVal(streamingTable, m, confirmed);
            return;
        }
        if (ent->link == LINK_INVALID) {
            return; // end of chain, nothing confirmed
        }
        ent = tab + ent->link;
    }
}
// Computes the stream state for both modes from the end of the current input
// and writes it to stream_state. Does nothing when stream_state is null. When
// fewer than N bytes of buffer plus history are available, a zero state is
// written for both modes.
static really_inline
void fdrPackState(const struct FDR *fdr, const struct FDR_Runtime_Args *a,
                  u8 *stream_state) {
    if (!stream_state) {
        return; // no stream state for this case
    }

    // locate the streaming table attached to this FDR
    const struct FDRSTableHeader *streamingTable = getSHDR(fdr);
    assert(streamingTable->N);

    u32 state_table[MAX_MODES] = {0, 0};

    // hashing needs at least N bytes of buffer plus history
    if (streamingTable->N <= a->len + a->len_history) {
        u32 hashes[MAX_MODES] = {0, 0};
        fdrFindStreamingHash(a, streamingTable, streamingTable->N, hashes);

        const struct FDRSHashEntry *ent =
            getEnt(streamingTable, hashes[CASEFUL], CASEFUL);
        if (ent) {
            fdrPackStateMode(state_table, a, streamingTable, ent, CASEFUL);
        }

        ent = getEnt(streamingTable, hashes[CASELESS], CASELESS);
        if (ent) {
            fdrPackStateMode(state_table, a, streamingTable, ent, CASELESS);
        }
    }

    setStreamStates(streamingTable, stream_state, state_table);
}
#endif

222
src/fdr/flood_compile.cpp Normal file
View File

@@ -0,0 +1,222 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_confirm.h"
#include "fdr_compile_internal.h"
#include "fdr_engine_description.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/charreach.h"
#include "util/compare.h"
#include "util/ue2string.h"
#include "util/verify_types.h"
#include <cstring>
#include <map>
#include <memory>
#include <string>
#include <vector>
using namespace std;
namespace ue2 {
namespace {
struct FloodComparator {
bool operator()(const FDRFlood &f1, const FDRFlood &f2) const {
return std::memcmp(&f1, &f2, sizeof(f1)) < 0;
}
};
}
// Returns true if the two characters differ; when caseless is set, both are
// lower-cased with mytolower() before comparing.
static
bool isDifferent(u8 oldC, u8 c, bool caseless) {
    if (!caseless) {
        return oldC != c;
    }
    return mytolower(oldC) != mytolower(c);
}
// Raises the flood suffix recorded for character c to at least suffix + 1.
static
void updateFloodSuffix(vector<FDRFlood> &tmpFlood, u8 c, u32 suffix) {
    FDRFlood &entry = tmpFlood[c];
    const u32 wanted = suffix + 1;
    entry.suffix = MAX(entry.suffix, wanted);
    DEBUG_PRINTF("Updated Flood Suffix for char '%c' to %u\n", c, entry.suffix);
}
// Raises the flood suffix for character c to at least suffix + 1 and, while
// there is still room, records the literal's id, groups and suffix length in
// the next free slot of that character's flood record.
static
void addFlood(vector<FDRFlood> &tmpFlood, u8 c, const hwlmLiteral &lit,
              u32 suffix) {
    FDRFlood &entry = tmpFlood[c];
    entry.suffix = MAX(entry.suffix, suffix + 1);

    // Once idCount has reached FDR_FLOOD_MAX_IDS nothing more is recorded and
    // the counter stops growing, so it cannot overflow.
    if (entry.idCount >= FDR_FLOOD_MAX_IDS) {
        return;
    }

    entry.ids[entry.idCount] = lit.id;
    entry.allGroups |= lit.groups;
    entry.groups[entry.idCount] = lit.groups;
    entry.len[entry.idCount] = suffix;
    DEBUG_PRINTF("Added Flood for char '%c' suffix=%u len[%hu]=%u\n",
                 c, entry.suffix, entry.idCount, suffix);
    entry.idCount++;
}
// Builds the flood control structure for the given literals.
//
// The returned buffer holds a table of N_CHARS u32 indices (one per byte
// value) followed by the distinct FDRFlood records that those indices refer
// to; byte values with identical records share one record. The buffer is
// allocated with aligned_zmalloc() and its size is rounded up to 16 bytes.
//
// For each literal, the run of its final character is measured backwards from
// the end of the literal (and of its mask, if any). A literal that consists
// entirely of that character is added to the character's flood record;
// otherwise only the record's suffix length is raised.
//
// NOTE(review): assumes every literal is non-empty, since the last character
// is read with lit.s[litSize - 1] -- confirm against callers.
pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
const EngineDescription &eng) {
vector<FDRFlood> tmpFlood(N_CHARS);
u32 default_suffix = eng.getDefaultFloodSuffixLength();
// zero everything to avoid spurious distinctions in the compares
memset(&tmpFlood[0], 0, N_CHARS * sizeof(FDRFlood));
for (u32 c = 0; c < N_CHARS; c++) {
tmpFlood[c].suffix = default_suffix;
}
for (const auto &lit : lits) {
DEBUG_PRINTF("lit: '%s'%s\n", escapeString(lit.s).c_str(),
lit.nocase ? " (nocase)" : "");
u32 litSize = verify_u32(lit.s.size());
u32 maskSize = (u32)lit.msk.size();
// c is the candidate flood character: the last character of the literal
u8 c = lit.s[litSize - 1];
// caselessness only matters when c is alphabetic
bool nocase = ourisalpha(c) ? lit.nocase : false;
// if the last mask byte constrains the case bit, the case of c is fixed by
// the mask's compare value and the literal is then treated as caseful
if (nocase && maskSize && (lit.msk[maskSize - 1] & CASE_BIT)) {
c = (lit.cmp[maskSize - 1] & CASE_BIT) ? mytolower(c) : mytoupper(c);
nocase = false;
}
u32 iEnd = MAX(litSize, maskSize);
u32 upSuffix = iEnd; // upSuffix is used as an upper case suffix length
// for case-less, or as a suffix length for case-sensitive;
u32 loSuffix = iEnd; // loSuffix used only for case-less as a lower case suffix
// length;
// walk backwards from the end; i counts positions from the end
for (u32 i = 0; i < iEnd; i++) {
if (i < litSize) {
if (isDifferent(c, lit.s[litSize - i - 1], lit.nocase)) {
DEBUG_PRINTF("non-flood char in literal[%u] %c != %c\n",
i, c, lit.s[litSize - i - 1]);
upSuffix = MIN(upSuffix, i);
loSuffix = MIN(loSuffix, i); // makes sense only for case-less
break;
}
}
if (i < maskSize) {
u8 m = lit.msk[maskSize - i - 1];
u8 cm = lit.cmp[maskSize - i - 1] & m;
if(nocase) {
// the upper- and lower-case forms are checked against the mask separately
if ((mytoupper(c) & m) != cm) {
DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n",
i, mytoupper(c), cm);
upSuffix = MIN(upSuffix, i);
}
if ((mytolower(c) & m) != cm) {
DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n",
i, mytolower(c), cm);
loSuffix = MIN(loSuffix, i);
}
// stop once both case forms have been ruled out
if (loSuffix != iEnd && upSuffix != iEnd) {
break;
}
} else if ((c & m) != cm) {
DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n", i, c, cm);
upSuffix = MIN(upSuffix, i);
break;
}
}
}
// a suffix still equal to iEnd means the whole literal is a run of c
if(upSuffix != iEnd) {
updateFloodSuffix(tmpFlood, nocase ? mytoupper(c) : c, upSuffix);
} else {
addFlood(tmpFlood, nocase ? mytoupper(c) : c, lit, upSuffix);
}
if (nocase) {
if(loSuffix != iEnd) {
updateFloodSuffix(tmpFlood, mytolower(c), loSuffix);
} else {
addFlood(tmpFlood, mytolower(c), lit, loSuffix);
}
}
}
#ifdef DEBUG
for (u32 i = 0; i < N_CHARS; i++) {
FDRFlood &fl = tmpFlood[i];
if (!fl.idCount) {
continue;
}
printf("i is %02x fl->idCount is %hd fl->suffix is %d fl->allGroups is "
"%016llx\n", i, fl.idCount, fl.suffix, fl.allGroups);
for (u32 j = 0; j < fl.idCount; j++) {
printf("j is %d fl.groups[j] %016llx fl.len[j] %d \n", j,
fl.groups[j], fl.len[j]);
}
}
#endif
// group byte values by identical flood record so each distinct record is
// stored only once
map<FDRFlood, CharReach, FloodComparator> flood2chars;
for (u32 i = 0; i < N_CHARS; i++) {
FDRFlood fl = tmpFlood[i];
flood2chars[fl].set(i);
}
u32 nDistinctFloods = flood2chars.size();
size_t floodHeaderSize = sizeof(u32) * N_CHARS;
size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods;
size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize);
u8 *buf = (u8 *)aligned_zmalloc(totalSize);
assert(buf); // otherwise would have thrown std::bad_alloc
u32 *floodHeader = (u32 *)buf;
FDRFlood *layoutFlood = (FDRFlood * )(buf + floodHeaderSize);
u32 currentFloodIndex = 0;
for (const auto &m : flood2chars) {
const FDRFlood &fl = m.first;
const CharReach &cr = m.second;
layoutFlood[currentFloodIndex] = fl;
// every byte value sharing this record gets the record's index
for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) {
floodHeader[c] = currentFloodIndex;
}
currentFloodIndex++;
}
DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n",
floodHeaderSize, floodStructSize, totalSize);
return make_pair((u8 *)buf, totalSize);
}
} // namespace ue2

347
src/fdr/flood_runtime.h Normal file
View File

@@ -0,0 +1,347 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FLOOD_RUNTIME
#define FLOOD_RUNTIME
#if defined(ARCH_64_BIT)
#define FLOOD_64
#else
#define FLOOD_32
#endif
#define FLOOD_MINIMUM_SIZE 256
#define FLOOD_BACKOFF_START 32
// Chooses the position in buf at which flood detection should first be tried.
//
// Returns buf + len (the end of the buffer) when the buffer is shorter than
// FLOOD_MINIMUM_SIZE, or when none of three probes -- near the start, the
// middle and the end of the buffer -- finds two adjacent aligned words that
// are equal. Otherwise returns buf + floodBackoff.
//
// The probes read 8-byte words unless FLOOD_32 is defined, in which case they
// read 4-byte words.
static really_inline
const u8 * nextFloodDetect(const u8 * buf, size_t len, u32 floodBackoff) {
// if we don't have a flood at either the start or end,
// or have a very small buffer, don't bother with flood detection
if (len < FLOOD_MINIMUM_SIZE) {
return buf + len;
}
/* entry points in runtime.c prefetch relevant data */
#ifndef FLOOD_32
// probe addresses are rounded up to an 8-byte boundary before loading
u64a x11 = *(const u64a *)ROUNDUP_PTR(buf, 8);
u64a x12 = *(const u64a *)ROUNDUP_PTR(buf+8, 8);
if (x11 == x12) {
return buf + floodBackoff;
}
u64a x21 = *(const u64a *)ROUNDUP_PTR(buf + len/2, 8);
u64a x22 = *(const u64a *)ROUNDUP_PTR(buf + len/2 + 8, 8);
if (x21 == x22) {
return buf + floodBackoff;
}
// the last pair starts 24 bytes before the end, leaving room for rounding up
u64a x31 = *(const u64a *)ROUNDUP_PTR(buf + len - 24, 8);
u64a x32 = *(const u64a *)ROUNDUP_PTR(buf + len - 16, 8);
if (x31 == x32) {
return buf + floodBackoff;
}
#else
// same three probes using 4-byte words rounded up to a 4-byte boundary
u32 x11 = *(const u32 *)ROUNDUP_PTR(buf, 4);
u32 x12 = *(const u32 *)ROUNDUP_PTR(buf+4, 4);
if (x11 == x12) {
return buf + floodBackoff;
}
u32 x21 = *(const u32 *)ROUNDUP_PTR(buf + len/2, 4);
u32 x22 = *(const u32 *)ROUNDUP_PTR(buf + len/2 + 4, 4);
if (x21 == x22) {
return buf + floodBackoff;
}
u32 x31 = *(const u32 *)ROUNDUP_PTR(buf + len - 12, 4);
u32 x32 = *(const u32 *)ROUNDUP_PTR(buf + len - 8, 4);
if (x31 == x32) {
return buf + floodBackoff;
}
#endif
// no probe looked like a flood
return buf + len;
}
/**
 * \brief Check for a flood (a run of one repeated byte) at the current scan
 * position and, if one is found, raise the matches it implies directly
 * through the callback.
 *
 * \param fdr              engine structure; the flood tables are located
 *                         fdr->floodOffset bytes from its start.
 * \param a                runtime arguments; buf, len, cb and ctxt are read.
 * \param ptrPtr           in/out: current scan position. Advanced by the
 *                         number of flood bytes handled here.
 * \param tryFloodDetect   position that triggered this call; only logged on
 *                         entry and overwritten before returning.
 * \param floodBackoffPtr  in/out: backoff distance, doubled on every path
 *                         that does not find a usable flood.
 * \param control          in/out: control/group word. Tested against the
 *                         flood's group masks and overwritten with each
 *                         callback's return value.
 * \param iterBytes        granularity in bytes that the handled flood size
 *                         is rounded down to (presumably the stride of the
 *                         caller's main loop -- confirm at the call site).
 * \return the position at which flood detection should next be attempted.
 */
static really_inline
const u8 * floodDetect(const struct FDR * fdr,
const struct FDR_Runtime_Args * a,
const u8 ** ptrPtr,
const u8 * tryFloodDetect,
u32 * floodBackoffPtr,
hwlmcb_rv_t * control,
u32 iterBytes) {
DEBUG_PRINTF("attempting flood detection at %p\n", tryFloodDetect);
const u8 * buf = a->buf;
const size_t len = a->len;
HWLMCallback cb = a->cb;
void * ctxt = a->ctxt;
const u8 * ptr = *ptrPtr;
// tryFloodDetect is never put in places where unconditional
// reads a short distance forward or backward here
// TODO: rationale for this line needs to be rediscovered!!
// mainLoopLen is the buffer length less one iteration's worth of bytes
// (clamped at zero); every scan loop below stops before it.
size_t mainLoopLen = len > iterBytes ? len - iterBytes : 0;
// i: offset of the current position in buf; j: running end of the flood.
const u32 i = ptr - buf;
u32 j = i;
// go from c to our FDRFlood structure
// Layout at fBase: 256 u32 indices (one per byte value) followed by the
// array of FDRFlood records those indices select.
u8 c = buf[i];
const u8 * fBase = ((const u8 *)fdr) + fdr->floodOffset;
u32 fIdx = ((const u32 *)fBase)[c];
const struct FDRFlood * fsb = (const struct FDRFlood *)(fBase + sizeof(u32) * 256);
const struct FDRFlood * fl = &fsb[fIdx];
// cmpVal: the byte c replicated into every byte of a machine word.
// probe: the first aligned word at or after the current position.
#ifndef FLOOD_32
u64a cmpVal = c;
cmpVal |= cmpVal << 8;
cmpVal |= cmpVal << 16;
cmpVal |= cmpVal << 32;
u64a probe = *(const u64a *)ROUNDUP_PTR(buf+i, 8);
#else
u32 cmpVal = c;
cmpVal |= cmpVal << 8;
cmpVal |= cmpVal << 16;
u32 probe = *(const u32 *)ROUNDUP_PTR(buf+i, 4);
#endif
// Not a flood of c here, or this byte's flood record has too many ids to
// be handled: back off and leave.
if ((probe != cmpVal) || (fl->idCount >= FDR_FLOOD_MAX_IDS)) {
*floodBackoffPtr *= 2;
goto floodout;
}
// Stepping back by fl->suffix and then realigning (up to 7 more bytes)
// must not move j below zero.
if (i < fl->suffix + 7) {
*floodBackoffPtr *= 2;
goto floodout;
}
j = i - fl->suffix;
// Walk forward while the data is still all c: four words at a time, then
// one word at a time, then (after the #endif) byte by byte.
#ifndef FLOOD_32
j -= (u32)((uintptr_t)buf + j) & 0x7; // push j back to yield 8-aligned addrs
for (; j + 32 < mainLoopLen; j += 32) {
u64a v = *(const u64a *)(buf + j);
u64a v2 = *(const u64a *)(buf + j + 8);
u64a v3 = *(const u64a *)(buf + j + 16);
u64a v4 = *(const u64a *)(buf + j + 24);
if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) {
break;
}
}
for (; j + 8 < mainLoopLen; j += 8) {
u64a v = *(const u64a *)(buf + j);
if (v != cmpVal) {
break;
}
}
#else
j -= (u32)((size_t)buf + j) & 0x3; // push j back to yield 4-aligned addrs
for (; j + 16 < mainLoopLen; j += 16) {
u32 v = *(const u32 *)(buf + j);
u32 v2 = *(const u32 *)(buf + j + 4);
u32 v3 = *(const u32 *)(buf + j + 8);
u32 v4 = *(const u32 *)(buf + j + 12);
if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) {
break;
}
}
for (; j + 4 < mainLoopLen; j += 4) {
u32 v = *(const u32 *)(buf + j);
if (v != cmpVal) {
break;
}
}
#endif
for (; j < mainLoopLen; j++) {
u8 v = *(const u8 *)(buf + j);
if (v != c) {
break;
}
}
// The flood reaches beyond the current position: handle it here.
if (j > i ) {
j--; // needed for some reaches
// Only whole multiples of iterBytes are consumed.
u32 itersAhead = (j-i)/iterBytes;
u32 floodSize = itersAhead*iterBytes;
DEBUG_PRINTF("flooding %u size j %u i %u fl->idCount %hu "
"*control %016llx fl->allGroups %016llx\n",
floodSize, j, i, fl->idCount, *control, fl->allGroups);
DEBUG_PRINTF("mainloopLen %zu mainStart ??? mainEnd ??? len %zu\n",
mainLoopLen, len);
// Raise callbacks only if the record has ids and at least one of its
// groups is still switched on in *control. For every flood position
// i + t and every id k the callback receives
// (end - (fl->len[k] - 1), end, fl->ids[k], ctxt) with end = i + t.
// Each call is individually guarded by that id's group bit, so a callback
// that clears the group suppresses the remaining calls for it.
if (fl->idCount && (*control & fl->allGroups)) {
switch (fl->idCount) {
#if !defined(FLOOD_DEBUG)
// Carefully unrolled code
// NOTE(review): the unrolled loops step t by 4 or 2 and so may visit
// positions up to the next multiple of the step; this presumably relies
// on iterBytes (and hence floodSize) being a multiple of 4 -- confirm.
// One id: four positions per pass.
case 1:
for (u32 t = 0; t < floodSize && (*control & fl->allGroups);
t += 4) {
DEBUG_PRINTF("aaa %u %llx\n", t, fl->groups[0]);
u32 len0 = fl->len[0] - 1;
if (*control & fl->groups[0]) {
*control = cb(i + t + 0 - len0, i + t + 0, fl->ids[0], ctxt);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 2 - len0, i + t + 2, fl->ids[0], ctxt);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 3 - len0, i + t + 3, fl->ids[0], ctxt);
}
}
break;
// Two ids: four positions per pass, both ids reported at each position.
case 2:
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 4) {
u32 len0 = fl->len[0] - 1;
u32 len1 = fl->len[1] - 1;
if (*control & fl->groups[0]) {
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
}
if (*control & fl->groups[1]) {
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
}
if (*control & fl->groups[0]) {
*control =
cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
}
if (*control & fl->groups[1]) {
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 2 - len0, i + t + 2, fl->ids[0], ctxt);
}
if (*control & fl->groups[1]) {
*control = cb(i + t + 2 - len1, i + t + 2, fl->ids[1], ctxt);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 3 - len0, i + t + 3, fl->ids[0], ctxt);
}
if (*control & fl->groups[1]) {
*control = cb(i + t + 3 - len1, i + t + 3, fl->ids[1], ctxt);
}
}
break;
// Three ids: two positions per pass.
case 3:
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
u32 len0 = fl->len[0] - 1;
u32 len1 = fl->len[1] - 1;
u32 len2 = fl->len[2] - 1;
if (*control & fl->groups[0]) {
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
}
if (*control & fl->groups[1]) {
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
}
if (*control & fl->groups[2]) {
*control = cb(i + t - len2, i + t, fl->ids[2], ctxt);
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
}
if (*control & fl->groups[1]) {
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
}
if (*control & fl->groups[2]) {
*control = cb(i + t + 1 - len2, i + t + 1, fl->ids[2], ctxt);
}
}
break;
// Four or more ids: two positions per pass; the first four ids are
// unrolled and any further ids are handled by the inner t2 loops.
default:
// slow generalized loop
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
u32 len0 = fl->len[0] - 1;
u32 len1 = fl->len[1] - 1;
u32 len2 = fl->len[2] - 1;
u32 len3 = fl->len[3] - 1;
if (*control & fl->groups[0]) {
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
}
if (*control & fl->groups[1]) {
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
}
if (*control & fl->groups[2]) {
*control = cb(i + t - len2, i + t, fl->ids[2], ctxt);
}
if (*control & fl->groups[3]) {
*control = cb(i + t - len3, i + t, fl->ids[3], ctxt);
}
for (u32 t2 = 4; t2 < fl->idCount; t2++) {
if (*control & fl->groups[t2]) {
*control = cb(i + t - (fl->len[t2] - 1), i + t, fl->ids[t2], ctxt);
}
}
if (*control & fl->groups[0]) {
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
}
if (*control & fl->groups[1]) {
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
}
if (*control & fl->groups[2]) {
*control = cb(i + t + 1 - len2, i + t + 1, fl->ids[2], ctxt);
}
if (*control & fl->groups[3]) {
*control = cb(i + t + 1 - len3, i + t + 1, fl->ids[3], ctxt);
}
for (u32 t2 = 4; t2 < fl->idCount; t2++) {
if (*control & fl->groups[t2]) {
*control = cb(i + t + 1 - (fl->len[t2] - 1), i + t + 1, fl->ids[t2], ctxt);
}
}
}
break;
#else
// Fallback for debugging
// One position per pass, every id in order, no unrolling.
default:
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t++) {
for (u32 t2 = 0; t2 < fl->idCount; t2++) {
if (*control & fl->groups[t2]) {
*control = cb(i + t - (fl->len[t2] - 1), i + t, fl->ids[t2], ctxt);
}
}
}
#endif
}
}
// Skip the scan position past the flood bytes accounted for above.
ptr += floodSize;
} else {
*floodBackoffPtr *= 2;
}
floodout:
// Schedule the next attempt one backoff distance past the furthest point
// examined, unless that would land within 128 bytes of mainLoopLen.
if (j + *floodBackoffPtr < mainLoopLen - 128) {
tryFloodDetect = buf + MAX(i,j) + *floodBackoffPtr;
} else {
tryFloodDetect = buf + mainLoopLen; // set so we never do another flood detect
}
*ptrPtr = ptr;
DEBUG_PRINTF("finished flood detection at %p (next check %p)\n",
ptr, tryFloodDetect);
return tryFloodDetect;
}
#endif

244
src/fdr/teddy.c Normal file
View File

@@ -0,0 +1,244 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "util/simd_utils.h"
#include "util/simd_utils_ssse3.h"
/* Poison-mask lookup table used by vectoredLoad128.
 *
 * Row n holds 16 zero bytes, then n bytes of 0xff, then (16 - n) zero bytes.
 * A 16-byte load from (p_mask_arr[n] + 16 - s) therefore yields a mask whose
 * bytes s .. s+n-1 are 0xff and whose remaining bytes are zero. */
static const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
};
/**
 * \brief Build the 16-byte vector for the window [ptr, ptr + 16) when that
 * window may stick out of the buffer [lo, hi).
 *
 * Lanes that cannot be filled stay zero. Note: p_mask is an output param that
 * initialises a poison mask: it is 0xff exactly in the lanes that were
 * filled from the buffer itself (not from history) and zero elsewhere.
 *
 * \param p_mask       out: poison mask for the returned vector.
 * \param ptr          start of the 16-byte window.
 * \param lo           first valid byte of the buffer.
 * \param hi           one past the last valid byte of the buffer.
 * \param buf_history  history buffer; its tail supplies lanes before lo.
 * \param len_history  number of bytes in buf_history.
 * \param nMasks       number of masks in use; at most nMasks - 1 history
 *                     bytes are fetched.
 * \return the assembled vector.
 */
UNUSED static really_inline
m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
                     const u8 *buf_history, size_t len_history,
                     const u32 nMasks) {
    union {
        u8 val8[16];
        m128 val128;
    } lanes;
    lanes.val128 = zeroes128();

    if (ptr < lo) {
        // The window begins in front of the buffer: lanes [0, lead) precede lo.
        const u32 fromHistory = MIN((u32)(lo - ptr), MIN(len_history, nMasks - 1));
        const u32 lead = (u32)(lo - ptr);

        // Fill the last fromHistory lanes before lo from the tail of the
        // history buffer; earlier lanes remain zero.
        u32 lane = lead - fromHistory;
        while (ptr + lane < lo) {
            lanes.val8[lane] = buf_history[len_history - (lo - (ptr + lane))];
            lane++;
        }

        // Lanes [lead, stop) come from the buffer and are the only ones
        // flagged in the poison mask.
        const u32 stop = MIN(16, (u32)(hi - ptr));
        *p_mask = loadu128((const void*)(p_mask_arr[stop - lead] + 16 - lead));
        while (lane < stop) {
            lanes.val8[lane] = ptr[lane];
            lane++;
        }
        return lanes.val128;
    }

    // The window starts inside the buffer.
    const u32 avail = (u32)(hi - ptr);
    if (avail >= 16) {
        // Whole window is in bounds: every lane is valid.
        *p_mask = load128((const void*)(p_mask_arr[16] + 16));
        return loadu128(ptr);
    }

    // Only the first avail lanes exist; copy them byte by byte.
    *p_mask = load128((const void*)(p_mask_arr[avail] + 16));
    for (u32 k = 0; k < avail; k++) {
        lanes.val8[k] = ptr[k];
    }
    return lanes.val128;
}
#if defined(__AVX2__)
/**
 * \brief 256-bit wrapper around vectoredLoad128.
 *
 * Performs one vectoredLoad128 with the given arguments and passes both the
 * loaded vector and its poison mask through set2x128 to form the 256-bit
 * results.
 *
 * \param p_mask  out: set2x128 of the 128-bit poison mask.
 * \return set2x128 of the 128-bit loaded vector.
 */
UNUSED static really_inline
m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
                       const u8 *buf_history, size_t len_history,
                       const u32 nMasks) {
    m128 mask_lane;
    const m128 data_lane = vectoredLoad128(&mask_lane, ptr, lo, hi,
                                           buf_history, len_history, nMasks);
    const m256 widened = set2x128(data_lane);
    *p_mask = set2x128(mask_lane);
    return widened;
}
static const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64] = {
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
};
/**
 * \brief Build the 32-byte vector for the window [ptr, ptr + 32) when that
 * window may stick out of the buffer [lo, hi).
 *
 * Lanes that cannot be filled from the buffer are zero, and *p_mask (the
 * poison mask) is 0xff exactly in the lanes that were filled from the buffer.
 *
 * \param p_mask       out: poison mask for the returned vector.
 * \param ptr          start of the 32-byte window.
 * \param lo           first valid byte of the buffer.
 * \param hi           one past the last valid byte of the buffer.
 * \param buf_history  history buffer (see the note on the history loop).
 * \param len_history  number of bytes in buf_history.
 * \return the assembled vector.
 */
UNUSED static really_inline
m256 vectoredLoad256(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
                     const u8 *buf_history, size_t len_history) {
    // Start from all-zero lanes, as vectoredLoad128 does. Without this the
    // lanes outside [lo, hi) are returned with indeterminate stack contents.
    union {
        u8 val8[32];
        m256 val256;
    } u = { { 0 } };

    if (ptr >= lo) {
        u32 avail = (u32)(hi - ptr);
        if (avail >= 32) {
            // Whole window is in bounds: every lane is valid.
            *p_mask = load256((const void*)(p_mask_arr256[32] + 32));
            return loadu256(ptr);
        }
        // Only the first avail lanes exist; copy them byte by byte.
        *p_mask = load256((const void*)(p_mask_arr256[avail] + 32));
        for (u32 i = 0; i < avail; i++) {
            u.val8[i] = ptr[i];
        }
    } else {
        // need contains "how many chars to pull from history"
        // calculate based on what we need, what we have in the buffer
        // and only what we need to make primary confirm work
        u32 start = (u32)(lo - ptr);
        u32 i;
        // NOTE(review): i begins at start, where ptr + i == lo, so this loop
        // copies nothing and lanes [0, start) keep their zero value. Unlike
        // vectoredLoad128 there is no "need" computation here -- confirm
        // whether history bytes were meant to be loaded.
        for (i = start; ptr + i < lo; i++) {
            u.val8[i] = buf_history[len_history - (lo - (ptr + i))];
        }
        // Lanes [start, end) come from the buffer and are the only ones
        // flagged in the poison mask.
        u32 end = MIN(32, (u32)(hi - ptr));
        *p_mask = loadu256((const void*)(p_mask_arr256[end - start] + 32 - start));
        for (; i < end; i++) {
            u.val8[i] = ptr[i];
        }
    }

    return u.val256;
}
#endif // __AVX2__
#define P0(cnd) unlikely(cnd)
#include "fdr.h"
#include "fdr_internal.h"
#include "flood_runtime.h"
#include "fdr_confirm.h"
#include "fdr_confirm_runtime.h"
#include "fdr_loadval.h"
#include "util/bitutils.h"
#include "teddy_internal.h"
#include "teddy_autogen.c"

545
src/fdr/teddy_autogen.py Executable file
View File

@@ -0,0 +1,545 @@
#!/usr/bin/python
# Copyright (c) 2015, Intel Corporation
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
from autogen_utils import *
from base_autogen import *
from string import Template
class MT(MatcherBase):
# Emit the C fragment that processes every set bit of the generated C
# variable `var_name`.
#   iter     - index of the 16-byte block; iter*16 + offset is the byte
#              position of this variable within the iteration
#   var_name - name of the C variable holding the bits to process
#   offset   - byte offset of that variable within its 16-byte block
#   bits     - 64 selects findAndClearLSB_64, anything else findAndClearLSB_32
#   cautious - True emits the VECTORING confirm mode, False NOT_CAUTIOUS
def produce_confirm(self, iter, var_name, offset, bits, cautious = True):
# Packed matchers delegate to self.produce_confirm_base (defined outside
# this part of the file).
if self.packed:
print self.produce_confirm_base(var_name, bits, iter*16 + offset, cautious, enable_confirmless = False, do_bailout = False)
else:
# One mask uses confWithBit1, several masks use confWithBitMany.
if self.num_masks == 1:
conf_func = "confWithBit1"
else:
conf_func = "confWithBitMany"
if cautious:
caution_string = "VECTORING"
else:
caution_string = "NOT_CAUTIOUS"
# Generated C: loop while the variable is non-zero, clearing one bit per pass.
print " if (P0(!!%s)) {" % var_name
print " do {"
if bits == 64:
print " bit = findAndClearLSB_64(&%s);" % (var_name)
else:
print " bit = findAndClearLSB_32(&%s);" % (var_name)
# The bit index is split into a byte position (bit / num_buckets plus the
# base offset) and a bucket index (bit % num_buckets) that selects the
# FDRConfirm structure through confBase.
print " byte = bit / %d + %d;" % (self.num_buckets, iter*16 + offset)
print " idx = bit %% %d;" % self.num_buckets
print " cf = confBase[idx];"
print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);"
# Generated C skips the confirm call when none of its groups are active.
print " if (!(fdrc->groups & *control))"
print " continue;"
print " %s(fdrc, a, ptr - buf + byte, %s, control, &last_match);" % (conf_func, caution_string)
print " } while(P0(!!%s));" % var_name
# After the loop the generated C returns HWLM_TERMINATED if matching was
# terminated.
print " if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {"
print " *a->groups = controlVal;"
print " return HWLM_TERMINATED;"
print " }"
print " }"
# Emit the C declarations of the temporaries used by the generated scan code:
# one p_mask, and for each of the max_iterations blocks the loaded value, its
# low/high nibble vectors, the per-mask results and the combined result r_N.
def produce_needed_temporaries(self, max_iterations):
print " m128 p_mask;"
for iter in range(0, max_iterations):
print " m128 val_%d;" % iter
print " m128 val_%d_lo;" % iter
print " m128 val_%d_hi;" % iter
for x in range(self.num_masks):
print " m128 res_%d_%d;" % (iter, x)
# Mask 0 has no shifted variant.
if x != 0:
print " m128 res_shifted_%d_%d;" % (iter, x)
print " m128 r_%d;" % iter
# r_N is later split into two u64a parts on 64-bit builds and into four
# u32 parts otherwise.
print "#ifdef ARCH_64_BIT"
print " u64a r_%d_lopart;" % iter
print " u64a r_%d_hipart;" % iter
print "#else"
print " u32 r_%d_part1;" % iter
print " u32 r_%d_part2;" % iter
print " u32 r_%d_part3;" % iter
print " u32 r_%d_part4;" % iter
print "#endif"
# Emit the C code that computes the result vector r_<iter> for one 16-byte
# block.
#   iter                     - index of the block within the iteration
#   effective_num_iterations - number of blocks emitted; res_old_* is only
#                              saved in the last one
#   cautious                 - True loads through vectoredLoad128 and ANDs
#                              the result with p_mask; False uses load128
#   save_old                 - whether to store res_old_<x> for the next pass
def produce_one_iteration_state_calc(self, iter, effective_num_iterations,
cautious, save_old):
if cautious:
print " val_%d = vectoredLoad128(&p_mask, ptr + %d, buf, buf+len, a->buf_history, a->len_history, %d);" % (iter, iter*16, self.num_masks)
else:
print " val_%d = load128(ptr + %d);" % (iter, iter*16)
# Split every byte into its low nibble (AND with lomask) and its high
# nibble (shift right by 4, then AND with lomask).
print " val_%d_lo = and128(val_%d, lomask);" % (iter, iter)
print " val_%d_hi = rshift2x64(val_%d, 4);" % (iter, iter)
print " val_%d_hi = and128(val_%d_hi, lomask);" % (iter, iter)
print
# For each mask x: AND the pshufb lookups of maskBase[2x] (low nibbles) and
# maskBase[2x+1] (high nibbles).
for x in range(self.num_masks):
print Template("""
res_${ITER}_${X} = and128(pshufb(maskBase[${X}*2] , val_${ITER}_lo),
pshufb(maskBase[${X}*2+1], val_${ITER}_hi));""").substitute(ITER = iter, X = x)
# Masks other than 0 are combined via palignr (16-x) with the preceding
# result: res_old_<x> for the first block, the previous block's res otherwise.
if x != 0:
if iter == 0:
print " res_shifted_%d_%d = palignr(res_%d_%d, res_old_%d, 16-%d);" % (iter, x, iter, x, x, x)
else:
print " res_shifted_%d_%d = palignr(res_%d_%d, res_%d_%d, 16-%d);" % (iter, x, iter, x, iter-1, x, x)
if x != 0 and iter == effective_num_iterations - 1 and save_old:
print " res_old_%d = res_%d_%d;" % (x, iter, x)
print
# r_<iter> starts from mask 0's result (ANDed with p_mask when cautious) and
# is then ANDed with every shifted result.
if cautious:
print " r_%d = and128(res_%d_0, p_mask);" % (iter, iter)
else:
print " r_%d = res_%d_0;" % (iter, iter)
for x in range(1, self.num_masks):
print " r_%d = and128(r_%d, res_shifted_%d_%d);" % (iter, iter, iter, x)
print
def produce_one_iteration_confirm(self, iter, confirmCautious):
setup64 = [ (0, "r_%d_lopart" % iter, "movq(r_%d)" % iter),
(8, "r_%d_hipart" % iter, "movq(byteShiftRight128(r_%d, 8))" % iter) ]
setup32 = [ (0, "r_%d_part1" % iter, "movd(r_%d)" % iter),
(4, "r_%d_part2" % iter, "movd(byteShiftRight128(r_%d, 4))" % iter),
(8, "r_%d_part3" % iter, "movd(byteShiftRight128(r_%d, 8))" % iter),
(12, "r_%d_part4" % iter, "movd(byteShiftRight128(r_%d, 12))" % iter) ]
print " if (P0(isnonzero128(r_%d))) {" % (iter)
print "#ifdef ARCH_64_BIT"
for (off, val, init) in setup64:
print " %s = %s;" % (val, init)
for (off, val, init) in setup64:
self.produce_confirm(iter, val, off, 64, cautious = confirmCautious)
print "#else"
for (off, val, init) in setup32:
print " %s = %s;" % (val, init)
for (off, val, init) in setup32:
self.produce_confirm(iter, val, off, 32, cautious = confirmCautious)
print "#endif"
print " }"
# Emit one complete iteration: the state calculation for block `iter`
# followed by the confirm step for its result.
#   cautious        -- forwarded to the state calculation.
#   confirmCautious -- forwarded to the confirm step.
#   save_old        -- forwarded to the state calculation.
def produce_one_iteration(self, iter, effective_num_iterations, cautious = False,
confirmCautious = True, save_old = True):
self.produce_one_iteration_state_calc(iter, effective_num_iterations, cautious, save_old)
self.produce_one_iteration_confirm(iter, confirmCautious)
# Emit the complete C function for this 128-bit matcher: header, declarations,
# table pointers and then four scanning phases:
#   1. a cautious block covering the unaligned head (when ptr < mainStart),
#   2. at most one plain 16-byte block with cautious confirm,
#   3. the unrolled main loop over iterBytes at a time, with a flood check,
#   4. a cautious tail loop over the remaining bytes.
def produce_code(self):
print self.produce_header(visible = True, header_only = False)
print self.produce_common_declarations()
print
self.produce_needed_temporaries(self.num_iterations)
print
# The masks follow the Teddy header; the confirm table follows the masks
# (num_masks * 32 bytes further on).
print " const struct Teddy * teddy = (const struct Teddy *)fdr;"
print " const m128 * maskBase = (const m128 *)((const u8 *)fdr + sizeof(struct Teddy));"
print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + (%d*32));" % self.num_masks
print " const u8 * mainStart = ROUNDUP_PTR(ptr, 16);"
print " const size_t iterBytes = %d;" % (self.num_iterations * 16)
print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \
' buf, len, a->start_offset);'
print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \
' mainStart);'
# res_old_<x> holds the previous block's result for every mask but the
# first; it starts out as all ones.
for x in range(self.num_masks):
if (x != 0):
print " m128 res_old_%d = ones128();" % x
print " m128 lomask = set16x8(0xf);"
# Phase 1: unaligned head, handled with a cautious load.
print " if (ptr < mainStart) {"
print " ptr = mainStart - 16;"
self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
print " ptr += 16;"
print " }"
# Phase 2: a single plain block, still confirmed cautiously.
print " if (ptr + 16 < buf + len) {"
self.produce_one_iteration(0, 1, cautious = False, confirmCautious = True, save_old = True)
print " ptr += 16;"
print " }"
# Phase 3: unrolled main loop.
print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {"
print " __builtin_prefetch(ptr + (iterBytes*4));"
print self.produce_flood_check()
for iter in range(self.num_iterations):
self.produce_one_iteration(iter, self.num_iterations, cautious = False, confirmCautious = False)
print " }"
# Phase 4: cautious tail.
print " for (; ptr < buf + len; ptr += 16) {"
self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
print " }"
print self.produce_footer()
def produce_compile_call(self):
packed_str = { False : "false", True : "true"}[self.packed]
print " { %d, %s, %d, %d, %s, %d, %d }," % (
self.id, self.arch.target, self.num_masks, self.num_buckets, packed_str,
self.conf_pull_back, self.conf_top_level_split)
def get_name(self):
    """Return the name of the generated C function for this matcher.

    The name encodes the architecture and the mask count, plus a "_pck"
    suffix for packed engines and a "_fat" suffix for 16-bucket engines.
    """
    packed_suffix = "_pck" if self.packed else ""
    bucket_suffix = "_fat" if self.num_buckets == 16 else ""
    return "fdr_exec_teddy_%s_msks%d%s%s" % (self.arch.name, self.num_masks,
                                             packed_suffix, bucket_suffix)
def __init__(self, arch, packed = False, num_masks = 1, num_buckets = 8):
self.arch = arch
self.packed = packed
self.num_masks = num_masks
self.num_buckets = num_buckets
self.num_iterations = 2
if packed:
self.conf_top_level_split = 32
else:
self.conf_top_level_split = 1
self.conf_pull_back = 0
# 256-bit variant of MT. It overrides the temporaries, the main function body,
# the per-iteration state calculation and the confirm step so that the
# generated C code works on m256 values; everything else is inherited from MT.
class MTFat(MT):
# Emit the C declarations of the temporaries: the same set as the 128-bit
# variant but typed m256, with four 64-bit or eight 32-bit result words per
# iteration.
def produce_needed_temporaries(self, max_iterations):
print " m256 p_mask;"
for iter in range(0, max_iterations):
print " m256 val_%d;" % iter
print " m256 val_%d_lo;" % iter
print " m256 val_%d_hi;" % iter
for x in range(self.num_masks):
print " m256 res_%d_%d;" % (iter, x)
if x != 0:
print " m256 res_shifted_%d_%d;" % (iter, x)
print " m256 r_%d;" % iter
print "#ifdef ARCH_64_BIT"
print " u64a r_%d_part1;" % iter
print " u64a r_%d_part2;" % iter
print " u64a r_%d_part3;" % iter
print " u64a r_%d_part4;" % iter
print "#else"
print " u32 r_%d_part1;" % iter
print " u32 r_%d_part2;" % iter
print " u32 r_%d_part3;" % iter
print " u32 r_%d_part4;" % iter
print " u32 r_%d_part5;" % iter
print " u32 r_%d_part6;" % iter
print " u32 r_%d_part7;" % iter
print " u32 r_%d_part8;" % iter
print "#endif"
# Emit the complete C function. The structure mirrors MT.produce_code (cautious
# head, one plain block, unrolled main loop, cautious tail); the input is
# still consumed 16 bytes per iteration, but the masks are m256 and the
# confirm table sits num_masks * 32 * 2 bytes after the Teddy header.
def produce_code(self):
print self.produce_header(visible = True, header_only = False)
print self.produce_common_declarations()
print
self.produce_needed_temporaries(self.num_iterations)
print
print " const struct Teddy * teddy = (const struct Teddy *)fdr;"
print " const m256 * maskBase = (const m256 *)((const u8 *)fdr + sizeof(struct Teddy));"
print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + (%d*32*2));" % self.num_masks
print " const u8 * mainStart = ROUNDUP_PTR(ptr, 16);"
print " const size_t iterBytes = %d;" % (self.num_iterations * 16)
print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \
' buf, len, a->start_offset);'
print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \
' mainStart);'
# res_old_<x> exists for every mask but the first and starts as all ones.
for x in range(self.num_masks):
if (x != 0):
print " m256 res_old_%d = ones256();" % x
print " m256 lomask = set32x8(0xf);"
# Unaligned head, handled with a cautious load.
print " if (ptr < mainStart) {"
print " ptr = mainStart - 16;"
self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
print " ptr += 16;"
print " }"
# A single plain block, still confirmed cautiously.
print " if (ptr + 16 < buf + len) {"
self.produce_one_iteration(0, 1, cautious = False, confirmCautious = True, save_old = True)
print " ptr += 16;"
print " }"
# Unrolled main loop.
print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {"
print " __builtin_prefetch(ptr + (iterBytes*4));"
print self.produce_flood_check()
for iter in range(self.num_iterations):
self.produce_one_iteration(iter, self.num_iterations, False, confirmCautious = False)
print " }"
# Cautious tail.
print " for (; ptr < buf + len; ptr += 16) {"
self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
print " }"
print self.produce_footer()
# Emit the C code computing r_<iter> for one 16-byte block. Same steps as the
# 128-bit version, using load2x128/vectoredLoad2x128 and the 256-bit
# and256/rshift4x64/vpshufb/vpalignr operations.
def produce_one_iteration_state_calc(self, iter, effective_num_iterations,
cautious, save_old):
if cautious:
print " val_%d = vectoredLoad2x128(&p_mask, ptr + %d, buf, buf+len, a->buf_history, a->len_history, %d);" % (iter, iter*16, self.num_masks)
else:
print " val_%d = load2x128(ptr + %d);" % (iter, iter*16)
# Split every input byte into its low and high nibble.
print " val_%d_lo = and256(val_%d, lomask);" % (iter, iter)
print " val_%d_hi = rshift4x64(val_%d, 4);" % (iter, iter)
print " val_%d_hi = and256(val_%d_hi, lomask);" % (iter, iter)
print
for x in range(self.num_masks):
print Template("""
res_${ITER}_${X} = and256(vpshufb(maskBase[${X}*2] , val_${ITER}_lo),
vpshufb(maskBase[${X}*2+1], val_${ITER}_hi));""").substitute(ITER = iter, X = x)
if x != 0:
# Combine with the previous block's result for the same mask.
if iter == 0:
print " res_shifted_%d_%d = vpalignr(res_%d_%d, res_old_%d, 16-%d);" % (iter, x, iter, x, x, x)
else:
print " res_shifted_%d_%d = vpalignr(res_%d_%d, res_%d_%d, 16-%d);" % (iter, x, iter, x, iter-1, x, x)
if x != 0 and iter == effective_num_iterations - 1 and save_old:
print " res_old_%d = res_%d_%d;" % (x, iter, x)
print
if cautious:
print " r_%d = and256(res_%d_0, p_mask);" % (iter, iter)
else:
print " r_%d = res_%d_0;" % (iter, iter)
for x in range(1, self.num_masks):
print " r_%d = and256(r_%d, res_shifted_%d_%d);" % (iter, iter, iter, x)
print
# Emit the C code that confirms the candidates held in r_<iter>. Each tuple
# is (byte offset passed to produce_confirm, C variable, C initialiser).
# Some initialisers carry a second statement that re-points `r` at the
# interleave256hi result before the remaining words are extracted.
# NOTE(review): the swap128in256/interleave256lo/hi sequence presumably
# reorders the two 128-bit halves so the words come out in input-byte
# order -- confirm against the intrinsics' definitions.
def produce_one_iteration_confirm(self, iter, confirmCautious):
setup64 = [ (0, "r_%d_part1" % iter, "extractlow64from256(r)"),
(4, "r_%d_part2" % iter, "extract64from256(r, 1);\n r = interleave256hi(r_%d, r_swap)" % (iter)),
(8, "r_%d_part3" % iter, "extractlow64from256(r)"),
(12, "r_%d_part4" % iter, "extract64from256(r, 1)") ]
setup32 = [ (0, "r_%d_part1" % iter, "extractlow32from256(r)"),
(2, "r_%d_part2" % iter, "extract32from256(r, 1)"),
(4, "r_%d_part3" % iter, "extract32from256(r, 2)"),
(6, "r_%d_part4" % iter, "extract32from256(r, 3);\n r = interleave256hi(r_%d, r_swap)" % (iter)),
(8, "r_%d_part5" % iter, "extractlow32from256(r)"),
(10, "r_%d_part6" % iter, "extract32from256(r, 1)"),
(12, "r_%d_part7" % iter, "extract32from256(r, 2)"),
(14, "r_%d_part8" % iter, "extract32from256(r, 3)") ]
print " if (P0(isnonzero256(r_%d))) {" % (iter)
print " m256 r_swap = swap128in256(r_%d);" % (iter)
print " m256 r = interleave256lo(r_%d, r_swap);" % (iter)
print "#ifdef ARCH_64_BIT"
# Load all words first, then confirm each of them.
for (off, val, init) in setup64:
print " %s = %s;" % (val, init)
for (off, val, init) in setup64:
self.produce_confirm(iter, val, off, 64, cautious = confirmCautious)
print "#else"
for (off, val, init) in setup32:
print " %s = %s;" % (val, init)
for (off, val, init) in setup32:
self.produce_confirm(iter, val, off, 32, cautious = confirmCautious)
print "#endif"
print " }"
class MTFast(MatcherBase):
def produce_confirm(self, cautious):
if cautious:
cautious_str = "VECTORING"
else:
cautious_str = "NOT_CAUTIOUS"
print " for (u32 i = 0; i < arrCnt; i++) {"
print " byte = bitArr[i] / 8;"
if self.packed:
conf_split_mask = IntegerType(32).constant_to_string(
self.conf_top_level_split - 1)
print " bitRem = bitArr[i] % 8;"
print " confSplit = *(ptr+byte) & 0x1f;"
print " idx = confSplit * %d + bitRem;" % self.num_buckets
print " cf = confBase[idx];"
print " if (!cf)"
print " continue;"
print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);"
print " if (!(fdrc->groups & *control))"
print " continue;"
print " confWithBit(fdrc, a, ptr - buf + byte, %s, 0, control, &last_match);" % cautious_str
else:
print " cf = confBase[bitArr[i] % 8];"
print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);"
print " confWithBit1(fdrc, a, ptr - buf + byte, %s, control, &last_match);" % cautious_str
print " if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {"
print " *a->groups = controlVal;"
print " return HWLM_TERMINATED;"
print " }"
print " }"
def produce_needed_temporaries(self, max_iterations):
print " u32 arrCnt;"
print " u16 bitArr[512];"
print " m256 p_mask;"
print " m256 val_0;"
print " m256 val_0_lo;"
print " m256 val_0_hi;"
print " m256 res_0;"
print " m256 res_1;"
print " m128 lo_part;"
print " m128 hi_part;"
print "#ifdef ARCH_64_BIT"
print " u64a r_0_part;"
print "#else"
print " u32 r_0_part;"
print "#endif"
def produce_bit_scan(self, offset, bits):
print " while (P0(!!r_0_part)) {"
if bits == 64:
print " bitArr[arrCnt++] = (u16)findAndClearLSB_64(&r_0_part) + 64 * %d;" % (offset)
else:
print " bitArr[arrCnt++] = (u16)findAndClearLSB_32(&r_0_part) + 32 * %d;" % (offset)
print " }"
def produce_bit_check_128(self, var_name, offset):
print " if (P0(isnonzero128(%s))) {" % (var_name)
print "#ifdef ARCH_64_BIT"
print " r_0_part = movq(%s);" % (var_name)
self.produce_bit_scan(offset, 64)
print " r_0_part = movq(byteShiftRight128(%s, 8));" % (var_name)
self.produce_bit_scan(offset + 1, 64)
print "#else"
print " r_0_part = movd(%s);" % (var_name)
self.produce_bit_scan(offset * 2, 32)
for step in range(1, 4):
print " r_0_part = movd(byteShiftRight128(%s, %d));" % (var_name, step * 4)
self.produce_bit_scan(offset * 2 + step, 32)
print "#endif"
print " }"
def produce_bit_check_256(self, iter, single_iter, cautious):
print " if (P0(isnonzero256(res_%d))) {" % (iter)
if single_iter:
print " arrCnt = 0;"
print " lo_part = cast256to128(res_%d);" % (iter)
print " hi_part = cast256to128(swap128in256(res_%d));" % (iter)
self.produce_bit_check_128("lo_part", iter * 4)
self.produce_bit_check_128("hi_part", iter * 4 + 2)
if single_iter:
self.produce_confirm(cautious)
print " }"
def produce_one_iteration_state_calc(self, iter, cautious):
if cautious:
print " val_0 = vectoredLoad256(&p_mask, ptr + %d, buf+a->start_offset, buf+len, a->buf_history, a->len_history);" % (iter * 32)
else:
print " val_0 = load256(ptr + %d);" % (iter * 32)
print " val_0_lo = and256(val_0, lomask);"
print " val_0_hi = rshift4x64(val_0, 4);"
print " val_0_hi = and256(val_0_hi, lomask);"
print " res_%d = and256(vpshufb(maskLo , val_0_lo), vpshufb(maskHi, val_0_hi));" % (iter)
if cautious:
print " res_%d = and256(res_%d, p_mask);" % (iter, iter)
def produce_code(self):
print self.produce_header(visible = True, header_only = False)
print self.produce_common_declarations()
print
self.produce_needed_temporaries(self.num_iterations)
print " const struct Teddy * teddy = (const struct Teddy *)fdr;"
print " const m128 * maskBase = (const m128 *)((const u8 *)fdr + sizeof(struct Teddy));"
print " const m256 maskLo = set2x128(maskBase[0]);"
print " const m256 maskHi = set2x128(maskBase[1]);"
print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + 32);"
print " const u8 * mainStart = ROUNDUP_PTR(ptr, 32);"
print " const size_t iterBytes = %d;" % (self.num_iterations * 32)
print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \
' buf, len, a->start_offset);'
print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \
' mainStart);'
print " const m256 lomask = set32x8(0xf);"
print " if (ptr < mainStart) {"
print " ptr = mainStart - 32;"
self.produce_one_iteration_state_calc(iter = 0, cautious = True)
self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
print " ptr += 32;"
print " }"
print " if (ptr + 32 < buf + len) {"
self.produce_one_iteration_state_calc(iter = 0, cautious = False)
self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
print " ptr += 32;"
print " }"
print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {"
print " __builtin_prefetch(ptr + (iterBytes*4));"
print self.produce_flood_check()
for iter in range (0, self.num_iterations):
self.produce_one_iteration_state_calc(iter = iter, cautious = False)
print " arrCnt = 0;"
for iter in range (0, self.num_iterations):
self.produce_bit_check_256(iter = iter, single_iter = False, cautious = False)
self.produce_confirm(cautious = False)
print " }"
print " for (; ptr < buf + len; ptr += 32) {"
self.produce_one_iteration_state_calc(iter = 0, cautious = True)
self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
print " }"
print self.produce_footer()
def get_name(self):
if self.packed:
pck_string = "_pck"
else:
pck_string = ""
return "fdr_exec_teddy_%s_msks%d%s_fast" % (self.arch.name, self.num_masks, pck_string)
def produce_compile_call(self):
packed_str = { False : "false", True : "true"}[self.packed]
print " { %d, %s, %d, %d, %s, %d, %d }," % (
self.id, self.arch.target, self.num_masks, self.num_buckets, packed_str,
self.conf_pull_back, self.conf_top_level_split)
def __init__(self, arch, packed = False):
self.arch = arch
self.packed = packed
self.num_masks = 1
self.num_buckets = 8
self.num_iterations = 2
self.conf_top_level_split = 1
self.conf_pull_back = 0
if packed:
self.conf_top_level_split = 32
else:
self.conf_top_level_split = 1
self.conf_pull_back = 0

459
src/fdr/teddy_compile.cpp Normal file
View File

@@ -0,0 +1,459 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_compile_internal.h"
#include "fdr_confirm.h"
#include "fdr_engine_description.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/compare.h"
#include "util/popcount.h"
#include "util/target_info.h"
#include "util/verify_types.h"
#include "teddy_compile.h"
#include "teddy_internal.h"
#include "teddy_engine_description.h"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include <boost/core/noncopyable.hpp>
using namespace std;
namespace ue2 {
namespace {
//#define TEDDY_DEBUG
/** \brief Builds the Teddy table for a set of literals with a given engine
 * description. The engine description and the literal vector are held by
 * reference, so both must outlive the compiler. */
class TeddyCompiler : boost::noncopyable {
const TeddyEngineDescription &eng;
const vector<hwlmLiteral> &lits;
bool make_small; // passed on to the confirm-structure builder
public:
TeddyCompiler(const vector<hwlmLiteral> &lits_in,
const TeddyEngineDescription &eng_in, bool make_small_in)
: eng(eng_in), lits(lits_in), make_small(make_small_in) {}
/** \brief Build the table; \a link is a (pointer, size) buffer copied to the
 * end of the table. Returns nullptr if the literals cannot be handled. */
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link);
/** \brief Assign the literals to buckets. Returns false if they cannot be
 * reduced to at most the engine's number of buckets. */
bool pack(map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits);
};
/** \brief A group of literals that would share one Teddy bucket, together
 * with the nibble sets such a bucket would have to accept. */
class TeddySet {
    const vector<hwlmLiteral> &lits;
    u32 len;
    // nibbleSets holds two 16-bit bitfields per mask position, counting
    // positions from the END of the literal: entry i*2 is the set of
    // acceptable low nibbles and entry i*2+1 the set of acceptable high
    // nibbles for the character i places before the last one. So for
    // num_masks = 4 the strings are represented by 8 u16s that indicate
    // what a shufti-style bucket would have to look like.
    vector<u16> nibbleSets;
    set<u32> litIds;
public:
    TeddySet(const vector<hwlmLiteral> &lits_in, u32 len_in)
        : lits(lits_in), len(len_in), nibbleSets(len_in * 2, 0) {}

    const set<u32> & getLits() const { return litIds; }

    size_t litCount() const { return litIds.size(); }

    // Sets are ordered (and considered distinct) by their literal ids only.
    bool operator<(const TeddySet & s) const {
        return litIds < s.litIds;
    }

#ifdef TEDDY_DEBUG
    void dump() const {
        printf("TS: ");
        for (u32 i = 0; i < nibbleSets.size(); i++) {
            printf("%04x ", (u32)nibbleSets[i]);
        }
        printf("\nnlits: %zu\n", litCount());
        printf("Prob: %llu\n", probability());
        // Print the label directly before the ids it describes.
        printf("Lit ids: ");
        for (set<u32>::iterator i = litIds.begin(), e = litIds.end(); i != e; ++i) {
            printf("%u ", *i);
        }
        printf("\n");
        printf("Flood prone : %s\n", isRunProne()?"yes":"no");
    }
#endif

    // True if both sets accept exactly the same nibbles at every position.
    bool identicalTail(const TeddySet & ts) const {
        return nibbleSets == ts.nibbleSets;
    }

    // Set the nibble sets from literal lit_id and record its id. The nibble
    // sets are overwritten, not accumulated, so this is meant to be called
    // on a fresh set; use merge() to combine sets.
    void addLiteral(u32 lit_id) {
        const string &s = lits[lit_id].s;
        for (u32 i = 0; i < len; i++) {
            if (i < s.size()) {
                u8 c = s[s.size() - i - 1];
                u8 c_hi = (c >> 4) & 0xf;
                u8 c_lo = c & 0xf;
                nibbleSets[i*2] = 1 << c_lo;
                if (lits[lit_id].nocase && ourisalpha(c)) {
                    // caseless letter: accept the high nibble with bit 1
                    // both cleared and set, i.e. either case
                    nibbleSets[i*2+1] = (1 << (c_hi&0xd)) | (1 << (c_hi|0x2));
                } else {
                    nibbleSets[i*2+1] = 1 << c_hi;
                }
            } else {
                // literal is shorter than the mask count: accept anything
                nibbleSets[i*2] = nibbleSets[i*2+1] = 0xffff;
            }
        }
        litIds.insert(lit_id);
    }

    // Union the nibble sets and literal ids of ts into this set.
    void merge(const TeddySet &ts) {
        for (u32 i = 0; i < nibbleSets.size(); i++) {
            nibbleSets[i] |= ts.nibbleSets[i];
        }
        litIds.insert(ts.litIds.begin(), ts.litIds.end());
    }

    // return a value p from 0 .. MAXINT64 that gives p/MAXINT64
    // likelihood of this TeddySet firing a first-stage accept
    // if it was given a bucket of its own and random data were
    // to be passed in
    u64a probability() const {
        u64a val = 1;
        for (size_t i = 0; i < nibbleSets.size(); i++) {
            val *= popcount32((u32)nibbleSets[i]);
        }
        return val;
    }

    // return a score based around the chance of this hitting times
    // a small fixed cost + the cost of traversing some sort of followup
    // (assumption is that the followup is linear)
    u64a heuristic() const {
        return probability() * (2+litCount());
    }

    // True if some low nibble and some high nibble are accepted at every
    // position, i.e. a run of one repeated byte could get through.
    bool isRunProne() const {
        u16 lo_and = 0xffff;
        u16 hi_and = 0xffff;
        for (u32 i = 0; i < len; i++) {
            lo_and &= nibbleSets[i*2];
            hi_and &= nibbleSets[i*2+1];
        }
        // we're not flood-prone if there's no way to get
        // through with a flood
        return lo_and && hi_and;
    }
};
/** \brief Greedily merge the literals into at most getNumBuckets() sets and
 * write the resulting bucket -> literal assignment into \a bucketToLits.
 * Returns false if no acceptable sequence of merges gets down to that many
 * sets. */
bool TeddyCompiler::pack(map<BucketIndex,
                             std::vector<LiteralIndex> > &bucketToLits) {
    // Start with one singleton set per literal.
    set<TeddySet> sts;
    for (u32 lit = 0; lit < lits.size(); lit++) {
        TeddySet single(lits, eng.numMasks);
        single.addLiteral(lit);
        sts.insert(single);
    }

    for (;;) {
#ifdef TEDDY_DEBUG
        printf("Size %zu\n", sts.size());
        for (const auto &ts : sts) {
            printf("\n"); ts.dump();
        }
        printf("\n===============================================\n");
#endif
        // Search all pairs for the merge to perform in this round.
        auto m1 = sts.end();
        auto m2 = sts.end();
        u64a best = 0xffffffffffffffffULL;

        for (auto i1 = sts.begin(); i1 != sts.end(); ++i1) {
            const TeddySet &s1 = *i1;
            auto i2 = i1;
            for (++i2; i2 != sts.end(); ++i2) {
                const TeddySet &s2 = *i2;

                // be more conservative if we don't absolutely need to
                // keep packing
                if ((sts.size() <= eng.getNumBuckets()) &&
                    !s1.identicalTail(s2)) {
                    continue;
                }

                TeddySet merged(lits, eng.numMasks);
                merged.merge(s1);
                merged.merge(s2);
                const u64a newScore = merged.heuristic();
                const u64a oldScore = s1.heuristic() + s2.heuristic();

                // A merge that lowers the total score is taken for this s1
                // without looking at further partners.
                if (newScore < oldScore) {
                    m1 = i1;
                    m2 = i2;
                    break;
                }

                const u64a score = newScore - oldScore;
                const bool oldRunProne = s1.isRunProne() && s2.isRunProne();
                const bool newRunProne = merged.isRunProne();
                // Never create a run-prone set out of sets that were not
                // both run-prone already.
                if (newRunProne && !oldRunProne) {
                    continue;
                }
                if (score < best) {
                    best = score;
                    m1 = i1;
                    m2 = i2;
                }
            }
        }

        // if we didn't find a merge candidate, bail out
        if ((m1 == sts.end()) || (m2 == sts.end())) {
            break;
        }

        // do the merge
        TeddySet nts(lits, eng.numMasks);
        nts.merge(*m1);
        nts.merge(*m2);
#ifdef TEDDY_DEBUG
        printf("Merging\n");
        printf("m1 = \n");
        m1->dump();
        printf("m2 = \n");
        m2->dump();
        printf("nts = \n");
        nts.dump();
        printf("\n===============================================\n");
#endif
        sts.erase(m1);
        sts.erase(m2);
        sts.insert(nts);
    }

    if (sts.size() > eng.getNumBuckets()) {
        return false;
    }

    // Bucket ids are handed out in set order.
    u32 bucket = 0;
    for (const auto &ts : sts) {
        for (const auto &lit_id : ts.getLits()) {
            bucketToLits[bucket].push_back(lit_id);
        }
        bucket++;
    }
    return true;
}
/** \brief Build the Teddy table for the literals.
 *
 * The resulting allocation is laid out as: Teddy header, nibble masks
 * (maskLen bytes), confirm structures, flood control data and finally a copy
 * of the \a link buffer (if any).
 *
 * \param link (pointer, size) of a buffer to append; freed with aligned_free
 *             once it has been copied.
 * \return the table, or nullptr if there are too many literals or they
 *         cannot be packed into the engine's buckets.
 */
aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
DEBUG_PRINTF("too many literals: %zu\n", lits.size());
return nullptr;
}
#ifdef TEDDY_DEBUG
for (size_t i = 0; i < lits.size(); i++) {
printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(),
lits[i].nocase ? "caseless" : "caseful");
for (size_t j = 0; j < lits[i].s.size(); j++) {
printf("%02x", ((u32)lits[i].s[j])&0xff);
}
printf("\n");
}
#endif
// Assign literals to buckets: packed via pack() when confirmation is needed,
// otherwise one literal per bucket.
map<BucketIndex, std::vector<LiteralIndex> > bucketToLits;
if(eng.needConfirm(lits)) {
if (!pack(bucketToLits)) {
DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n",
lits.size(), eng.getNumBuckets());
return nullptr;
}
} else {
for (u32 i = 0; i < lits.size(); i++) {
bucketToLits[i].push_back(i);
}
}
// NOTE(review): the early returns above do not free link.first, whereas the
// success path below does -- confirm that callers keep ownership on failure.
// Each mask is a pair of 16-entry nibble tables, maskWidth bytes per entry.
u32 maskWidth = eng.getNumBuckets() / 8;
size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng);
pair<u8 *, size_t> confirmTmp
= setupFullMultiConfs(lits, eng, bucketToLits, make_small);
size_t size = ROUNDUP_N(sizeof(Teddy) +
maskLen +
confirmTmp.second +
floodControlTmp.second +
link.second, 16 * maskWidth);
aligned_unique_ptr<FDR> fdr = aligned_zmalloc_unique<FDR>(size);
assert(fdr); // otherwise would have thrown std::bad_alloc
Teddy *teddy = (Teddy *)fdr.get(); // ugly
u8 *teddy_base = (u8 *)teddy;
teddy->size = size;
teddy->engineID = eng.getID();
teddy->maxStringLen = verify_u32(maxLen(lits));
// Copy the confirm structures, flood control data and link buffer in behind
// the (still zeroed) masks, freeing each temporary once copied.
u8 *ptr = teddy_base + sizeof(Teddy) + maskLen;
memcpy(ptr, confirmTmp.first, confirmTmp.second);
ptr += confirmTmp.second;
aligned_free(confirmTmp.first);
teddy->floodOffset = verify_u32(ptr - teddy_base);
memcpy(ptr, floodControlTmp.first, floodControlTmp.second);
ptr += floodControlTmp.second;
aligned_free(floodControlTmp.first);
if (link.first) {
teddy->link = verify_u32(ptr - teddy_base);
memcpy(ptr, link.first, link.second);
aligned_free(link.first);
} else {
teddy->link = 0;
}
// Fill in the nibble masks: every literal sets its bucket's bit in the table
// entries its trailing characters can match.
u8 *baseMsk = teddy_base + sizeof(Teddy);
for (map<BucketIndex, std::vector<LiteralIndex> >::const_iterator
i = bucketToLits.begin(),
e = bucketToLits.end();
i != e; ++i) {
const u32 bucket_id = i->first;
const vector<LiteralIndex> &ids = i->second;
// bit of this bucket within its byte; bucket_id / 8 selects the byte
const u8 bmsk = 1U << (bucket_id % 8);
for (vector<LiteralIndex>::const_iterator i2 = ids.begin(),
e2 = ids.end();
i2 != e2; ++i2) {
LiteralIndex lit_id = *i2;
const hwlmLiteral & l = lits[lit_id];
DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
const u32 sz = verify_u32(l.s.size());
// fill in masks
for (u32 j = 0; j < eng.numMasks; j++) {
// mask j covers the character j places before the end of the literal
u32 msk_id_lo = j * 2 * maskWidth + (bucket_id / 8);
u32 msk_id_hi = (j * 2 + 1) * maskWidth + (bucket_id / 8);
// if we don't have a char at this position, fill in all 16
// locations in these masks with '1'
if (j >= sz) {
for (u32 n = 0; n < 16; n++) {
baseMsk[msk_id_lo * 16 + n] |= bmsk;
baseMsk[msk_id_hi * 16 + n] |= bmsk;
}
} else {
u8 c = l.s[sz - 1 - j];
// if we do have a char at this position
const u32 hiShift = 4;
u32 n_hi = (c >> hiShift) & 0xf;
u32 n_lo = c & 0xf;
if (j < l.msk.size() && l.msk[l.msk.size() - 1 - j]) {
// the literal has a non-zero mask byte here: accept every nibble
// value that agrees with cmp on the bits selected by msk
u8 m = l.msk[l.msk.size() - 1 - j];
u8 m_hi = (m >> hiShift) & 0xf;
u8 m_lo = m & 0xf;
// cmp is indexed with msk's size, i.e. assumed to be the same length
u8 cmp = l.cmp[l.msk.size() - 1 - j];
u8 cmp_lo = cmp & 0xf;
u8 cmp_hi = (cmp >> hiShift) & 0xf;
for (u8 cm = 0; cm < 0x10; cm++) {
if ((cm & m_lo) == (cmp_lo & m_lo)) {
baseMsk[msk_id_lo * 16 + cm] |= bmsk;
}
if ((cm & m_hi) == (cmp_hi & m_hi)) {
baseMsk[msk_id_hi * 16 + cm] |= bmsk;
}
}
} else{
if (l.nocase && ourisalpha(c)) {
// caseless letter: accept the high nibble with the 0x20 case bit
// both cleared and set
u32 cmHalfClear = (0xdf >> hiShift) & 0xf;
u32 cmHalfSet = (0x20 >> hiShift) & 0xf;
baseMsk[msk_id_hi * 16 + (n_hi & cmHalfClear)] |= bmsk;
baseMsk[msk_id_hi * 16 + (n_hi | cmHalfSet )] |= bmsk;
} else {
baseMsk[msk_id_hi * 16 + n_hi] |= bmsk;
}
baseMsk[msk_id_lo * 16 + n_lo] |= bmsk;
}
}
}
}
}
#ifdef TEDDY_DEBUG
// NOTE(review): this dump walks numMasks * 2 rows of 16 bytes and does not
// take maskWidth into account -- check the output for 16-bucket engines.
for (u32 i = 0; i < eng.numMasks * 2; i++) {
for (u32 j = 0; j < 16; j++) {
u8 val = baseMsk[i * 16 + j];
for (u32 k = 0; k < 8; k++) {
printf("%s", ((val >> k) & 0x1) ? "1" : "0");
}
printf(" ");
}
printf("\n");
}
#endif
return fdr;
}
} // namespace
/** \brief Build a Teddy table for \a lits. With hint == HINT_INVALID the
 * engine is chosen from the target and the literals; otherwise the hint
 * selects the engine description. Returns nullptr if no engine description is
 * available or the build fails. */
aligned_unique_ptr<FDR> teddyBuildTableHinted(const vector<hwlmLiteral> &lits,
                                              bool make_small, u32 hint,
                                              const target_t &target,
                                              pair<u8 *, size_t> link) {
    unique_ptr<TeddyEngineDescription> engine =
        (hint == HINT_INVALID) ? chooseTeddyEngine(target, lits)
                               : getTeddyDescription(hint);
    if (!engine) {
        return nullptr;
    }

    TeddyCompiler compiler(lits, *engine, make_small);
    return compiler.build(link);
}
} // namespace ue2

56
src/fdr/teddy_compile.h Normal file
View File

@@ -0,0 +1,56 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief FDR literal matcher: Teddy build API.
*/
#ifndef TEDDY_COMPILE_H
#define TEDDY_COMPILE_H
#include "ue2common.h"
#include "util/alloc.h"
#include <vector>
#include <utility> // std::pair
struct FDR;
struct target_t;
namespace ue2 {
struct hwlmLiteral;
/**
 * \brief Build a Teddy literal matcher table for the given literals.
 *
 * \param lits literals to match
 * \param make_small passed on to the confirm-structure builder
 * \param hint engine ID to use, or HINT_INVALID to have an engine chosen
 *        from the target and the literals
 * \param target target the engine is chosen for
 * \param link (pointer, size) of a buffer appended to the table
 * \return the built table, or nullptr if no suitable engine exists or the
 *         literals cannot be handled by it
 */
ue2::aligned_unique_ptr<FDR>
teddyBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
u32 hint, const target_t &target,
std::pair<u8 *, size_t> link);
} // namespace ue2
#endif // TEDDY_COMPILE_H

View File

@@ -0,0 +1,207 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_compile_internal.h"
#include "fdr_confirm.h"
#include "ue2common.h"
#include "hs_internal.h"
#include "fdr_engine_description.h"
#include "teddy_internal.h"
#include "teddy_engine_description.h"
#include "util/make_unique.h"
#include <cmath>
using namespace std;
namespace ue2 {
/** \brief Construct a description from a static engine definition; the
 * definition's CPU feature set is converted to a target via
 * targetByArchFeatures(). */
TeddyEngineDescription::TeddyEngineDescription(const TeddyEngineDef &def)
: EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
def.numBuckets, def.confirmPullBackDistance,
def.confirmTopLevelSplit),
numMasks(def.numMasks), packed(def.packed) {}
/** \brief The default flood suffix length of a Teddy engine is its number of
 * masks. */
u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const {
return numMasks;
}
bool TeddyEngineDescription::needConfirm(const vector<hwlmLiteral> &lits) const {
if (packed || lits.size() > getNumBuckets()) {
return true;
}
for (const auto &lit : lits) {
if (lit.s.size() > numMasks || !lit.msk.empty()) {
return true;
}
}
return false;
}
#include "teddy_autogen_compiler.cpp"
/** \brief Length of the longest run of one repeated character found at the
 * end of any literal in \a vl (0 if \a vl is empty). */
static
size_t maxFloodTailLen(const vector<hwlmLiteral> &vl) {
    size_t longest = 0;
    for (const auto &lit : vl) {
        const string &s = lit.s;
        assert(!s.empty());
        // Count how many characters at the end equal the last one.
        size_t run = 1;
        while (run < s.length() &&
               s[s.length() - run - 1] == s[s.length() - 1]) {
            run++;
        }
        longest = max(longest, run);
    }
    return longest;
}
/**
 * \brief Decide whether a Teddy engine may be used for this set of literals
 * on this target.
 *
 * An engine is rejected if it is not valid on the target, if it has fewer
 * buckets than literals without being packed, if the literals exceed its
 * bucket load, if it has more masks than the longest literal, or if a large
 * literal set (more than 40) has over a fifth of its literals shorter than
 * the number of masks.
 */
static
bool isAllowed(const vector<hwlmLiteral> &vl, const TeddyEngineDescription &eng,
               const size_t max_lit_len, const target_t &target) {
    const size_t num_lits = vl.size();

    if (!eng.isValidOnTarget(target)) {
        DEBUG_PRINTF("%u disallowed: not valid on target\n", eng.getID());
        return false;
    }

    if (!eng.packed && num_lits > eng.getNumBuckets()) {
        DEBUG_PRINTF("%u disallowed: num buckets < num lits and not packed\n",
                     eng.getID());
        return false;
    }

    if (num_lits > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
        DEBUG_PRINTF("%u disallowed: too many lits for num buckets\n",
                     eng.getID());
        return false;
    }

    if (max_lit_len < eng.numMasks) {
        DEBUG_PRINTF("%u disallowed: more masks than max lit len (%zu)\n",
                     eng.getID(), max_lit_len);
        return false;
    }

    // Small literal sets are not subject to the short-literal check.
    if (num_lits <= 40) {
        return true;
    }

    u32 n_small_lits = 0;
    for (const auto &lit : vl) {
        n_small_lits += (lit.s.length() < eng.numMasks) ? 1 : 0;
    }
    if (n_small_lits * 5 > vl.size()) {
        DEBUG_PRINTF("too many short literals (%u)\n", n_small_lits);
        return false;
    }

    return true;
}
/**
 * \brief Choose the best-scoring Teddy engine for a literal set.
 *
 * Every description produced by getTeddyDescriptions() that passes
 * isAllowed() is given a heuristic score; the first engine with the highest
 * score wins. Returns a copy of the winning description, or nullptr if no
 * engine is allowed.
 */
unique_ptr<TeddyEngineDescription>
chooseTeddyEngine(const target_t &target, const vector<hwlmLiteral> &vl) {
    vector<TeddyEngineDescription> candidates;
    getTeddyDescriptions(&candidates);

    const size_t max_lit_len = maxLen(vl);
    const size_t max_flood_tail = maxFloodTailLen(vl);
    DEBUG_PRINTF("%zu lits, max_lit_len=%zu, max_flood_tail=%zu\n", vl.size(),
                 max_lit_len, max_flood_tail);

    const TeddyEngineDescription *winner = nullptr;
    u32 winning_score = 0;

    for (const TeddyEngineDescription &eng : candidates) {
        if (!isAllowed(vl, eng, max_lit_len, target)) {
            continue;
        }

        const auto buckets = eng.getNumBuckets();

        // Unpacked models are preferred.
        u32 score = eng.packed ? 0 : 100;

        // Heavily loaded sets favour more masks; lightly loaded sets get a
        // flat bonus.
        const bool heavily_loaded = vl.size() > 4 * buckets;
        if (heavily_loaded) {
            score += eng.numMasks * 4;
        } else {
            score += 100;
        }

        // Enough masks to cover the longest flood tail.
        if (eng.numMasks > max_flood_tail) {
            score += 50;
        }

        // Bonus peaks at exactly three masks.
        score += 6 / (abs(3 - (int)eng.numMasks) + 1);

        // Fewer buckets means a cheaper, smaller model.
        score += 16 / buckets;

        DEBUG_PRINTF("teddy %u: masks=%u, buckets=%u, packed=%u "
                     "-> score=%u\n",
                     eng.getID(), eng.numMasks, eng.getNumBuckets(),
                     eng.packed ? 1U : 0U, score);

        const bool improves = (winner == nullptr) || (score > winning_score);
        if (improves) {
            winner = &eng;
            winning_score = score;
        }
    }

    if (winner == nullptr) {
        DEBUG_PRINTF("failed to find engine\n");
        return nullptr;
    }

    DEBUG_PRINTF("using engine %u\n", winner->getID());
    return ue2::make_unique<TeddyEngineDescription>(*winner);
}
/**
 * \brief Look up a Teddy engine description by engine ID.
 *
 * Returns a copy of the first description whose ID equals \p engineID, or
 * nullptr if none matches.
 */
unique_ptr<TeddyEngineDescription> getTeddyDescription(u32 engineID) {
    vector<TeddyEngineDescription> all;
    getTeddyDescriptions(&all);

    for (size_t i = 0; i < all.size(); i++) {
        const TeddyEngineDescription &candidate = all[i];
        if (candidate.getID() != engineID) {
            continue;
        }
        return ue2::make_unique<TeddyEngineDescription>(candidate);
    }

    return nullptr;
}
} // namespace ue2

View File

@@ -0,0 +1,70 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TEDDY_ENGINE_DESCRIPTION_H
#define TEDDY_ENGINE_DESCRIPTION_H
#include "engine_description.h"
#include "fdr_compile_internal.h"
#include <memory>
#include <vector>
namespace ue2 {
#define TEDDY_BUCKET_LOAD 6
/**
 * \brief Plain definition record for one Teddy engine variant.
 *
 * Consumed by the TeddyEngineDescription constructor, which copies numMasks
 * and packed onto the description and forwards the remaining fields to its
 * EngineDescription base.
 */
struct TeddyEngineDef {
u32 id; //!< engine identifier
u64a cpu_features; //!< feature mask, passed to targetByArchFeatures()
u32 numMasks; //!< number of masks used by this variant
u32 numBuckets; //!< number of buckets used by this variant
bool packed; //!< true for a packed model
u32 confirmPullBackDistance; //!< forwarded to EngineDescription
u32 confirmTopLevelSplit; //!< forwarded to EngineDescription
};
/**
 * \brief Engine description for one Teddy variant, extending the generic
 * EngineDescription with the Teddy-specific mask count and packed flag.
 */
class TeddyEngineDescription : public EngineDescription {
public:
u32 numMasks; //!< number of masks; also returned as the flood suffix length
bool packed; //!< true for a packed model
/** \brief Build a description from an engine definition. */
explicit TeddyEngineDescription(const TeddyEngineDef &def);
/** \brief Default flood suffix length for this engine. */
u32 getDefaultFloodSuffixLength() const override;
/** \brief True if this engine needs a confirm step for \p lits. */
bool needConfirm(const std::vector<hwlmLiteral> &lits) const;
};
std::unique_ptr<TeddyEngineDescription>
chooseTeddyEngine(const target_t &target, const std::vector<hwlmLiteral> &vl);
std::unique_ptr<TeddyEngineDescription> getTeddyDescription(u32 engineID);
void getTeddyDescriptions(std::vector<TeddyEngineDescription> *out);
} // namespace ue2
#endif

46
src/fdr/teddy_internal.h Normal file
View File

@@ -0,0 +1,46 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TEDDY_INTERNAL_H
#define TEDDY_INTERNAL_H
#include "ue2common.h"
/*
 * Teddy engine structure: eight u32 fields.
 *
 * NOTE(review): the field meanings are not visible from this header. The
 * names suggest an engine identifier, a size, a maximum literal length and
 * two offsets, followed by padding - confirm against the FDR structure that
 * the comment below refers to before relying on them.
 */
// first part is compatible with an FDR
struct Teddy {
u32 engineID;
u32 size;
u32 maxStringLen;
u32 floodOffset;
u32 link;
u32 pad1;
u32 pad2;
u32 pad3;
};
#endif

374
src/grey.cpp Normal file
View File

@@ -0,0 +1,374 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "grey.h"
#include "ue2common.h"
#include <algorithm>
#include <cstdlib> // exit
#include <string>
#include <vector>
#define DEFAULT_MAX_HISTORY 60
using namespace std;
namespace ue2 {
/**
 * \brief Construct a Grey holding the default value of every tunable.
 *
 * Every scalar member is given its default in the initializer list below;
 * dumpPath is not listed and is therefore default-constructed. The body only
 * asserts that the default maxAnchoredRegion is below 64.
 */
Grey::Grey(void) :
optimiseComponentTree(true),
performGraphSimplification(true),
prefilterReductions(true),
removeEdgeRedundancy(true),
allowGough(true),
allowHaigLit(true),
allowLitHaig(true),
allowLbr(true),
allowMcClellan(true),
allowPuff(true),
allowRose(true),
allowExtendedNFA(true), /* bounded repeats of course */
allowLimExNFA(true),
allowSidecar(true),
allowAnchoredAcyclic(true),
allowSmallLiteralSet(true),
allowCastle(true),
allowDecoratedLiteral(true),
allowNoodle(true),
fdrAllowTeddy(true),
puffImproveHead(true),
castleExclusive(true),
mergeSEP(true), /* short exhaustible passthroughs */
mergeRose(true), // roses inside rose
mergeSuffixes(true), // suffix nfas inside rose
mergeOutfixes(true),
onlyOneOutfix(false),
allowShermanStates(true),
allowMcClellan8(true),
highlanderPruneDFA(true),
minimizeDFA(true),
accelerateDFA(true),
accelerateNFA(true),
reverseAccelerate(true),
squashNFA(true),
compressNFAState(true),
numberNFAStatesWrong(false), /* debugging only */
highlanderSquash(true),
allowZombies(true),
floodAsPuffette(false),
nfaForceSize(0),
nfaForceShifts(0),
maxHistoryAvailable(DEFAULT_MAX_HISTORY),
minHistoryAvailable(0), /* debugging only */
maxAnchoredRegion(63), /* for rose's atable to run over */
minRoseLiteralLength(3),
minRoseNetflowLiteralLength(2),
maxRoseNetflowEdges(50000), /* otherwise no netflow pass. */
minExtBoundedRepeatSize(32),
goughCopyPropagate(true),
goughRegisterAllocate(true),
shortcutLiterals(true),
roseGraphReduction(true),
roseRoleAliasing(true),
roseMasks(true),
roseMaxBadLeafLength(5),
roseConvertInfBadLeaves(true),
roseConvertFloodProneSuffixes(true),
roseMergeRosesDuringAliasing(true),
roseMultiTopRoses(true),
roseHamsterMasks(true),
roseLookaroundMasks(true),
roseMcClellanPrefix(1),
roseMcClellanSuffix(1),
roseMcClellanOutfix(2),
roseTransformDelay(true),
roseDesiredSplit(4),
earlyMcClellanPrefix(true),
earlyMcClellanInfix(true),
earlyMcClellanSuffix(true),
allowCountingMiracles(true),
allowSomChain(true),
somMaxRevNfaLength(126),
hamsterAccelForward(true),
hamsterAccelReverse(false),
miracleHistoryBonus(16),
equivalenceEnable(true),
allowSmallWrite(true), // McClellan dfas for small patterns
smallWriteLargestBuffer(70), // largest buffer that can be
// considered a small write
// all blocks larger than this
// are given to rose &co
smallWriteLargestBufferBad(35),
limitSmallWriteOutfixSize(1048576), // 1 MB
dumpFlags(0),
limitPatternCount(8000000), // 8M patterns
limitPatternLength(16000), // 16K bytes
limitGraphVertices(500000), // 500K vertices
limitGraphEdges(1000000), // 1M edges
limitReportCount(4*8000000),
limitLiteralCount(8000000), // 8M literals
limitLiteralLength(16000),
limitLiteralMatcherChars(1073741824), // 1 GB
limitLiteralMatcherSize(1073741824), // 1 GB
limitRoseRoleCount(4*8000000),
limitRoseEngineCount(8000000), // 8M engines
limitRoseAnchoredSize(1073741824), // 1 GB
limitEngineSize(1073741824), // 1 GB
limitDFASize(1073741824), // 1 GB
limitNFASize(1048576), // 1 MB
limitLBRSize(1048576) // 1 MB
{
assert(maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
}
} // namespace ue2
#ifndef RELEASE_BUILD
#include <boost/lexical_cast.hpp>
using boost::lexical_cast;
namespace ue2 {
/**
 * \brief Apply a string of overrides to the Grey settings in \p g.
 *
 * The string is a sequence of "key:value" pairs separated by ';'. Each value
 * is parsed as an unsigned integer with boost::lexical_cast. A key is either
 * the name of a Grey member listed below (the value is assigned to it) or one
 * of the composite keys "simple_som", "forceOutfixesNFA", "forceOutfixesDFA"
 * and "forceOutfixes", which set several members at once and do not use the
 * value.
 *
 * Passing exactly "help" or "help:" prints every overridable member together
 * with its default value. An unrecognised key is reported with printf; after
 * the whole string has been processed, the help listing is printed and the
 * process exits with status 1.
 */
void applyGreyOverrides(Grey *g, const string &s) {
string::const_iterator p = s.begin();
string::const_iterator pe = s.end();
string help = "help:0";
bool invalid_key_seen = false;
// Default-constructed settings, used only to print defaults in help output.
Grey defaultg;
if (s == "help" || s == "help:") {
printf("Valid grey overrides:\n");
// Re-point the parser at a well-formed "help:0" pair.
p = help.begin();
pe = help.end();
}
while (p != pe) {
// Key runs up to the next ':'; stop parsing if there is none left.
string::const_iterator ke = find(p, pe, ':');
if (ke == pe) {
break;
}
string key(p, ke);
// Value runs from just after the ':' to the next ';' or end of string.
string::const_iterator ve = find(ke, pe, ';');
unsigned int value = lexical_cast<unsigned int>(string(ke + 1, ve));
bool done = false;
/* surely there exists a nice template to go with this macro to make
* all the boring code disappear */
#define G_UPDATE(k) do { \
if (key == ""#k) { g->k = value; done = 1;} \
if (key == "help") { \
printf("\t%-30s\tdefault: %s\n", #k, \
lexical_cast<string>(defaultg.k).c_str()); \
} \
} while (0)
G_UPDATE(optimiseComponentTree);
G_UPDATE(performGraphSimplification);
G_UPDATE(prefilterReductions);
G_UPDATE(removeEdgeRedundancy);
G_UPDATE(allowGough);
G_UPDATE(allowHaigLit);
G_UPDATE(allowLitHaig);
G_UPDATE(allowLbr);
G_UPDATE(allowMcClellan);
G_UPDATE(allowPuff);
G_UPDATE(allowRose);
G_UPDATE(allowExtendedNFA);
G_UPDATE(allowLimExNFA);
G_UPDATE(allowSidecar);
G_UPDATE(allowAnchoredAcyclic);
G_UPDATE(allowSmallLiteralSet);
G_UPDATE(allowCastle);
G_UPDATE(allowDecoratedLiteral);
G_UPDATE(allowNoodle);
G_UPDATE(fdrAllowTeddy);
G_UPDATE(puffImproveHead);
G_UPDATE(castleExclusive);
G_UPDATE(mergeSEP);
G_UPDATE(mergeRose);
G_UPDATE(mergeSuffixes);
G_UPDATE(mergeOutfixes);
G_UPDATE(onlyOneOutfix);
G_UPDATE(allowShermanStates);
G_UPDATE(allowMcClellan8);
G_UPDATE(highlanderPruneDFA);
G_UPDATE(minimizeDFA);
G_UPDATE(accelerateDFA);
G_UPDATE(accelerateNFA);
G_UPDATE(reverseAccelerate);
G_UPDATE(squashNFA);
G_UPDATE(compressNFAState);
G_UPDATE(numberNFAStatesWrong);
G_UPDATE(allowZombies);
G_UPDATE(floodAsPuffette);
G_UPDATE(nfaForceSize);
G_UPDATE(nfaForceShifts);
G_UPDATE(highlanderSquash);
G_UPDATE(maxHistoryAvailable);
G_UPDATE(minHistoryAvailable);
G_UPDATE(maxAnchoredRegion);
G_UPDATE(minRoseLiteralLength);
G_UPDATE(minRoseNetflowLiteralLength);
G_UPDATE(maxRoseNetflowEdges);
G_UPDATE(minExtBoundedRepeatSize);
G_UPDATE(goughCopyPropagate);
G_UPDATE(goughRegisterAllocate);
G_UPDATE(shortcutLiterals);
G_UPDATE(roseGraphReduction);
G_UPDATE(roseRoleAliasing);
G_UPDATE(roseMasks);
G_UPDATE(roseMaxBadLeafLength);
G_UPDATE(roseConvertInfBadLeaves);
G_UPDATE(roseConvertFloodProneSuffixes);
G_UPDATE(roseMergeRosesDuringAliasing);
G_UPDATE(roseMultiTopRoses);
G_UPDATE(roseHamsterMasks);
G_UPDATE(roseLookaroundMasks);
G_UPDATE(roseMcClellanPrefix);
G_UPDATE(roseMcClellanSuffix);
G_UPDATE(roseMcClellanOutfix);
G_UPDATE(roseTransformDelay);
G_UPDATE(roseDesiredSplit);
G_UPDATE(earlyMcClellanPrefix);
G_UPDATE(earlyMcClellanInfix);
G_UPDATE(earlyMcClellanSuffix);
G_UPDATE(allowSomChain);
G_UPDATE(allowCountingMiracles);
G_UPDATE(somMaxRevNfaLength);
G_UPDATE(hamsterAccelForward);
G_UPDATE(hamsterAccelReverse);
G_UPDATE(miracleHistoryBonus);
G_UPDATE(equivalenceEnable);
G_UPDATE(allowSmallWrite);
G_UPDATE(smallWriteLargestBuffer);
G_UPDATE(smallWriteLargestBufferBad);
G_UPDATE(limitSmallWriteOutfixSize);
G_UPDATE(limitPatternCount);
G_UPDATE(limitPatternLength);
G_UPDATE(limitGraphVertices);
G_UPDATE(limitGraphEdges);
G_UPDATE(limitReportCount);
G_UPDATE(limitLiteralCount);
G_UPDATE(limitLiteralLength);
G_UPDATE(limitLiteralMatcherChars);
G_UPDATE(limitLiteralMatcherSize);
G_UPDATE(limitRoseRoleCount);
G_UPDATE(limitRoseEngineCount);
G_UPDATE(limitRoseAnchoredSize);
G_UPDATE(limitEngineSize);
G_UPDATE(limitDFASize);
G_UPDATE(limitNFASize);
G_UPDATE(limitLBRSize);
#undef G_UPDATE
// Composite keys: each sets a fixed group of members; value is not used.
if (key == "simple_som") {
g->allowHaigLit = false;
g->allowLitHaig = false;
g->allowSomChain = false;
g->somMaxRevNfaLength = 0;
done = true;
}
if (key == "forceOutfixesNFA") {
g->allowAnchoredAcyclic = false;
g->allowCastle = false;
g->allowDecoratedLiteral = false;
g->allowGough = false;
g->allowHaigLit = false;
g->allowLbr = false;
g->allowLimExNFA = true;
g->allowLitHaig = false;
g->allowMcClellan = false;
g->allowPuff = false;
g->allowRose = false;
g->allowSmallLiteralSet = false;
g->roseMasks = false;
done = true;
}
if (key == "forceOutfixesDFA") {
g->allowAnchoredAcyclic = false;
g->allowCastle = false;
g->allowDecoratedLiteral = false;
g->allowGough = false;
g->allowHaigLit = false;
g->allowLbr = false;
g->allowLimExNFA = false;
g->allowLitHaig = false;
g->allowMcClellan = true;
g->allowPuff = false;
g->allowRose = false;
g->allowSmallLiteralSet = false;
g->roseMasks = false;
done = true;
}
if (key == "forceOutfixes") {
g->allowAnchoredAcyclic = false;
g->allowCastle = false;
g->allowDecoratedLiteral = false;
g->allowGough = true;
g->allowHaigLit = false;
g->allowLbr = false;
g->allowLimExNFA = true;
g->allowLitHaig = false;
g->allowMcClellan = true;
g->allowPuff = false;
g->allowRose = false;
g->allowSmallLiteralSet = false;
g->roseMasks = false;
done = true;
}
if (!done && key != "help") {
printf("Invalid grey override key %s:%u\n", key.c_str(), value);
invalid_key_seen = true;
}
// Advance past this pair, skipping the ';' separator if present.
p = ve;
if (p != pe) {
++p;
}
}
if (invalid_key_seen) {
// Show the list of valid keys, then terminate the process.
applyGreyOverrides(g, "help");
exit(1);
}
assert(g->maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
}
} // namespace ue2
#endif

197
src/grey.h Normal file
View File

@@ -0,0 +1,197 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef GREY_H
#define GREY_H
#include <vector>
#include <string>
#include "ue2common.h"
namespace ue2 {
/**
 * \brief Collection of compile-time tunables and resource limits.
 *
 * The constructor gives every scalar member its default value; dumpPath is
 * default-constructed.
 */
struct Grey {
Grey(void);
bool optimiseComponentTree;
bool performGraphSimplification;
bool prefilterReductions;
bool removeEdgeRedundancy;
bool allowGough;
bool allowHaigLit;
bool allowLitHaig;
bool allowLbr;
bool allowMcClellan;
bool allowPuff;
bool allowRose;
bool allowExtendedNFA;
bool allowLimExNFA;
bool allowSidecar;
bool allowAnchoredAcyclic;
bool allowSmallLiteralSet;
bool allowCastle;
bool allowDecoratedLiteral;
bool allowNoodle;
bool fdrAllowTeddy;
bool puffImproveHead;
bool castleExclusive; // enable castle mutual exclusion analysis
bool mergeSEP;
bool mergeRose;
bool mergeSuffixes;
bool mergeOutfixes;
bool onlyOneOutfix; // if > 1 outfix, fail compile
bool allowShermanStates;
bool allowMcClellan8;
bool highlanderPruneDFA;
bool minimizeDFA;
bool accelerateDFA;
bool accelerateNFA;
bool reverseAccelerate;
bool squashNFA;
bool compressNFAState;
bool numberNFAStatesWrong;
bool highlanderSquash;
bool allowZombies;
bool floodAsPuffette;
u32 nfaForceSize;
u32 nfaForceShifts;
u32 maxHistoryAvailable;
u32 minHistoryAvailable;
u32 maxAnchoredRegion;
u32 minRoseLiteralLength;
u32 minRoseNetflowLiteralLength;
u32 maxRoseNetflowEdges;
u32 minExtBoundedRepeatSize; /* to be considered for ng_repeat */
bool goughCopyPropagate;
bool goughRegisterAllocate;
bool shortcutLiterals;
bool roseGraphReduction;
bool roseRoleAliasing;
bool roseMasks;
u32 roseMaxBadLeafLength;
bool roseConvertInfBadLeaves;
bool roseConvertFloodProneSuffixes;
bool roseMergeRosesDuringAliasing;
bool roseMultiTopRoses;
bool roseHamsterMasks;
bool roseLookaroundMasks;
u32 roseMcClellanPrefix; /* 0 = off, 1 = only if large nfa, 2 = always */
u32 roseMcClellanSuffix; /* 0 = off, 1 = only if very large nfa, 2 =
* always */
u32 roseMcClellanOutfix; /* 0 = off, 1 = sometimes, 2 = almost always */
bool roseTransformDelay;
u32 roseDesiredSplit;
bool earlyMcClellanPrefix;
bool earlyMcClellanInfix;
bool earlyMcClellanSuffix;
bool allowCountingMiracles;
bool allowSomChain;
u32 somMaxRevNfaLength;
bool hamsterAccelForward;
bool hamsterAccelReverse; // currently not implemented
u32 miracleHistoryBonus; /* cheap hack to make miracles better, TODO
* something dignified */
bool equivalenceEnable;
// SmallWrite engine
bool allowSmallWrite;
u32 smallWriteLargestBuffer; // largest buffer that can be small write
u32 smallWriteLargestBufferBad;// largest buffer that can be small write
                               // NOTE(review): same wording as the line
                               // above; what "Bad" selects is not visible
                               // here - confirm against the users
u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
// Bit flags combined into dumpFlags.
enum DumpFlags {
DUMP_NONE = 0,
DUMP_BASICS = 1 << 0, // Dump basic textual data
DUMP_PARSE = 1 << 1, // Dump component tree to .txt
DUMP_INT_GRAPH = 1 << 2, // Dump non-implementation graphs
DUMP_IMPL = 1 << 3 // Dump implementation graphs
};
u32 dumpFlags;
std::string dumpPath;
/* Resource limits. These are somewhat arbitrary, but are intended to bound
* the input to many of our internal structures. Exceeding one of these
* limits will cause an error to be returned to the user.
*
* NOTE: Raising these limits may cause smoke to come out of parts of
* the runtime. */
u32 limitPatternCount; //!< max number of patterns
u32 limitPatternLength; //!< max number of characters in a regex
u32 limitGraphVertices; //!< max number of states in built NFA graph
u32 limitGraphEdges; //!< max number of edges in built NFA graph
u32 limitReportCount; //!< max number of ReportIDs allocated internally
// HWLM literal matcher limits.
u32 limitLiteralCount; //!< max number of literals in an HWLM table
u32 limitLiteralLength; //!< max number of characters in a literal
u32 limitLiteralMatcherChars; //!< max characters in an HWLM literal matcher
u32 limitLiteralMatcherSize; //!< max size of an HWLM matcher (in bytes)
// Rose limits.
u32 limitRoseRoleCount; //!< max number of Rose roles
u32 limitRoseEngineCount; //!< max prefix/infix/suffix/outfix engines
u32 limitRoseAnchoredSize; //!< max total size of anchored DFAs (bytes)
// Engine (DFA/NFA/etc) limits.
u32 limitEngineSize; //!< max size of an engine (in bytes)
u32 limitDFASize; //!< max size of a DFA (in bytes)
u32 limitNFASize; //!< max size of an NFA (in bytes)
u32 limitLBRSize; //!< max size of an LBR engine (in bytes)
};
#ifndef RELEASE_BUILD
/**
 * \brief Apply a ';'-separated list of "key:value" overrides to \p g.
 *
 * Only available in non-release builds. <string> is included at the top of
 * this header; a standard header must not be included from inside a
 * namespace, so it is deliberately not re-included here.
 */
void applyGreyOverrides(Grey *g, const std::string &overrides);
#endif
} // namespace ue2
#endif

419
src/hs.cpp Normal file
View File

@@ -0,0 +1,419 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Compiler front-end, including public API calls for compilation.
*/
#include "allocator.h"
#include "ue2common.h"
#include "grey.h"
#include "hs_compile.h"
#include "hs_internal.h"
#include "database.h"
#include "compiler/compiler.h"
#include "compiler/error.h"
#include "nfagraph/ng.h"
#include "nfagraph/ng_expr_info.h"
#include "parser/parse_error.h"
#include "parser/Parser.h"
#include "parser/prefilter.h"
#include "util/compile_error.h"
#include "util/cpuid_flags.h"
#include "util/depth.h"
#include "util/popcount.h"
#include "util/target_info.h"
#include <cassert>
#include <cstddef>
#include <cstring>
#include <limits.h>
#include <string>
#include <vector>
using namespace std;
using namespace ue2;
/** \brief True if \p mode contains no bits outside the known mode flags. */
static
bool validModeFlags(unsigned int mode) {
    static const unsigned knownFlags = HS_MODE_BLOCK
                                     | HS_MODE_STREAM
                                     | HS_MODE_VECTORED
                                     | HS_MODE_SOM_HORIZON_LARGE
                                     | HS_MODE_SOM_HORIZON_MEDIUM
                                     | HS_MODE_SOM_HORIZON_SMALL;

    const unsigned unknownBits = mode & ~knownFlags;
    return unknownBits == 0;
}
/**
 * \brief Validate the mode flags passed to a compile call.
 *
 * Returns true if \p mode is acceptable. Otherwise writes a newly generated
 * compile error to \p comp_error and returns false.
 */
static
bool checkMode(unsigned int mode, hs_compile_error **comp_error) {
    // Reject any bits that carry no meaning.
    if (!validModeFlags(mode)) {
        *comp_error = generateCompileError(
            "Invalid parameter: unrecognised mode flags.", -1);
        return false;
    }

    // Exactly one of block, streaming or vectored must be selected.
    const unsigned baseMode
        = mode & (HS_MODE_STREAM | HS_MODE_BLOCK | HS_MODE_VECTORED);
    if (popcount32(baseMode) != 1) {
        *comp_error = generateCompileError(
            "Invalid parameter: mode must have one (and only one) of "
            "HS_MODE_BLOCK, HS_MODE_STREAM or HS_MODE_VECTORED set.",
            -1);
        return false;
    }

    // SOM precision flags are optional; nothing more to check without them.
    const unsigned somFlags = mode & (HS_MODE_SOM_HORIZON_LARGE |
                                      HS_MODE_SOM_HORIZON_MEDIUM |
                                      HS_MODE_SOM_HORIZON_SMALL);
    if (somFlags == 0) {
        return true;
    }

    // SOM precision requires streaming mode.
    if ((mode & HS_MODE_STREAM) == 0) {
        *comp_error = generateCompileError(
            "Invalid parameter: the HS_MODE_SOM_HORIZON_ mode flags may only "
            "be set in streaming mode.", -1);
        return false;
    }

    // Clearing the lowest set bit leaves something only if more than one SOM
    // flag is present.
    const unsigned withoutLowestBit = somFlags & (somFlags - 1);
    if (withoutLowestBit != 0) {
        *comp_error = generateCompileError(
            "Invalid parameter: only one HS_MODE_SOM_HORIZON_ mode flag can "
            "be set.", -1);
        return false;
    }

    return true;
}
/**
 * \brief Validate optional platform information.
 *
 * A null \p p is accepted. Otherwise the CPU feature mask must contain only
 * known features and the tune value must not exceed the last known family;
 * on failure a compile error is written to \p comp_error and false is
 * returned.
 */
static
bool checkPlatform(const hs_platform_info *p, hs_compile_error **comp_error) {
#define HS_TUNE_LAST HS_TUNE_FAMILY_BDW
#define HS_CPU_FEATURES_ALL (HS_CPU_FEATURES_AVX2)
    if (p == nullptr) {
        return true;
    }

    const bool unknownFeatures = (p->cpu_features & ~HS_CPU_FEATURES_ALL) != 0;
    if (unknownFeatures) {
        *comp_error = generateCompileError(
            "Invalid cpu features specified in the platform information.",
            -1);
        return false;
    }

    const bool tuneOutOfRange = p->tune > HS_TUNE_LAST;
    if (tuneOutOfRange) {
        *comp_error = generateCompileError(
            "Invalid tuning value specified in the platform information.",
            -1);
        return false;
    }

    return true;
}
/**
 * \brief Map mode flags to start-of-match precision in bytes.
 *
 * Vectored mode and the large horizon both give 8, the medium horizon 4, the
 * small horizon 2, and 0 is returned when none of these flags is set.
 */
static
unsigned getSomPrecision(unsigned mode) {
    // Vectored mode always assumes full precision, same as the large horizon.
    if (mode & (HS_MODE_VECTORED | HS_MODE_SOM_HORIZON_LARGE)) {
        return 8;
    }
    if (mode & HS_MODE_SOM_HORIZON_MEDIUM) {
        return 4;
    }
    if (mode & HS_MODE_SOM_HORIZON_SMALL) {
        return 2;
    }
    return 0;
}
namespace ue2 {
/**
 * \brief Shared implementation behind the public compile entry points.
 *
 * Validates the arguments, adds each expression to an NG instance and builds
 * the database.
 *
 * \param expressions Array of \p elements pattern strings; must not be NULL.
 * \param flags       Per-expression flags, or NULL to use 0 for all.
 * \param ids         Per-expression IDs, or NULL to use 0 for all.
 * \param ext         Per-expression extended parameters, or NULL for none.
 * \param elements    Number of expressions; must be non-zero and no larger
 *                    than g.limitPatternCount.
 * \param mode        Mode flags, validated by checkMode().
 * \param platform    Target platform, or NULL to use the current target.
 * \param db          Receives the built database on success, nullptr on
 *                    failure.
 * \param comp_error  Receives nullptr on success or an error object on
 *                    failure. If this pointer itself is NULL, only the error
 *                    code is returned.
 * \param g           Settings used for the compile.
 *
 * \return HS_SUCCESS on success, HS_COMPILER_ERROR otherwise.
 */
hs_error_t
hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
                     const unsigned *ids, const hs_expr_ext *const *ext,
                     unsigned elements, unsigned mode,
                     const hs_platform_info_t *platform, hs_database_t **db,
                     hs_compile_error_t **comp_error, const Grey &g) {
    // Check the args: note that it's OK for flags, ids or ext to be null.
    if (!comp_error) {
        if (db) {
            *db = nullptr;
        }
        // nowhere to write the string, but we can still report an error code
        return HS_COMPILER_ERROR;
    }
    if (!db) {
        *comp_error = generateCompileError("Invalid parameter: db is NULL", -1);
        return HS_COMPILER_ERROR;
    }
    if (!expressions) {
        *db = nullptr;
        *comp_error
            = generateCompileError("Invalid parameter: expressions is NULL",
                                   -1);
        return HS_COMPILER_ERROR;
    }
    if (elements == 0) {
        *db = nullptr;
        *comp_error = generateCompileError("Invalid parameter: elements is zero", -1);
        return HS_COMPILER_ERROR;
    }

    if (!checkMode(mode, comp_error)) {
        *db = nullptr;
        assert(*comp_error); // set by checkMode.
        return HS_COMPILER_ERROR;
    }

    if (!checkPlatform(platform, comp_error)) {
        *db = nullptr;
        assert(*comp_error); // set by checkPlatform.
        return HS_COMPILER_ERROR;
    }

    if (elements > g.limitPatternCount) {
        *db = nullptr;
        *comp_error = generateCompileError("Number of patterns too large", -1);
        return HS_COMPILER_ERROR;
    }

    // This function is simply a wrapper around both the parser and compiler
    bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
    bool isVectored = mode & HS_MODE_VECTORED;
    unsigned somPrecision = getSomPrecision(mode);

    target_t target_info = platform ? target_t(*platform)
                                    : get_current_target();

    CompileContext cc(isStreaming, isVectored, target_info, g);
    NG ng(cc, somPrecision);

    try {
        for (unsigned int i = 0; i < elements; i++) {
            // Add this expression to the compiler
            try {
                addExpression(ng, i, expressions[i], flags ? flags[i] : 0,
                              ext ? ext[i] : nullptr, ids ? ids[i] : 0);
            } catch (CompileError &e) {
                /* Caught a parse error:
                 * throw it upstream as a CompileError with a specific index */
                e.setExpressionIndex(i);
                throw; /* do not slice */
            }
        }

        unsigned length = 0;
        struct hs_database *out = build(ng, &length);

        assert(out);    // should have thrown exception on error
        assert(length);

        *db = out;
        *comp_error = nullptr;

        return HS_SUCCESS;
    }
    catch (const CompileError &e) {
        // Compiler error occurred
        *db = nullptr;
        *comp_error = generateCompileError(e.reason,
                                           e.hasIndex ? (int)e.index : -1);
        return HS_COMPILER_ERROR;
    }
    // Catch by const reference: avoids copying the exception object and
    // slicing any derived allocation failure type.
    catch (const std::bad_alloc &) {
        *db = nullptr;
        *comp_error = const_cast<hs_compile_error_t *>(&hs_enomem);
        return HS_COMPILER_ERROR;
    }
    catch (...) {
        assert(!"Internal error, unexpected exception");
        *db = nullptr;
        *comp_error = const_cast<hs_compile_error_t *>(&hs_einternal);
        return HS_COMPILER_ERROR;
    }
}
} // namespace ue2
/**
 * \brief Compile a single expression into a database.
 *
 * A NULL \p expression is rejected here because the shared implementation
 * only checks the array pointer, not its elements. On that path \p db and
 * \p error are written only if they are non-NULL, so a caller that passes
 * NULL for either still gets HS_COMPILER_ERROR instead of a crash. All other
 * argument checking is done by hs_compile_multi_int().
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_compile(const char *expression, unsigned flags, unsigned mode,
                      const hs_platform_info_t *platform, hs_database_t **db,
                      hs_compile_error_t **error) {
    if (expression == nullptr) {
        if (db) {
            *db = nullptr;
        }
        if (error) {
            *error = generateCompileError(
                "Invalid parameter: expression is NULL", -1);
        }
        return HS_COMPILER_ERROR;
    }

    unsigned id = 0; // single expressions get zero as an ID
    const hs_expr_ext * const *ext = nullptr; // unused for this call.

    return hs_compile_multi_int(&expression, &flags, &id, ext, 1, mode,
                                platform, db, error, Grey());
}
/**
 * \brief Compile multiple expressions into a database, without extended
 * parameters and with default Grey settings.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_compile_multi(const char * const *expressions,
                            const unsigned *flags, const unsigned *ids,
                            unsigned elements, unsigned mode,
                            const hs_platform_info_t *platform,
                            hs_database_t **db, hs_compile_error_t **error) {
    // This entry point takes no per-expression extended parameters.
    const hs_expr_ext * const *no_ext = nullptr;
    const Grey defaults;

    return hs_compile_multi_int(expressions, flags, ids, no_ext, elements,
                                mode, platform, db, error, defaults);
}
/**
 * \brief Compile multiple expressions with per-expression extended
 * parameters, using default Grey settings.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_compile_ext_multi(const char * const *expressions,
                                const unsigned *flags, const unsigned *ids,
                                const hs_expr_ext * const *ext,
                                unsigned elements, unsigned mode,
                                const hs_platform_info_t *platform,
                                hs_database_t **db,
                                hs_compile_error_t **error) {
    const Grey defaults;

    return hs_compile_multi_int(expressions, flags, ids, ext, elements, mode,
                                platform, db, error, defaults);
}
/**
 * Shared implementation behind hs_expression_info().
 *
 * Parses a single expression, builds its graph wrapper and fills in an
 * hs_expr_info structure describing it. The result is copied into a block
 * obtained from hs_misc_alloc() and handed back through @a info.
 *
 * @param expression  Pattern to analyse; NULL is reported as an error.
 * @param flags       Flags for the pattern.
 * @param mode        Mode flags; only the HS_MODE_STREAM / HS_MODE_VECTORED
 *                    bits are inspected here.
 * @param info        Receives the allocated result on success. Whenever this
 *                    pointer is non-NULL and @a error is non-NULL, *info is
 *                    NULL on every failure path.
 * @param error       Receives the error on failure and NULL on success. If
 *                    this pointer is NULL the function returns
 *                    HS_COMPILER_ERROR without writing anything.
 *
 * @return HS_SUCCESS on success, HS_COMPILER_ERROR otherwise.
 */
static
hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
                                  unsigned int mode, hs_expr_info_t **info,
                                  hs_compile_error_t **error) {
    if (!error) {
        // nowhere to write an error, but we can still return an error code.
        return HS_COMPILER_ERROR;
    }

    if (!info) {
        *error = generateCompileError("Invalid parameter: info is NULL", -1);
        return HS_COMPILER_ERROR;
    }

    // Clear the output first so that no failure path below can leave the
    // caller holding an indeterminate pointer.
    *info = nullptr;

    if (!expression) {
        *error = generateCompileError("Invalid parameter: expression is NULL",
                                      -1);
        return HS_COMPILER_ERROR;
    }

    *error = nullptr;

    // Results are gathered in a zeroed local and only copied to allocated
    // memory once the whole analysis has succeeded.
    hs_expr_info local_info;
    memset(&local_info, 0, sizeof(local_info));

    try {
        bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
        bool isVectored = mode & HS_MODE_VECTORED;

        CompileContext cc(isStreaming, isVectored, get_current_target(),
                          Grey());

        // Ensure that our pattern isn't too long (in characters).
        if (strlen(expression) > cc.grey.limitPatternLength) {
            throw ParseError("Pattern length exceeds limit.");
        }

        ReportManager rm(cc.grey);
        ParsedExpression pe(0, expression, flags, 0);
        assert(pe.component);

        // Apply prefiltering transformations if desired.
        if (pe.prefilter) {
            prefilterTree(pe.component, ParseMode(flags));
        }

        unique_ptr<NGWrapper> g = buildWrapper(rm, cc, pe);
        if (!g) {
            DEBUG_PRINTF("NFA build failed, but no exception was thrown.\n");
            throw ParseError("Internal error.");
        }

        fillExpressionInfo(rm, *g, &local_info);
    }
    catch (const CompileError &e) {
        // Compiler error occurred
        *error = generateCompileError(e);
        return HS_COMPILER_ERROR;
    }
    catch (const std::bad_alloc &) {
        // Caught by const reference: no copy of the exception object is made.
        *error = const_cast<hs_compile_error_t *>(&hs_enomem);
        return HS_COMPILER_ERROR;
    }
    catch (...) {
        assert(!"Internal error, unexpected exception");
        *error = const_cast<hs_compile_error_t *>(&hs_einternal);
        return HS_COMPILER_ERROR;
    }

    hs_expr_info *rv = (hs_expr_info *)hs_misc_alloc(sizeof(*rv));
    if (!rv) {
        *error = const_cast<hs_compile_error_t *>(&hs_enomem);
        return HS_COMPILER_ERROR;
    }

    *rv = local_info;
    *info = rv;
    return HS_SUCCESS;
}
// Public wrapper around hs_expression_info_int(): the caller's arguments are
// passed through and the mode is fixed to HS_MODE_BLOCK.
extern "C" HS_PUBLIC_API
hs_error_t hs_expression_info(const char *expression, unsigned int flags,
                              hs_expr_info_t **info,
                              hs_compile_error_t **error) {
    const unsigned int mode = HS_MODE_BLOCK;
    return hs_expression_info_int(expression, flags, mode, info, error);
}
// Fill in a caller-supplied platform structure: the structure is zeroed, then
// cpu_features and tune are set from cpuid_flags() and cpuid_tune().
// Returns HS_INVALID for a NULL pointer, HS_SUCCESS otherwise.
extern "C" HS_PUBLIC_API
hs_error_t hs_populate_platform(hs_platform_info_t *platform) {
    if (platform == nullptr) {
        return HS_INVALID;
    }

    // Zero everything first so fields not assigned below end up as zero.
    memset(platform, 0, sizeof(hs_platform_info_t));

    platform->cpu_features = cpuid_flags();
    platform->tune = cpuid_tune();

    return HS_SUCCESS;
}
// Release a compile error by delegating to freeCompileError(). This wrapper
// always reports HS_SUCCESS.
extern "C" HS_PUBLIC_API
hs_error_t hs_free_compile_error(hs_compile_error_t *error) {
    freeCompileError(error);

    const hs_error_t status = HS_SUCCESS;
    return status;
}

45
src/hs.h Normal file
View File

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HS_H_
#define HS_H_
/**
* @file
* @brief The complete Hyperscan API definition.
*
* Hyperscan is a high speed regular expression engine.
*
* This header includes both the Hyperscan compiler and runtime components. See
* the individual component headers for documentation.
*/
#include "hs_compile.h"
#include "hs_runtime.h"
#endif /* HS_H_ */

509
src/hs_common.h Normal file
View File

@@ -0,0 +1,509 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HS_COMMON_H_
#define HS_COMMON_H_
#include <stdlib.h>
/**
* @file
* @brief The Hyperscan common API definition.
*
* Hyperscan is a high speed regular expression engine.
*
* This header contains functions available to both the Hyperscan compiler and
* runtime.
*/
#ifdef __cplusplus
extern "C"
{
#endif
struct hs_database;
/**
* A Hyperscan pattern database.
*
* Generated by one of the Hyperscan compiler functions:
* - @ref hs_compile()
* - @ref hs_compile_multi()
* - @ref hs_compile_ext_multi()
*/
typedef struct hs_database hs_database_t;
/**
* A type for errors returned by Hyperscan functions.
*/
typedef int hs_error_t;
/**
* Free a compiled pattern database.
*
* The free callback set by @ref hs_set_database_allocator() (or @ref
* hs_set_allocator()) will be used by this function.
*
* @param db
* A compiled pattern database. NULL may also be safely provided, in which
* case the function does nothing.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_free_database(hs_database_t *db);
/**
* Serialize a pattern database to a stream of bytes.
*
* The allocator callback set by @ref hs_set_misc_allocator() (or @ref
* hs_set_allocator()) will be used by this function.
*
* @param db
* A compiled pattern database.
*
* @param bytes
* On success, a pointer to an array of bytes will be returned here.
* These bytes can be subsequently relocated or written to disk. The
* caller is responsible for freeing this block.
*
* @param length
* On success, the number of bytes in the generated byte array will be
* returned here.
*
* @return
* @ref HS_SUCCESS on success, or @ref HS_NOMEM if the byte array cannot be
* allocated. Other values may be returned if errors are detected.
*/
hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes,
size_t *length);
/**
* Reconstruct a pattern database from a stream of bytes previously generated
* by @ref hs_serialize_database().
*
* This function will allocate sufficient space for the database using the
* allocator set with @ref hs_set_database_allocator() (or @ref
* hs_set_allocator()); to use a pre-allocated region of memory, use the @ref
* hs_deserialize_database_at() function.
*
* @param bytes
* A byte array generated by @ref hs_serialize_database() representing a
* compiled pattern database.
*
* @param length
* The length of the byte array generated by @ref hs_serialize_database().
* This should be the same value as that returned by @ref
* hs_serialize_database().
*
* @param db
* On success, a pointer to a newly allocated @ref hs_database_t will be
* returned here. This database can then be used for scanning, and
* eventually freed by the caller using @ref hs_free_database().
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_deserialize_database(const char *bytes, const size_t length,
hs_database_t **db);
/**
* Reconstruct a pattern database from a stream of bytes previously generated
* by @ref hs_serialize_database() at a given memory location.
*
* This function (unlike @ref hs_deserialize_database()) will write the
* reconstructed database to the memory location given in the @a db parameter.
* The amount of space required at this location can be determined with the
* @ref hs_serialized_database_size() function.
*
* @param bytes
* A byte array generated by @ref hs_serialize_database() representing a
* compiled pattern database.
*
* @param length
* The length of the byte array generated by @ref hs_serialize_database().
* This should be the same value as that returned by @ref
* hs_serialize_database().
*
* @param db
* Pointer to an 8-byte aligned block of memory of sufficient size to hold
* the deserialized database. On success, the reconstructed database will
* be written to this location. This database can then be used for pattern
* matching. The user is responsible for freeing this memory; the @ref
* hs_free_database() call should not be used.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length,
hs_database_t *db);
/**
* Provides the size of the stream state allocated by a single stream opened
* against the given database.
*
* @param database
* Pointer to a compiled (streaming mode) pattern database.
*
* @param stream_size
* On success, the size in bytes of an individual stream opened against the
* given database is placed in this parameter.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_stream_size(const hs_database_t *database, size_t *stream_size);
/**
* Provides the size of the given database in bytes.
*
* @param database
* Pointer to compiled pattern database.
*
* @param database_size
* On success, the size of the compiled database in bytes is placed in this
* parameter.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_database_size(const hs_database_t *database,
size_t *database_size);
/**
* Utility function for reporting the size that would be required by a
* database if it were deserialized.
*
* This can be used to allocate a shared memory region or other "special"
* allocation prior to deserializing with the @ref hs_deserialize_database_at()
* function.
*
* @param bytes
* Pointer to a byte array generated by @ref hs_serialize_database()
* representing a compiled pattern database.
*
* @param length
* The length of the byte array generated by @ref hs_serialize_database().
* This should be the same value as that returned by @ref
* hs_serialize_database().
*
* @param deserialized_size
* On success, the size of the compiled database that would be generated
* by @ref hs_deserialize_database_at() is returned here.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_serialized_database_size(const char *bytes, const size_t length,
size_t *deserialized_size);
/**
* Utility function providing information about a database.
*
* @param database
* Pointer to a compiled database.
*
* @param info
* On success, a string containing the version and platform information for
* the supplied database is placed in this parameter. The string is
* allocated using the allocator supplied in @ref hs_set_misc_allocator()
* (or malloc() if no allocator was set) and should be freed by the caller.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_database_info(const hs_database_t *database, char **info);
/**
* Utility function providing information about a serialized database.
*
* @param bytes
* Pointer to a serialized database.
*
* @param length
* Length in bytes of the serialized database.
*
* @param info
* On success, a string containing the version and platform information
* for the supplied serialized database is placed in this parameter. The
* string is allocated using the allocator supplied in @ref
* hs_set_misc_allocator() (or malloc() if no allocator was set) and
* should be freed by the caller.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_serialized_database_info(const char *bytes, size_t length,
char **info);
/**
* The type of the callback function that will be used by Hyperscan to allocate
* more memory at runtime as required, for example in @ref hs_open_stream() to
* allocate stream state.
*
* If Hyperscan is to be used in a multi-threaded (or similarly concurrent)
* environment, the allocation function will need to be re-entrant, or
* similarly safe for concurrent use.
*
* @param size
* The number of bytes to allocate.
* @return
* A pointer to the region of memory allocated, or NULL on error.
*/
typedef void *(*hs_alloc_t)(size_t size);
/**
* The type of the callback function that will be used by Hyperscan to free
* memory regions previously allocated using the @ref hs_alloc_t function.
*
* @param ptr
* The region of memory to be freed.
*/
typedef void (*hs_free_t)(void *ptr);
/**
* Set the allocate and free functions used by Hyperscan for allocating
* memory at runtime for stream state, scratch space, database bytecode,
* and various other data structures returned by the Hyperscan API.
*
* The function is equivalent to calling @ref hs_set_stream_allocator(),
* @ref hs_set_scratch_allocator(), @ref hs_set_database_allocator() and
* @ref hs_set_misc_allocator() with the provided parameters.
*
* This call will override any previous allocators that have been set.
*
* Note: there is no way to change the allocator used for temporary objects
* created during the various compile calls (@ref hs_compile(), @ref
* hs_compile_multi(), @ref hs_compile_ext_multi()).
*
* @param alloc_func
* A callback function pointer that allocates memory. This function must
* return memory suitably aligned for the largest representable data type
* on this platform.
*
* @param free_func
* A callback function pointer that frees allocated memory.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_set_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
/**
* Set the allocate and free functions used by Hyperscan for allocating memory
* for database bytecode produced by the compile calls (@ref hs_compile(), @ref
* hs_compile_multi(), @ref hs_compile_ext_multi()) and by database
* deserialization (@ref hs_deserialize_database()).
*
* If no database allocation functions are set, or if NULL is used in place of
* both parameters, then memory allocation will default to standard methods
* (such as the system malloc() and free() calls).
*
* This call will override any previous database allocators that have been set.
*
* Note: the database allocator may also be set by calling @ref
* hs_set_allocator().
*
* Note: there is no way to change how temporary objects created during the
* various compile calls (@ref hs_compile(), @ref hs_compile_multi(), @ref
* hs_compile_ext_multi()) are allocated.
*
* @param alloc_func
* A callback function pointer that allocates memory. This function must
* return memory suitably aligned for the largest representable data type
* on this platform.
*
* @param free_func
* A callback function pointer that frees allocated memory.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_set_database_allocator(hs_alloc_t alloc_func,
hs_free_t free_func);
/**
* Set the allocate and free functions used by Hyperscan for allocating memory
* for items returned by the Hyperscan API such as @ref hs_compile_error_t, @ref
* hs_expr_info_t and serialized databases.
*
* If no misc allocation functions are set, or if NULL is used in place of both
* parameters, then memory allocation will default to standard methods (such as
* the system malloc() and free() calls).
*
* This call will override any previous misc allocators that have been set.
*
* Note: the misc allocator may also be set by calling @ref hs_set_allocator().
*
* @param alloc_func
* A callback function pointer that allocates memory. This function must
* return memory suitably aligned for the largest representable data type
* on this platform.
*
* @param free_func
* A callback function pointer that frees allocated memory.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_set_misc_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
/**
* Set the allocate and free functions used by Hyperscan for allocating memory
* for scratch space by @ref hs_alloc_scratch() and @ref hs_clone_scratch().
*
* If no scratch allocation functions are set, or if NULL is used in place of
* both parameters, then memory allocation will default to standard methods
* (such as the system malloc() and free() calls).
*
* This call will override any previous scratch allocators that have been set.
*
* Note: the scratch allocator may also be set by calling @ref
* hs_set_allocator().
*
* @param alloc_func
* A callback function pointer that allocates memory. This function must
* return memory suitably aligned for the largest representable data type
* on this platform.
*
* @param free_func
* A callback function pointer that frees allocated memory.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_set_scratch_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
/**
* Set the allocate and free functions used by Hyperscan for allocating memory
* for stream state by @ref hs_open_stream().
*
* If no stream allocation functions are set, or if NULL is used in place of
* both parameters, then memory allocation will default to standard methods
* (such as the system malloc() and free() calls).
*
* This call will override any previous stream allocators that have been set.
*
* Note: the stream allocator may also be set by calling @ref
* hs_set_allocator().
*
* @param alloc_func
* A callback function pointer that allocates memory. This function must
* return memory suitably aligned for the largest representable data type
* on this platform.
*
* @param free_func
* A callback function pointer that frees allocated memory.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_set_stream_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
/**
* Utility function for identifying this release version.
*
* @return
* A string containing the version number of this release build and the
* date of the build. It is allocated statically, so it does not need to
* be freed by the caller.
*/
const char *hs_version(void);
/**
* @defgroup HS_ERROR hs_error_t values
*
* @{
*/
/**
* The engine completed normally.
*/
#define HS_SUCCESS 0
/**
* A parameter passed to this function was invalid.
*/
#define HS_INVALID (-1)
/**
* A memory allocation failed.
*/
#define HS_NOMEM (-2)
/**
* The engine was terminated by callback.
*
* This return value indicates that the target buffer was partially scanned,
* but that the callback function requested that scanning cease after a match
* was located.
*/
#define HS_SCAN_TERMINATED (-3)
/**
* The pattern compiler failed, and the @ref hs_compile_error_t should be
* inspected for more detail.
*/
#define HS_COMPILER_ERROR (-4)
/**
* The given database was built for a different version of Hyperscan.
*/
#define HS_DB_VERSION_ERROR (-5)
/**
* The given database was built for a different platform (i.e., CPU type).
*/
#define HS_DB_PLATFORM_ERROR (-6)
/**
* The given database was built for a different mode of operation. This error
* is returned when streaming calls are used with a block or vectored database
* and vice versa.
*/
#define HS_DB_MODE_ERROR (-7)
/**
* A parameter passed to this function was not correctly aligned.
*/
#define HS_BAD_ALIGN (-8)
/**
* The memory allocator (either malloc() or the allocator set with @ref
* hs_set_allocator()) did not correctly return memory suitably aligned for the
* largest representable data type on this platform.
*/
#define HS_BAD_ALLOC (-9)
/** @} */
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* HS_COMMON_H_ */

848
src/hs_compile.h Normal file
View File

@@ -0,0 +1,848 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HS_COMPILE_H_
#define HS_COMPILE_H_
/**
* @file
* @brief The Hyperscan compiler API definition.
*
* Hyperscan is a high speed regular expression engine.
*
* This header contains functions for compiling regular expressions into
* Hyperscan databases that can be used by the Hyperscan runtime.
*/
#include "hs_common.h"
#ifdef __cplusplus
extern "C"
{
#endif
/**
* A type containing error details that is returned by the compile calls (@ref
* hs_compile(), @ref hs_compile_multi() and @ref hs_compile_ext_multi()) on
* failure. The caller may inspect the values returned in this type to
* determine the cause of failure.
*
* The same type is used by @ref hs_expression_info() to report errors.
*
* Common errors generated during the compile process include:
*
* - *Invalid parameter*
*
* An invalid argument was specified in the compile call.
*
* - *Unrecognised flag*
*
* An unrecognised value was passed in the flags argument.
*
* - *Pattern matches empty buffer*
*
* By default, Hyperscan only supports patterns that will *always*
* consume at least one byte of input. Patterns that do not have this
* property (such as `/(abc)?/`) will produce this error unless
* the @ref HS_FLAG_ALLOWEMPTY flag is supplied. Note that such
* patterns will produce a match for *every* byte when scanned.
*
* - *Embedded anchors not supported*
*
* Hyperscan only supports the use of anchor meta-characters (such as
* `^` and `$`) in patterns where they could *only* match
* at the start or end of a buffer. A pattern containing an embedded
* anchor, such as `/abc^def/`, can never match, as there is no
* way for `abc` to precede the start of the data stream.
*
* - *Bounded repeat is too large*
*
* The pattern contains a repeated construct with very large finite
* bounds.
*
* - *Unsupported component type*
*
* An unsupported PCRE construct was used in the pattern.
*
* - *Unable to generate bytecode*
*
* This error indicates that Hyperscan was unable to compile a pattern
* that is syntactically valid. The most common cause is a pattern that is
* very long and complex or contains a large repeated subpattern.
*
* - *Unable to allocate memory*
*
* The library was unable to allocate temporary storage used during
* compilation.
*
* - *Internal error*
*
* An unexpected error occurred: if this error is reported, please contact
* the Hyperscan team with a description of the situation.
*/
typedef struct hs_compile_error {
/**
* A human-readable error message describing the error.
*/
char *message;
/**
* The zero-based number of the expression that caused the error (if this
* can be determined). If the error is not specific to an expression, then
* this value will be less than zero.
*/
int expression;
} hs_compile_error_t;
/**
* A type containing information on the target platform which may optionally be
* provided to the compile calls (@ref hs_compile(), @ref hs_compile_multi(),
* @ref hs_compile_ext_multi()).
*
* A hs_platform_info structure may be populated for the current platform by
* using the @ref hs_populate_platform() call, which zeroes the whole structure
* (including the reserved fields) before setting @a tune and @a cpu_features.
*/
typedef struct hs_platform_info {
/**
* Information about the target platform which may be used to guide the
* optimisation process of the compile.
*
* Use of this field does not limit the processors that the resulting
* database can run on, but may impact the performance of the resulting
* database.
*/
unsigned int tune;
/**
* Relevant CPU features available on the target platform.
*
* This value may be produced by combining HS_CPU_FEATURES_* flags (such as
* @ref HS_CPU_FEATURES_AVX2). Multiple CPU features may be or'ed together
* to produce the value.
*/
unsigned long long cpu_features;
/**
* Reserved for future use.
*/
unsigned long long reserved1;
/**
* Reserved for future use.
*/
unsigned long long reserved2;
} hs_platform_info_t;
/**
* A type containing information related to an expression that is returned by
* @ref hs_expression_info().
*
* The structure handed back by @ref hs_expression_info() is allocated with the
* misc allocator (see @ref hs_set_misc_allocator()).
*/
typedef struct hs_expr_info {
/**
* The minimum length in bytes of a match for the pattern.
*/
unsigned int min_width;
/**
* The maximum length in bytes of a match for the pattern. If the pattern
* has an unbounded maximum width, this will be set to the maximum value of
* an unsigned int (UINT_MAX).
*/
unsigned int max_width;
/**
* Whether this expression can produce matches that are not returned in
* order, such as those produced by assertions. Zero if false, non-zero if
* true.
*/
char unordered_matches;
/**
* Whether this expression can produce matches at end of data (EOD). In
* streaming mode, EOD matches are raised during @ref hs_close_stream(),
* since it is only when @ref hs_close_stream() is called that the EOD
* location is known. Zero if false, non-zero if true.
*
* Note: trailing `\b` word boundary assertions may also result in EOD
* matches as end-of-data can act as a word boundary.
*/
char matches_at_eod;
/**
* Whether this expression can *only* produce matches at end of data (EOD).
* In streaming mode, all matches for this expression are raised during
* @ref hs_close_stream(). Zero if false, non-zero if true.
*/
char matches_only_at_eod;
} hs_expr_info_t;
/**
* A structure containing additional parameters related to an expression,
* passed in at build time to @ref hs_compile_ext_multi().
*
* These parameters allow the set of matches produced by a pattern to be
* constrained at compile time, rather than relying on the application to
* process unwanted matches at runtime.
*
* Note: @ref hs_compile() and @ref hs_compile_multi() take no parameter of
* this type and compile their patterns without any extended parameters.
*/
typedef struct hs_expr_ext {
/**
* Flags governing which parts of this structure are to be used by the
* compiler. See @ref HS_EXT_FLAG.
*/
unsigned long long flags;
/**
* The minimum end offset in the data stream at which this expression
* should match successfully. To use this parameter, set the
* @ref HS_EXT_FLAG_MIN_OFFSET flag in the hs_expr_ext::flags field.
*/
unsigned long long min_offset;
/**
* The maximum end offset in the data stream at which this expression
* should match successfully. To use this parameter, set the
* @ref HS_EXT_FLAG_MAX_OFFSET flag in the hs_expr_ext::flags field.
*/
unsigned long long max_offset;
/**
* The minimum match length (from start to end) required to successfully
* match this expression. To use this parameter, set the
* @ref HS_EXT_FLAG_MIN_LENGTH flag in the hs_expr_ext::flags field.
*/
unsigned long long min_length;
} hs_expr_ext_t;
/**
* @defgroup HS_EXT_FLAG hs_expr_ext_t flags
*
* These flags are used in @ref hs_expr_ext_t::flags to indicate which fields
* are used.
*
* @{
*/
/** Flag indicating that the hs_expr_ext::min_offset field is used. */
#define HS_EXT_FLAG_MIN_OFFSET 1ULL
/** Flag indicating that the hs_expr_ext::max_offset field is used. */
#define HS_EXT_FLAG_MAX_OFFSET 2ULL
/** Flag indicating that the hs_expr_ext::min_length field is used. */
#define HS_EXT_FLAG_MIN_LENGTH 4ULL
/** @} */
/**
* The basic regular expression compiler.
*
* This is the function call with which an expression is compiled into a
* Hyperscan database which can be passed to the runtime functions (such as
* @ref hs_scan(), @ref hs_open_stream(), etc.).
*
* @param expression
* The NULL-terminated expression to parse. Note that this string must
* represent ONLY the pattern to be matched, with no delimiters or flags;
* any global flags should be specified with the @a flags argument. For
* example, the expression `/abc?def/i` should be compiled by providing
* `abc?def` as the @a expression, and @ref HS_FLAG_CASELESS as the @a
* flags.
*
* @param flags
* Flags which modify the behaviour of the expression. Multiple flags may
* be used by ORing them together. Valid values are:
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
* expression per stream.
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
* empty string, such as `.*`.
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
* - HS_FLAG_UCP - Use Unicode properties for character classes.
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
*
* @param mode
* Compiler mode flags that affect the database as a whole. One of @ref
* HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
* supplied, to select between the generation of a streaming, block or
* vectored database. In addition, other flags (beginning with HS_MODE_)
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
* more details.
*
* @param platform
* If not NULL, the platform structure is used to determine the target
* platform for the database. If NULL, a database suitable for running
* on the current host platform is produced.
*
* @param db
* On success, a pointer to the generated database will be returned in
* this parameter, or NULL on failure. The caller is responsible for
* deallocating the buffer using the @ref hs_free_database() function.
*
* @param error
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
* returned, providing details of the error condition. The caller is
* responsible for deallocating the buffer using the @ref
* hs_free_compile_error() function.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR on failure, with details provided in the error
* parameter.
*/
hs_error_t hs_compile(const char *expression, unsigned int flags,
unsigned int mode, const hs_platform_info_t *platform,
hs_database_t **db, hs_compile_error_t **error);
/**
* The multiple regular expression compiler.
*
* This is the function call with which a set of expressions is compiled into a
* database which can be passed to the runtime functions (such as @ref
* hs_scan(), @ref hs_open_stream(), etc.) Each expression can be labelled with
* a unique integer which is passed into the match callback to identify the
* pattern that has matched.
*
* @param expressions
* Array of NULL-terminated expressions to compile. Note that (as for @ref
* hs_compile()) these strings must contain only the pattern to be
* matched, with no delimiters or flags. For example, the expression
* `/abc?def/i` should be compiled by providing `abc?def` as the first
* string in the @a expressions array, and @ref HS_FLAG_CASELESS as the
* first value in the @a flags array.
*
* @param flags
* Array of flags which modify the behaviour of each expression. Multiple
* flags may be used by ORing them together. Specifying the NULL pointer
* in place of an array will set the flags value for all patterns to zero.
* Valid values are:
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - HS_FLAG_SINGLEMATCH - Only one match will be generated by patterns
* with this match id per stream.
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
* empty string, such as `.*`.
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
* - HS_FLAG_UCP - Use Unicode properties for character classes.
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
*
* @param ids
* An array of integers specifying the ID number to be associated with the
* corresponding pattern in the expressions array. Specifying the NULL
* pointer in place of an array will set the ID value for all patterns to
* zero.
*
* @param elements
* The number of elements in the input arrays.
*
* @param mode
* Compiler mode flags that affect the database as a whole. One of @ref
* HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
* supplied, to select between the generation of a streaming, block or
* vectored database. In addition, other flags (beginning with HS_MODE_)
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
* more details.
*
* @param platform
* If not NULL, the platform structure is used to determine the target
* platform for the database. If NULL, a database suitable for running
* on the current host platform is produced.
*
* @param db
* On success, a pointer to the generated database will be returned in
* this parameter, or NULL on failure. The caller is responsible for
* deallocating the buffer using the @ref hs_free_database() function.
*
* @param error
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
* returned, providing details of the error condition. The caller is
* responsible for deallocating the buffer using the @ref
* hs_free_compile_error() function.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR on failure, with details provided in the @a error
* parameter.
*
*/
hs_error_t hs_compile_multi(const char *const *expressions,
const unsigned int *flags, const unsigned int *ids,
unsigned int elements, unsigned int mode,
const hs_platform_info_t *platform,
hs_database_t **db, hs_compile_error_t **error);
/**
* The multiple regular expression compiler with extended pattern support.
*
* This function call compiles a group of expressions into a database in the
* same way as @ref hs_compile_multi(), but allows additional parameters to be
* specified via an @ref hs_expr_ext_t structure per expression.
*
* @param expressions
* Array of NULL-terminated expressions to compile. Note that (as for @ref
* hs_compile()) these strings must contain only the pattern to be
* matched, with no delimiters or flags. For example, the expression
* `/abc?def/i` should be compiled by providing `abc?def` as the first
* string in the @a expressions array, and @ref HS_FLAG_CASELESS as the
* first value in the @a flags array.
*
* @param flags
* Array of flags which modify the behaviour of each expression. Multiple
* flags may be used by ORing them together. Specifying the NULL pointer
* in place of an array will set the flags value for all patterns to zero.
* Valid values are:
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - HS_FLAG_SINGLEMATCH - Only one match will be generated by patterns
* with this match id per stream.
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
* empty string, such as `.*`.
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
* - HS_FLAG_UCP - Use Unicode properties for character classes.
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
*
* @param ids
* An array of integers specifying the ID number to be associated with the
* corresponding pattern in the expressions array. Specifying the NULL
* pointer in place of an array will set the ID value for all patterns to
* zero.
*
* @param ext
* An array of pointers to filled @ref hs_expr_ext_t structures that
* define extended behaviour for each pattern. NULL may be specified if no
* extended behaviour is needed for an individual pattern, or in place of
* the whole array if it is not needed for any expressions. Memory used by
* these structures must be both allocated and freed by the caller.
*
* @param elements
* The number of elements in the input arrays.
*
* @param mode
* Compiler mode flags that affect the database as a whole. One of @ref
* HS_MODE_STREAM, @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
* supplied, to select between the generation of a streaming, block or
* vectored database. In addition, other flags (beginning with HS_MODE_)
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
* more details.
*
* @param platform
* If not NULL, the platform structure is used to determine the target
* platform for the database. If NULL, a database suitable for running
* on the current host platform is produced.
*
* @param db
* On success, a pointer to the generated database will be returned in
* this parameter, or NULL on failure. The caller is responsible for
* deallocating the buffer using the @ref hs_free_database() function.
*
* @param error
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
* returned, providing details of the error condition. The caller is
* responsible for deallocating the buffer using the @ref
* hs_free_compile_error() function.
*
* @return
* @ref HS_SUCCESS is returned on successful compilation; @ref
* HS_COMPILER_ERROR on failure, with details provided in the @a error
* parameter.
*
*/
hs_error_t hs_compile_ext_multi(const char *const *expressions,
const unsigned int *flags,
const unsigned int *ids,
const hs_expr_ext_t *const *ext,
unsigned int elements, unsigned int mode,
const hs_platform_info_t *platform,
hs_database_t **db, hs_compile_error_t **error);
/**
* Free an error structure generated by @ref hs_compile(), @ref
* hs_compile_multi() or @ref hs_compile_ext_multi().
*
* @param error
* The @ref hs_compile_error_t to be freed. NULL may also be safely
* provided.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_free_compile_error(hs_compile_error_t *error);
/**
* Utility function providing information about a regular expression. The
* information provided in @ref hs_expr_info_t includes the minimum and maximum
* width of a pattern match.
*
* @param expression
* The NULL-terminated expression to parse. Note that this string must
* represent ONLY the pattern to be matched, with no delimiters or flags;
* any global flags should be specified with the @a flags argument. For
* example, the expression `/abc?def/i` should be compiled by providing
* `abc?def` as the @a expression, and @ref HS_FLAG_CASELESS as the @a
* flags.
*
* @param flags
* Flags which modify the behaviour of the expression. Multiple flags may
* be used by ORing them together. Valid values are:
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
* - HS_FLAG_SINGLEMATCH - Only one match will be generated by the
* expression per stream.
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
* empty string, such as `.*`.
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
* - HS_FLAG_UCP - Use Unicode properties for character classes.
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
*
* @param info
* On success, a pointer to the pattern information will be returned in
* this parameter, or NULL on failure. This structure is allocated using
* the allocator supplied in @ref hs_set_allocator() (or malloc() if no
* allocator was set) and should be freed by the caller.
*
* @param error
* If the call fails, a pointer to a @ref hs_compile_error_t will be
* returned, providing details of the error condition. The caller is
* responsible for deallocating the buffer using the @ref
* hs_free_compile_error() function.
*
* @return
* @ref HS_SUCCESS is returned on success; @ref HS_COMPILER_ERROR on
* failure, with details provided in the @a error parameter.
*/
hs_error_t hs_expression_info(const char *expression, unsigned int flags,
hs_expr_info_t **info,
hs_compile_error_t **error);
/**
* Populates the platform information based on the current host.
*
* @param platform
* On success, the pointed to structure is populated based on the current
* host.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_populate_platform(hs_platform_info_t *platform);
/**
* @defgroup HS_PATTERN_FLAG Pattern flags
*
* @{
*/
/**
* Compile flag: Set case-insensitive matching.
*
* This flag sets the expression to be matched case-insensitively by default.
* The expression may still use PCRE tokens (notably `(?i)` and
* `(?-i)`) to switch case-insensitive matching on and off.
*/
#define HS_FLAG_CASELESS 1
/**
* Compile flag: Matching a `.` will not exclude newlines.
*
* This flag sets any instances of the `.` token to match newline characters as
* well as all other characters. The PCRE specification states that the `.`
* token does not match newline characters by default, so without this flag the
* `.` token will not cross line boundaries.
*/
#define HS_FLAG_DOTALL 2
/**
* Compile flag: Set multi-line anchoring.
*
* This flag instructs the expression to make the `^` and `$` tokens match
* newline characters as well as the start and end of the stream. If this flag
* is not specified, the `^` token will only ever match at the start of a
* stream, and the `$` token will only ever match at the end of a stream within
* the guidelines of the PCRE specification.
*/
#define HS_FLAG_MULTILINE 4
/**
* Compile flag: Set single-match only mode.
*
* This flag sets the expression's match ID to match at most once. In streaming
* mode, this means that the expression will return only a single match over
* the lifetime of the stream, rather than reporting every match as per
* standard Hyperscan semantics. In block mode or vectored mode, only the first
* match for each invocation of @ref hs_scan() or @ref hs_scan_vector() will be
* returned.
*
* If multiple expressions in the database share the same match ID, then they
* either must all specify @ref HS_FLAG_SINGLEMATCH or none of them specify
* @ref HS_FLAG_SINGLEMATCH. If a group of expressions sharing a match ID
* specify the flag, then at most one match with the match ID will be generated
* per stream.
*
* Note: The use of this flag in combination with @ref HS_FLAG_SOM_LEFTMOST
* is not currently supported.
*/
#define HS_FLAG_SINGLEMATCH 8
/**
* Compile flag: Allow expressions that can match against empty buffers.
*
* This flag instructs the compiler to allow expressions that can match against
* empty buffers, such as `.?`, `.*`, `(a|)`. Since Hyperscan can return every
* possible match for an expression, such expressions generally execute very
* slowly; the default behaviour is to return an error when an attempt to
* compile one is made. Using this flag will force the compiler to allow such
* an expression.
*/
#define HS_FLAG_ALLOWEMPTY 16
/**
* Compile flag: Enable UTF-8 mode for this expression.
*
* This flag instructs Hyperscan to treat the pattern as a sequence of UTF-8
* characters. The results of scanning invalid UTF-8 sequences with a Hyperscan
* library that has been compiled with one or more patterns using this flag are
* undefined.
*/
#define HS_FLAG_UTF8 32
/**
* Compile flag: Enable Unicode property support for this expression.
*
* This flag instructs Hyperscan to use Unicode properties, rather than the
* default ASCII interpretations, for character mnemonics like `\w` and `\s` as
* well as the POSIX character classes. It is only meaningful in conjunction
* with @ref HS_FLAG_UTF8.
*/
#define HS_FLAG_UCP 64
/**
* Compile flag: Enable prefiltering mode for this expression.
*
* This flag instructs Hyperscan to compile an "approximate" version of this
* pattern for use in a prefiltering application, even if Hyperscan does not
* support the pattern in normal operation.
*
* The set of matches returned when this flag is used is guaranteed to be a
* superset of the matches specified by the non-prefiltering expression.
*
* If the pattern contains pattern constructs not supported by Hyperscan (such
* as zero-width assertions, back-references or conditional references) these
* constructs will be replaced internally with broader constructs that may
* match more often.
*
* Furthermore, in prefiltering mode Hyperscan may simplify a pattern that
* would otherwise return a "Pattern too large" error at compile time, or for
* performance reasons (subject to the matching guarantee above).
*
* It is generally expected that the application will subsequently confirm
* prefilter matches with another regular expression matcher that can provide
* exact matches for the pattern.
*
* Note: The use of this flag in combination with @ref HS_FLAG_SOM_LEFTMOST
* is not currently supported.
*/
#define HS_FLAG_PREFILTER 128
/**
* Compile flag: Enable leftmost start of match reporting.
*
* This flag instructs Hyperscan to report the leftmost possible start of match
* offset when a match is reported for this expression. (By default, no start
* of match is returned.)
*
* Enabling this behaviour may reduce performance and increase stream state
* requirements in streaming mode.
*/
#define HS_FLAG_SOM_LEFTMOST 256
/** @} */
/**
* @defgroup HS_CPU_FEATURES_FLAG CPU feature support flags
*
* @{
*/
/**
* CPU features flag - Intel(R) Advanced Vector Extensions 2 (Intel(R) AVX2)
*
* Setting this flag indicates that the target platform supports AVX2
* instructions.
*/
#define HS_CPU_FEATURES_AVX2 (1ULL << 2)
/** @} */
/**
* @defgroup HS_TUNE_FLAG Tuning flags
*
* @{
*/
/**
* Tuning Parameter - Generic
*
* This indicates that the compiled database should not be tuned for any
* particular target platform.
*/
#define HS_TUNE_FAMILY_GENERIC 0
/**
* Tuning Parameter - Intel(R) microarchitecture code name Sandy Bridge
*
* This indicates that the compiled database should be tuned for the
* Sandy Bridge microarchitecture.
*/
#define HS_TUNE_FAMILY_SNB 1
/**
* Tuning Parameter - Intel(R) microarchitecture code name Ivy Bridge
*
* This indicates that the compiled database should be tuned for the
* Ivy Bridge microarchitecture.
*/
#define HS_TUNE_FAMILY_IVB 2
/**
* Tuning Parameter - Intel(R) microarchitecture code name Haswell
*
* This indicates that the compiled database should be tuned for the
* Haswell microarchitecture.
*/
#define HS_TUNE_FAMILY_HSW 3
/**
* Tuning Parameter - Intel(R) microarchitecture code name Silvermont
*
* This indicates that the compiled database should be tuned for the
* Silvermont microarchitecture.
*/
#define HS_TUNE_FAMILY_SLM 4
/**
* Tuning Parameter - Intel(R) microarchitecture code name Broadwell
*
* This indicates that the compiled database should be tuned for the
* Broadwell microarchitecture.
*/
#define HS_TUNE_FAMILY_BDW 5
/** @} */
/**
* @defgroup HS_MODE_FLAG Compile mode flags
*
* The mode flags are used as values for the mode parameter of the various
* compile calls (@ref hs_compile(), @ref hs_compile_multi() and @ref
* hs_compile_ext_multi()).
*
* A mode value can be built by ORing these flag values together; the only
* required flag is one of @ref HS_MODE_BLOCK, @ref HS_MODE_STREAM or @ref
* HS_MODE_VECTORED. Other flags may be added to enable support for additional
* features.
*
* @{
*/
/**
* Compiler mode flag: Block scan (non-streaming) database.
*/
#define HS_MODE_BLOCK 1
/**
* Compiler mode flag: Alias for @ref HS_MODE_BLOCK.
*
* Spelled in terms of @ref HS_MODE_BLOCK so that both names always expand to
* the same value.
*/
#define HS_MODE_NOSTREAM HS_MODE_BLOCK
/**
* Compiler mode flag: Streaming database.
*/
#define HS_MODE_STREAM 2
/**
* Compiler mode flag: Vectored scanning database.
*/
#define HS_MODE_VECTORED 4
/**
* Compiler mode flag: use full precision to track start of match offsets in
* stream state.
*
* This mode will use the most stream state per pattern, but will always return
* an accurate start of match offset regardless of how far back in the past it
* was found.
*
* One of the SOM_HORIZON modes must be selected to use the @ref
* HS_FLAG_SOM_LEFTMOST expression flag.
*/
#define HS_MODE_SOM_HORIZON_LARGE (1U << 24)
/**
* Compiler mode flag: use medium precision to track start of match offsets in
* stream state.
*
* This mode will use less stream state than @ref HS_MODE_SOM_HORIZON_LARGE and
* will limit start of match accuracy to offsets within 2^32 bytes of the
* end of match offset reported.
*
* One of the SOM_HORIZON modes must be selected to use the @ref
* HS_FLAG_SOM_LEFTMOST expression flag.
*/
#define HS_MODE_SOM_HORIZON_MEDIUM (1U << 25)
/**
* Compiler mode flag: use limited precision to track start of match offsets in
* stream state.
*
* This mode will use less stream state than @ref HS_MODE_SOM_HORIZON_LARGE and
* will limit start of match accuracy to offsets within 2^16 bytes of the
* end of match offset reported.
*
* One of the SOM_HORIZON modes must be selected to use the @ref
* HS_FLAG_SOM_LEFTMOST expression flag.
*/
#define HS_MODE_SOM_HORIZON_SMALL (1U << 26)
/** @} */
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* HS_COMPILE_H_ */

78
src/hs_internal.h Normal file
View File

@@ -0,0 +1,78 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Internal-use only definitions. Available to internal tools.
*/
#ifndef HS_INTERNAL_H
#define HS_INTERNAL_H
#include "ue2common.h"
#include "hs.h"
#ifdef __cplusplus
namespace ue2 {
struct Grey;
/** \brief Internal use only: takes a Grey argument so that we can use it in
* tools.
*
* This declaration is only visible to C++ translation units and lives in
* namespace ue2.
*
* The parameters ahead of \a g parallel those of the public
* hs_compile_ext_multi() declaration (expressions, flags, ids, ext, elements,
* mode, platform, db and an error out-parameter).
* NOTE(review): presumably they carry the same meaning as documented there;
* confirm against the implementation.
*
* \param g
* Grey object supplied by the caller. NOTE(review): its contents and effect
* are not visible from this header; see the definition of ue2::Grey.
*/
hs_error_t hs_compile_multi_int(const char *const *expressions,
const unsigned *flags, const unsigned *ids,
const hs_expr_ext *const *ext,
unsigned elements, unsigned mode,
const hs_platform_info_t *platform,
hs_database_t **db,
hs_compile_error_t **comp_error, const Grey &g);
} // namespace ue2
extern "C"
{
#endif
#define HS_MATCH_FLAG_ADJUSTED 1U
/** \brief Bitwise OR of every valid Hyperscan pattern flag (HS_FLAG_*),
* listed in ascending bit order. */
#define HS_FLAG_ALL                                                          \
    (HS_FLAG_CASELESS | HS_FLAG_DOTALL | HS_FLAG_MULTILINE |                 \
     HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 | HS_FLAG_UCP | \
     HS_FLAG_PREFILTER | HS_FLAG_SOM_LEFTMOST)
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* HS_INTERNAL_H */

493
src/hs_runtime.h Normal file
View File

@@ -0,0 +1,493 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HS_RUNTIME_H_
#define HS_RUNTIME_H_
#include <stdlib.h>
/**
* @file
* @brief The Hyperscan runtime API definition.
*
* Hyperscan is a high speed regular expression engine.
*
* This header contains functions for using compiled Hyperscan databases for
* scanning data at runtime.
*/
#include "hs_common.h"
#ifdef __cplusplus
extern "C"
{
#endif
/**
* Forward declaration of the structure behind the stream identifier type
* @ref hs_stream_t.
*/
struct hs_stream;
/**
* The stream identifier returned by @ref hs_open_stream().
*/
typedef struct hs_stream hs_stream_t;
/**
* Forward declaration of the structure behind the scratch space type
* @ref hs_scratch_t.
*/
struct hs_scratch;
/**
* A Hyperscan scratch space.
*/
typedef struct hs_scratch hs_scratch_t;
/**
* Definition of the match event callback function type.
*
* A callback function matching the defined type must be provided by the
* application calling the @ref hs_scan(), @ref hs_scan_vector() or @ref
* hs_scan_stream() functions (or other streaming calls which can produce
* matches).
*
* This callback function will be invoked whenever a match is located in the
* target data during the execution of a scan. The details of the match are
* passed in as parameters to the callback function, and the callback function
* should return a value indicating whether or not matching should continue on
* the target data. If no callbacks are desired from a scan call, NULL may be
* provided in order to suppress match production.
*
* This callback function should not attempt to call Hyperscan API functions on
* the same stream nor should it attempt to reuse the scratch space allocated
* for the API calls that caused it to be triggered. Making another call to the
* Hyperscan library with completely independent parameters should work (for
* example, scanning a different database in a new stream and with new scratch
* space), but reusing data structures like stream state and/or scratch space
* will produce undefined behavior.
*
* @param id
* The ID number of the expression that matched. If the expression was a
* single expression compiled with @ref hs_compile(), this value will be
* zero.
*
* @param from
* - If a start of match flag is enabled for the current pattern, this
* argument will be set to the start of match for the pattern, provided
* that the start of match value lies within the current 'start of match
* horizon' chosen by one of the SOM_HORIZON mode flags.
* - If the start of match value lies outside this horizon (possible only
* when the SOM_HORIZON value is not @ref HS_MODE_SOM_HORIZON_LARGE),
* the @a from value will be set to @ref HS_OFFSET_PAST_HORIZON.
* - This argument will be set to zero if the Start of Match flag is not
* enabled for the given pattern.
*
* @param to
* The offset after the last byte that matches the expression.
*
* @param flags
* This is provided for future use and is unused at present.
*
* @param context
* The pointer supplied by the user to the @ref hs_scan(), @ref
* hs_scan_vector() or @ref hs_scan_stream() function.
*
* @return
* Non-zero if the matching should cease, else zero. If scanning is
* performed in streaming mode and a non-zero value is returned, any
* subsequent calls to @ref hs_scan_stream() for that stream will
* immediately return with @ref HS_SCAN_TERMINATED.
*/
typedef int (*match_event_handler)(unsigned int id,
unsigned long long from,
unsigned long long to,
unsigned int flags,
void *context);
/**
* Open and initialise a stream.
*
* @param db
* A compiled pattern database.
*
* @param flags
* Flags modifying the behaviour of the stream. This parameter is provided
* for future use and is unused at present.
*
* @param stream
* On success, a pointer to the generated @ref hs_stream_t will be
* returned; NULL on failure.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_open_stream(const hs_database_t *db, unsigned int flags,
hs_stream_t **stream);
/**
* Write data to be scanned to the opened stream.
*
* This is the function call in which the actual pattern matching takes place
* as data is written to the stream. Matches will be returned via the @ref
* match_event_handler callback supplied.
*
* @param id
* The stream ID (returned by @ref hs_open_stream()) to which the data
* will be written.
*
* @param data
* Pointer to the data to be scanned.
*
* @param length
* The number of bytes to scan.
*
* @param flags
* Flags modifying the behaviour of the stream. This parameter is provided
* for future use and is unused at present.
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch().
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,
* no matches will be returned.
*
* @param ctxt
* The user defined pointer which will be passed to the callback function
* when a match occurs.
*
* @return
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
* match callback indicated that scanning should stop; other values on
* error.
*/
hs_error_t hs_scan_stream(hs_stream_t *id, const char *data,
unsigned int length, unsigned int flags,
hs_scratch_t *scratch, match_event_handler onEvent,
void *ctxt);
/**
* Close a stream.
*
* This function must be called for any stream created with @ref
* hs_open_stream(), even if scanning has been terminated by a non-zero return
* from the match callback function.
*
* Note: This operation may result in matches being returned (via calls to the
* match event callback) for expressions anchored to the end of the data stream
* (for example, via the use of the `$` meta-character). If these matches are
* not desired, NULL may be provided as the @ref match_event_handler callback.
*
* If NULL is provided as the @ref match_event_handler callback, it is
* permissible to provide a NULL scratch.
*
* @param id
* The stream ID returned by @ref hs_open_stream().
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
* allowed to be NULL only if the @a onEvent callback is also NULL.
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,
* no matches will be returned.
*
* @param ctxt
* The user defined pointer which will be passed to the callback function
* when a match occurs.
*
* @return
* Returns @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
match_event_handler onEvent, void *ctxt);
/**
* Reset a stream to an initial state.
*
* Conceptually, this is equivalent to performing @ref hs_close_stream() on the
* given stream, followed by a @ref hs_open_stream(). This new stream replaces
* the original stream in memory, avoiding the overhead of freeing the old
* stream and allocating the new one.
*
* Note: This operation may result in matches being returned (via calls to the
* match event callback) for expressions anchored to the end of the original
* data stream (for example, via the use of the `$` meta-character). If these
* matches are not desired, NULL may be provided as the @ref match_event_handler
* callback.
*
* Note: the stream will also be tied to the same database.
*
* @param id
* The stream (as created by @ref hs_open_stream()) to be replaced.
*
* @param flags
* Flags modifying the behaviour of the stream. This parameter is provided
* for future use and is unused at present.
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch().
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,
* no matches will be returned.
*
* @param context
* The user defined pointer which will be passed to the callback function
* when a match occurs.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_reset_stream(hs_stream_t *id, unsigned int flags,
hs_scratch_t *scratch, match_event_handler onEvent,
void *context);
/**
* Duplicate the given stream. The new stream will have the same state as the
* original including the current stream offset.
*
* @param to_id
* On success, a pointer to the new, copied @ref hs_stream_t will be
* returned; NULL on failure.
*
* @param from_id
* The stream (as created by @ref hs_open_stream()) to be copied.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id);
/**
* Duplicate the given 'from' stream state onto the 'to' stream. The 'to' stream
* will first be reset (reporting any EOD matches if a non-NULL @a onEvent
* callback handler is provided).
*
* Note: the 'to' stream and the 'from' stream must be open against the same
* database.
*
* @param to_id
* The open stream to be reset and then overwritten with a copy of the
* state of @a from_id. Unlike @ref hs_copy_stream(), no new stream is
* returned through this parameter; the existing stream is reused.
*
* @param from_id
* The stream (as created by @ref hs_open_stream()) to be copied.
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch().
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,
* no matches will be returned.
*
* @param context
* The user defined pointer which will be passed to the callback function
* when a match occurs.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id,
const hs_stream_t *from_id,
hs_scratch_t *scratch,
match_event_handler onEvent,
void *context);
/**
* The block (non-streaming) regular expression scanner.
*
* This is the function call in which the actual pattern matching takes place
* for block-mode pattern databases.
*
* @param db
* A compiled pattern database.
*
* @param data
* Pointer to the data to be scanned.
*
* @param length
* The number of bytes to scan.
*
* @param flags
* Flags modifying the behaviour of this function. This parameter is
* provided for future use and is unused at present.
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch() for this
* database.
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,
* no matches will be returned.
*
* @param context
* The user defined pointer which will be passed to the callback function.
*
* @return
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
* match callback indicated that scanning should stop; other values on
* error.
*/
hs_error_t hs_scan(const hs_database_t *db, const char *data,
unsigned int length, unsigned int flags,
hs_scratch_t *scratch, match_event_handler onEvent,
void *context);
/**
* The vectored regular expression scanner.
*
* This is the function call in which the actual pattern matching takes place
* for vectoring-mode pattern databases.
*
* @param db
* A compiled pattern database.
*
* @param data
* An array of pointers to the data blocks to be scanned.
*
* @param length
* An array of lengths (in bytes) of each data block to scan.
*
* @param count
* Number of data blocks to scan. This should correspond to the size of
* the @a data and @a length arrays.
*
* @param flags
* Flags modifying the behaviour of this function. This parameter is
* provided for future use and is unused at present.
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch() for
* this database.
*
* @param onEvent
* Pointer to a match event callback function. If a NULL pointer is given,
* no matches will be returned.
*
* @param context
* The user defined pointer which will be passed to the callback function.
*
* @return
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the match
* callback indicated that scanning should stop; other values on error.
*/
hs_error_t hs_scan_vector(const hs_database_t *db, const char *const *data,
const unsigned int *length, unsigned int count,
unsigned int flags, hs_scratch_t *scratch,
match_event_handler onEvent, void *context);
/**
* Allocate a "scratch" space for use by Hyperscan.
*
* This is required for runtime use, and one scratch space per thread, or
* concurrent caller, is required. Any allocator callback set by @ref
* hs_set_scratch_allocator() or @ref hs_set_allocator() will be used by this
* function.
*
* @param db
* The database, as produced by @ref hs_compile().
*
* @param scratch
* On first allocation, a pointer to NULL should be provided so a new
* scratch can be allocated. If a scratch block has been previously
* allocated, then a pointer to it should be passed back in to see if it
* is valid for this database block. If a new scratch block is required,
* the original will be freed and the new one returned, otherwise the
* previous scratch block will be returned. On success, the scratch block
* will be suitable for use with the provided database in addition to any
* databases that the original scratch space was suitable for.
*
* @return
* @ref HS_SUCCESS on successful allocation; @ref HS_NOMEM if the
* allocation fails. Other errors may be returned if invalid parameters
* are specified.
*/
hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch);
/**
* Allocate a scratch space that is a clone of an existing scratch space.
*
* This is useful when multiple concurrent threads will be using the same set
* of compiled databases, and another scratch space is required. Any allocator
* callback set by @ref hs_set_scratch_allocator() or @ref hs_set_allocator()
* will be used by this function.
*
* @param src
* The existing @ref hs_scratch_t to be cloned.
*
* @param dest
* A pointer to the new scratch space will be returned here.
*
* @return
* @ref HS_SUCCESS on success; @ref HS_NOMEM if the allocation fails.
* Other errors may be returned if invalid parameters are specified.
*/
hs_error_t hs_clone_scratch(const hs_scratch_t *src, hs_scratch_t **dest);
/**
* Provides the size of the given scratch space.
*
* @param scratch
* A per-thread scratch space allocated by @ref hs_alloc_scratch() or @ref
* hs_clone_scratch().
*
* @param scratch_size
* On success, the size of the scratch space in bytes is placed in this
* parameter.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_scratch_size(const hs_scratch_t *scratch, size_t *scratch_size);
/**
* Free a scratch block previously allocated by @ref hs_alloc_scratch() or @ref
* hs_clone_scratch().
*
* The free callback set by @ref hs_set_scratch_allocator() or @ref
* hs_set_allocator() will be used by this function.
*
* @param scratch
* The scratch block to be freed. NULL may also be safely provided.
*
* @return
* @ref HS_SUCCESS on success, other values on failure.
*/
hs_error_t hs_free_scratch(hs_scratch_t *scratch);
/**
* Callback 'from' return value, indicating that the start of this match was
* too early to be tracked with the requested SOM_HORIZON precision.
*/
#define HS_OFFSET_PAST_HORIZON (~0ULL)
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* HS_RUNTIME_H_ */

36
src/hs_version.c Normal file
View File

@@ -0,0 +1,36 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "ue2common.h"
#include "hs_common.h"
#include "hs_version.h"
/* Public entry point: hands back the version string defined by
 * HS_VERSION_STRING. */
HS_PUBLIC_API
const char *hs_version(void) {
    const char *version = HS_VERSION_STRING;
    return version;
}

40
src/hs_version.h.in Normal file
View File

@@ -0,0 +1,40 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HS_VERSION_H_C6428FAF8E3713
#define HS_VERSION_H_C6428FAF8E3713
/**
* A version string to identify this release of Hyperscan.
*
* NOTE(review): the @...@ placeholders are presumably substituted by the build
* system when this .in template is configured -- confirm against the build
* scripts.
*/
#define HS_VERSION_STRING "@HS_VERSION@ @BUILD_DATE@"
/**
* The release version packed into a single integer: major version in bits
* 31-24, minor version in bits 23-16 and patch version in bits 15-8. The low
* byte is always zero.
*/
#define HS_VERSION_32BIT ((@HS_MAJOR_VERSION@ << 24) | (@HS_MINOR_VERSION@ << 16) | (@HS_PATCH_VERSION@ << 8) | 0)
#endif /* HS_VERSION_H_C6428FAF8E3713 */

240
src/hwlm/hwlm.c Normal file
View File

@@ -0,0 +1,240 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: runtime.
*/
#include "hwlm.h"
#include "hwlm_internal.h"
#include "noodle_engine.h"
#include "scratch.h"
#include "ue2common.h"
#include "fdr/fdr.h"
#include "nfa/accel.h"
#include "nfa/shufti.h"
#include "nfa/vermicelli.h"
#include <string.h>
/** \brief Block mode: acceleration is skipped when fewer than this many bytes
 * lie between the start offset and the end of the buffer. */
#define MIN_ACCEL_LEN_BLOCK 16
/** \brief Streaming mode: acceleration is skipped when fewer than this many
 * bytes lie between the start offset and the end of the main buffer. */
#define MIN_ACCEL_LEN_STREAM 16
/** \brief Run the acceleration scheme described by \p aux over [ptr, end).
 *
 * Dispatches on the accel type to the matching vermicelli / shufti scanner
 * and hands back whatever that scanner returns. For every other accel type no
 * scanning is performed and \p ptr is returned unchanged.
 */
static really_inline
const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr,
                         const u8 *end) {
    /* default result: no acceleration, stay at the current position */
    const u8 *rv = ptr;

    switch (aux->accel_type) {
    case ACCEL_VERM:
        DEBUG_PRINTF("single vermicelli for 0x%02hhx\n", aux->verm.c);
        rv = vermicelliExec(aux->verm.c, 0, ptr, end);
        break;
    case ACCEL_VERM_NOCASE:
        DEBUG_PRINTF("single vermicelli-nocase for 0x%02hhx\n", aux->verm.c);
        rv = vermicelliExec(aux->verm.c, 1, ptr, end);
        break;
    case ACCEL_DVERM:
        DEBUG_PRINTF("double vermicelli for 0x%02hhx%02hhx\n", aux->dverm.c1,
                     aux->dverm.c2);
        rv = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 0, ptr, end);
        break;
    case ACCEL_DVERM_NOCASE:
        DEBUG_PRINTF("double vermicelli-nocase for 0x%02hhx%02hhx\n",
                     aux->dverm.c1, aux->dverm.c2);
        rv = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 1, ptr, end);
        break;
    case ACCEL_SHUFTI:
        DEBUG_PRINTF("single shufti\n");
        rv = shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);
        break;
    default:
        break;
    }

    return rv;
}
/** \brief Block-mode acceleration: move \p *start to the position reported by
 * the accel scheme, backed off by the scheme's offset.
 *
 * Nothing is done when fewer than MIN_ACCEL_LEN_BLOCK bytes remain after
 * \p *start. Otherwise the scheme is run over [buf + *start, buf + len) and
 * \p *start is set to (hit position - aux->generic.offset), clamped so that it
 * never falls below zero.
 *
 * The back-off is computed on offsets rather than on the pointer itself, so
 * that no pointer before the start of \p buf is ever formed.
 */
static really_inline
void do_accel_block(const union AccelAux *aux, const u8 *buf, size_t len,
                    size_t *start) {
    if (len - *start < MIN_ACCEL_LEN_BLOCK) {
        return;
    }

    const u8 *found = run_hwlm_accel(aux, buf + *start, buf + len);
    assert(found >= buf);

    const size_t pos = (size_t)(found - buf);
    const size_t offset = aux->generic.offset;

    /* clamp at the start of the buffer if the back-off would go past it */
    *start = pos > offset ? pos - offset : 0;
}
/** \brief Returns non-zero for accel types which don't always catch up to the
 * boundary (the double-vermicelli variants).
 *
 * DSHUFTI is also inaccurate but it is not used by the hamsters.
 */
static really_inline
int inaccurate_accel(u8 type) {
    switch (type) {
    case ACCEL_DVERM:
    case ACCEL_DVERM_NOCASE:
        return 1;
    default:
        return 0;
    }
}
/** \brief Streaming-mode acceleration: try to advance \p *start in the main
 * buffer using the accel scheme in \p aux.
 *
 * When \p *start is zero and there is history, the history buffer is checked
 * first; if the scheme reports a hit inside the history that cannot be ruled
 * out, the function returns without touching \p *start. Otherwise the main
 * buffer is scanned from \p *start and, if the hit lies at least
 * aux->generic.offset bytes beyond the scan start, \p *start is advanced to
 * (hit - offset). \p *start is never moved backwards.
 */
static never_inline
void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen,
const u8 *buf, size_t len, size_t *start) {
/* nothing to do without a scheme or with too little data left to scan */
if (aux->accel_type == ACCEL_NONE || len - *start < MIN_ACCEL_LEN_STREAM) {
return;
}
const u8 offset = aux->generic.offset;
DEBUG_PRINTF("using accel %hhu offset %hhu\n", aux->accel_type, offset);
// Scan history buffer, but only if the start offset (which always refers to
// buf) is zero.
if (!*start && hlen) {
const u8 *ptr1 = hbuf;
const u8 *end1 = hbuf + hlen;
/* the accel scheme is only run directly on a history of 16+ bytes */
if (hlen >= 16) {
ptr1 = run_hwlm_accel(aux, ptr1, end1);
}
/* Short history, or a scheme that may stop short of the end: if at most 16
 * unscanned history bytes remain, rescan them from a padded temporary. */
if ((hlen <= 16 || inaccurate_accel(aux->accel_type))
&& end1 != ptr1 && end1 - ptr1 <= 16) {
DEBUG_PRINTF("already scanned %zu/%zu\n", ptr1 - hbuf, hlen);
/* see if we can finish off the history buffer completely */
u8 ALIGN_DIRECTIVE temp[17];
ptrdiff_t tlen = end1 - ptr1;
/* remaining history bytes, zero padded out to 17 bytes */
memcpy(temp, ptr1, tlen);
memset(temp + tlen, 0, 17 - tlen);
/* append the first byte of the main buffer so that a two-byte scheme can
 * see a pair straddling the history/main boundary */
if (len) { /* for dverm */
temp[end1 - ptr1] = *buf;
}
const u8 *tempp = run_hwlm_accel(aux, temp, temp + 17);
/* no hit within the copied history bytes: history is fully consumed */
if (tempp - temp >= tlen) {
ptr1 = end1;
}
DEBUG_PRINTF("got %zu\n", tempp - temp);
}
/* a possible hit remains in the history: leave *start untouched */
if (ptr1 != end1) {
DEBUG_PRINTF("bailing in history\n");
return;
}
}
DEBUG_PRINTF("scanning main buffer, start=%zu, len=%zu\n", *start, len);
const u8 *ptr2 = buf + *start;
const u8 *end2 = buf + len;
const u8 *found = run_hwlm_accel(aux, ptr2, end2);
/* only advance when backing off by 'offset' stays at or after the scan
 * start; otherwise *start is left as it was */
if (found >= ptr2 + offset) {
size_t delta = found - offset - ptr2;
DEBUG_PRINTF("got %zu/%zu in 2nd buffer\n", delta, len);
*start += delta;
} else if (hlen) {
/* debug output only: how far the backed-off position reaches into history */
UNUSED size_t remaining = offset + ptr2 - found;
DEBUG_PRINTF("got %zu/%zu remaining in 1st buffer\n", remaining, hlen);
}
}
/** \brief Block-mode literal scan of \p buf starting at offset \p start.
 *
 * Returns HWLM_SUCCESS immediately when no groups are switched on. A noodle
 * table is scanned directly from \p start; for an FDR table the start offset
 * is first pushed forward by block acceleration. The scheme in accel1 is used
 * when every active group is contained in accel1_groups, otherwise the one in
 * accel0.
 */
hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
                      size_t start, HWLMCallback cb, void *ctxt,
                      hwlm_group_t groups) {
    DEBUG_PRINTF("buf len=%zu, start=%zu, groups=%llx\n", len, start, groups);

    if (!groups) {
        DEBUG_PRINTF("groups all off\n");
        return HWLM_SUCCESS;
    }

    assert(start < len);

    if (t->type == HWLM_ENGINE_NOOD) {
        DEBUG_PRINTF("calling noodExec\n");
        return noodExec(HWLM_C_DATA(t), buf + start, len - start, start, cb,
                        ctxt);
    }

    assert(t->type == HWLM_ENGINE_FDR);

    /* accel1 only applies when no group outside accel1_groups is active */
    const int only_accel1_groups = (groups & ~t->accel1_groups) == 0;
    if (only_accel1_groups) {
        DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
    }
    const union AccelAux *scheme = only_accel1_groups ? &t->accel1
                                                      : &t->accel0;

    do_accel_block(scheme, buf, len, &start);
    DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups,
                 start);
    return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, ctxt, groups);
}
/** \brief Streaming-mode literal scan over the history and main buffers held
 * in \p scratch->core_info.
 *
 * Returns HWLM_SUCCESS immediately when no groups are switched on. For a
 * noodle table, a non-zero \p start allows a plain block scan of the main
 * buffer from that offset; otherwise the streaming noodle scan is used. For an
 * FDR table, acceleration is only attempted when the FDR stream state is not
 * active. The scheme in accel1 is used when every active group is contained
 * in accel1_groups, otherwise the one in accel0.
 */
hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch,
                               size_t len, size_t start, HWLMCallback cb,
                               void *ctxt, hwlm_group_t groups,
                               u8 *stream_state) {
    const u8 *hbuf = scratch->core_info.hbuf;
    const size_t hlen = scratch->core_info.hlen;
    const u8 *buf = scratch->core_info.buf;

    DEBUG_PRINTF("hbuf len=%zu, buf len=%zu, start=%zu, groups=%llx\n", hlen,
                 len, start, groups);

    if (!groups) {
        return HWLM_SUCCESS;
    }

    assert(start < len);

    if (t->type == HWLM_ENGINE_NOOD) {
        DEBUG_PRINTF("calling noodExec\n");
        // If we've been handed a start offset, we can use a block mode scan at
        // that offset.
        if (start) {
            return noodExec(HWLM_C_DATA(t), buf + start, len - start, start,
                            cb, ctxt);
        } else {
            return noodExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, cb,
                                     ctxt, scratch->fdr_temp_buf,
                                     FDR_TEMP_BUF_SIZE);
        }
    } else {
        /* same engine-type check as the block-mode entry point */
        assert(t->type == HWLM_ENGINE_FDR);

        const union AccelAux *aa = &t->accel0;
        if ((groups & ~t->accel1_groups) == 0) {
            DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
            aa = &t->accel1;
        }

        // if no active stream state, use acceleration
        if (!fdrStreamStateActive(HWLM_C_DATA(t), stream_state)) {
            do_accel_streaming(aa, hbuf, hlen, buf, len, &start);
        }

        DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups,
                     start);
        return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len,
                                start, cb, ctxt, groups, stream_state);
    }
}

142
src/hwlm/hwlm.h Normal file
View File

@@ -0,0 +1,142 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: runtime API.
*/
#ifndef HWLM_H
#define HWLM_H
#include "ue2common.h"
#ifdef __cplusplus
extern "C"
{
#endif
/** \brief Error return type for exec functions. */
typedef int hwlm_error_t;
/** \brief Type representing a set of groups as a bitmap. */
typedef u64a hwlm_group_t;
/** \brief HWLM callback return type. */
typedef hwlm_group_t hwlmcb_rv_t;
/** \brief Value representing all possible literal groups. */
#define HWLM_ALL_GROUPS ((hwlm_group_t)~0ULL)
/** \brief Callback return value indicating that we should continue matching. */
#define HWLM_CONTINUE_MATCHING HWLM_ALL_GROUPS
/** \brief Callback return value indicating that we should halt matching. */
#define HWLM_TERMINATE_MATCHING 0
/** \brief Matching finished without being terminated by the user. */
#define HWLM_SUCCESS 0
/** \brief The user terminated matching by returning HWLM_TERMINATE_MATCHING
* from the match callback. */
#define HWLM_TERMINATED 1
/** \brief An error occurred during matching.
*
* This should only be used if an unsupported engine was called (like one
* designed for a different architecture). */
#define HWLM_ERROR_UNKNOWN 2
struct hs_scratch;
struct HWLM;
/** \brief The type for an HWLM callback.
*
* This callback receives a start-of-match offset, an end-of-match offset, the
* ID of the match and the context pointer that was passed into \ref
* hwlmExec or \ref hwlmExecStreaming.
*
* A callback return of \ref HWLM_TERMINATE_MATCHING will stop matching.
*
* A callback return of \ref HWLM_CONTINUE_MATCHING continues matching.
*
* An arbitrary group mask may be given as the return value. This will be taken
* as a hint by the underlying engine that only literals with groups
* overlapping the provided mask need to be reported.
*
* The underlying engine may choose not to report a match if there is no group
* belonging to the literal which was active at the time when the end match
* location was first reached.
*/
typedef hwlmcb_rv_t (*HWLMCallback)(size_t start, size_t end, u32 id,
void *context);
/** \brief Match strings in table.
*
* If a match occurs, the callback function given will be called with the index
* of the last character in the string and the \p context (passed through
* without interpretation).
*
* Returns \ref HWLM_TERMINATED if scanning is cancelled due to the callback
* returning \ref HWLM_TERMINATE_MATCHING.
*
* \p start is the first offset at which a match may start.
*
* The underlying engine may choose not to report any match which starts before
* the first possible match of a literal which is in the initial group mask.
*/
hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len,
size_t start, HWLMCallback callback, void *context,
hwlm_group_t groups);
/** \brief As for \ref hwlmExec, but a streaming case across two buffers.
*
* \p scratch is used to access fdr_temp_buf and to access the history buffer,
* history length and the main buffer.
*
* \p len is the length of the main buffer to be scanned.
*
* \p start is an advisory hint representing the first offset at which a match
* may start. Some underlying literal matches may not respect it.
*
* Two buffers/lengths are provided. Matches that occur entirely within
* the history buffer will not be reported by this function. The offsets
* reported for the main buffer are relative to the start of that buffer (a
* match at byte 10 of the main buffer is reported as 10). Matches that start
* in the history buffer will have starts reported with 'negative' values.
*/
hwlm_error_t hwlmExecStreaming(const struct HWLM *tab,
struct hs_scratch *scratch, size_t len,
size_t start, HWLMCallback callback,
void *context, hwlm_group_t groups,
u8 *stream_state);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

635
src/hwlm/hwlm_build.cpp Normal file
View File

@@ -0,0 +1,635 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: build code.
*/
#include "grey.h"
#include "hwlm.h"
#include "hwlm_build.h"
#include "hwlm_internal.h"
#include "noodle_engine.h"
#include "noodle_build.h"
#include "ue2common.h"
#include "fdr/fdr_compile.h"
#include "fdr/fdr.h"
#include "nfa/shufticompile.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/charreach.h"
#include "util/compare.h"
#include "util/compile_context.h"
#include "util/compile_error.h"
#include "util/dump_charclass.h"
#include "util/target_info.h"
#include "util/ue2string.h"
#include "util/verify_types.h"
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>
using namespace std;
namespace ue2 {
/** Only the first MAX_ACCEL_OFFSET bytes of each literal are examined when
 * choosing a forward acceleration scheme. */
static const unsigned int MAX_ACCEL_OFFSET = 16;
/** A shufti scheme is rejected when the narrowest candidate position has more
 * than this many distinct characters. */
static const unsigned int MAX_SHUFTI_WIDTH = 240;
/** Try to build a double-vermicelli scheme for the given literals.
 *
 * Each adjacent byte pair within the first MAX_ACCEL_OFFSET bytes of the first
 * literal is considered as a candidate; a candidate survives only if the pair
 * also occurs within the first MAX_ACCEL_OFFSET bytes of every literal. The
 * best surviving candidate (see candidate::operator>) is written to \p aux.
 *
 * Returns true if a scheme was written to \p aux, false if no pair is common
 * to all literals.
 */
static
bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
const hwlmLiteral &first = *lits.front();
struct candidate {
candidate(void)
: c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {}
candidate(const hwlmLiteral &base, u32 offset)
: c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0),
b5insens(false), valid(true) {}
char c1;
char c2;
u32 max_offset; /* largest first-occurrence offset over all literals */
bool b5insens; /* pair is compared with CASE_CLEAR applied to both sides */
bool valid;
/* Preference order: valid over invalid; pairs whose two characters differ
 * over pairs whose characters are equal; exact comparison over b5insens;
 * smaller max_offset. Ties return true, so a later candidate that is no
 * worse replaces the current best. */
bool operator>(const candidate &other) const {
if (!valid) {
return false;
}
if (!other.valid) {
return true;
}
if (other.cdiffers() && !cdiffers()) {
return false;
}
if (!other.cdiffers() && cdiffers()) {
return true;
}
if (!other.b5insens && b5insens) {
return false;
}
if (other.b5insens && !b5insens) {
return true;
}
if (max_offset > other.max_offset) {
return false;
}
return true;
}
/* true if the two characters differ under the comparison mode in use */
bool cdiffers(void) const {
if (!b5insens) {
return c1 != c2;
}
return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR);
}
};
candidate best;
for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) {
candidate curr(first, i);
/* check to see if this pair appears in each string */
for (const auto &lit_ptr : lits) {
const hwlmLiteral &lit = *lit_ptr;
if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) {
curr.b5insens = true; /* no choice but to be case insensitive */
}
bool found = false;
bool found_nc = false;
for (u32 j = 0;
!found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) {
found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
&& (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
if (curr.b5insens) {
found = found_nc;
}
}
/* the pair only occurs in this literal under the CASE_CLEAR comparison:
 * downgrade the candidate to b5insens rather than discarding it */
if (!curr.b5insens && !found && found_nc) {
curr.b5insens = true;
found = true;
}
if (!found) {
goto next_candidate;
}
}
/* for each literal, find the first offset at which the pair appears, and
 * record the largest such offset over all literals */
for (const auto &lit_ptr : lits) {
const hwlmLiteral &lit = *lit_ptr;
for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1;
j++) {
bool found = false;
if (curr.b5insens) {
found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
&& (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
} else {
found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
}
if (found) {
curr.max_offset = MAX(curr.max_offset, j);
break;
}
}
}
if (curr > best) {
best = curr;
}
next_candidate:;
}
if (!best.valid) {
return false;
}
aux->dverm.offset = verify_u8(best.max_offset);
if (!best.b5insens) {
aux->dverm.accel_type = ACCEL_DVERM;
aux->dverm.c1 = best.c1;
aux->dverm.c2 = best.c2;
DEBUG_PRINTF("built dverm for %02hhx%02hhx\n",
aux->dverm.c1, aux->dverm.c2);
} else {
/* the nocase variant stores the characters with CASE_CLEAR applied */
aux->dverm.accel_type = ACCEL_DVERM_NOCASE;
aux->dverm.c1 = best.c1 & CASE_CLEAR;
aux->dverm.c2 = best.c2 & CASE_CLEAR;
DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n",
aux->dverm.c1, aux->dverm.c2);
}
return true;
}
/** Try to build a single-vermicelli scheme for the given literals.
 *
 * Each byte within the first MAX_ACCEL_OFFSET bytes of the first literal is
 * considered as a candidate; a candidate survives only if the character also
 * occurs within the first MAX_ACCEL_OFFSET bytes of every literal. The best
 * surviving candidate (see candidate::operator>) is written to \p aux.
 *
 * Returns true if a scheme was written to \p aux, false if no character is
 * common to all literals.
 */
static
bool findSVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
const hwlmLiteral &first = *lits.front();
struct candidate {
candidate(void)
: c(0), max_offset(0), b5insens(false), valid(false) {}
candidate(const hwlmLiteral &base, u32 offset)
: c(base.s[offset]), max_offset(0),
b5insens(false), valid(true) {}
char c;
u32 max_offset; /* largest first-occurrence offset over all literals */
bool b5insens; /* character is compared with CASE_CLEAR applied */
bool valid;
/* Preference order: valid over invalid; exact comparison over b5insens;
 * smaller max_offset. Ties return true, so a later candidate that is no
 * worse replaces the current best. */
bool operator>(const candidate &other) const {
if (!valid) {
return false;
}
if (!other.valid) {
return true;
}
if (!other.b5insens && b5insens) {
return false;
}
if (other.b5insens && !b5insens) {
return true;
}
if (max_offset > other.max_offset) {
return false;
}
return true;
}
};
candidate best;
for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()); i++) {
candidate curr(first, i);
/* check to see if this character appears in each string */
for (const auto &lit_ptr : lits) {
const hwlmLiteral &lit = *lit_ptr;
if (lit.nocase && ourisalpha(curr.c)) {
curr.b5insens = true; /* no choice but to be case insensitive */
}
bool found = false;
bool found_nc = false;
for (u32 j = 0;
!found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) {
found |= curr.c == lit.s[j];
found_nc |= (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR);
if (curr.b5insens) {
found = found_nc;
}
}
/* the character only occurs in this literal under the CASE_CLEAR
 * comparison: downgrade the candidate to b5insens rather than discarding it */
if (!curr.b5insens && !found && found_nc) {
curr.b5insens = true;
found = true;
}
if (!found) {
goto next_candidate;
}
}
/* for each literal, find the first offset at which the character appears,
 * and record the largest such offset over all literals */
for (const auto &lit_ptr : lits) {
const hwlmLiteral &lit = *lit_ptr;
for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) {
bool found = false;
if (curr.b5insens) {
found = (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR);
} else {
found = curr.c == lit.s[j];
}
if (found) {
curr.max_offset = MAX(curr.max_offset, j);
break;
}
}
}
if (curr > best) {
best = curr;
}
next_candidate:;
}
if (!best.valid) {
return false;
}
if (!best.b5insens) {
aux->verm.accel_type = ACCEL_VERM;
aux->verm.c = best.c;
DEBUG_PRINTF("built verm for %02hhx\n", aux->verm.c);
} else {
/* the nocase variant stores the character with CASE_CLEAR applied */
aux->verm.accel_type = ACCEL_VERM_NOCASE;
aux->verm.c = best.c & CASE_CLEAR;
DEBUG_PRINTF("built verm nc for %02hhx\n", aux->verm.c);
}
aux->verm.offset = verify_u8(best.max_offset);
return true;
}
static
void filterLits(const vector<hwlmLiteral> &lits, hwlm_group_t expected_groups,
vector<const hwlmLiteral *> *filtered_lits, u32 *min_len) {
*min_len = MAX_ACCEL_OFFSET;
for (const auto &lit : lits) {
if (!(lit.groups & expected_groups)) {
continue;
}
const size_t lit_len = lit.s.length();
if (lit_len < *min_len) {
*min_len = verify_u32(lit_len);
}
filtered_lits->push_back(&lit);
#ifdef DEBUG
DEBUG_PRINTF("lit:");
for (u32 i = 0; i < lit.s.length(); i++) {
printf("%02hhx", lit.s[i]);
}
printf("\n");
#endif
}
}
static
void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
hwlm_group_t expected_groups, AccelAux *aux) {
DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups);
u32 min_len = MAX_ACCEL_OFFSET;
vector<const hwlmLiteral *> filtered_lits;
filterLits(lits, expected_groups, &filtered_lits, &min_len);
if (filtered_lits.empty()) {
return;
}
if (findDVerm(filtered_lits, aux)
|| findSVerm(filtered_lits, aux)) {
return;
}
vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach());
for (const auto &lit : lits) {
if (!(lit.groups & expected_groups)) {
continue;
}
for (u32 i = 0; i < MAX_ACCEL_OFFSET && i < lit.s.length(); i++) {
unsigned char c = lit.s[i];
if (lit.nocase) {
DEBUG_PRINTF("adding %02hhx to %u\n", mytoupper(c), i);
DEBUG_PRINTF("adding %02hhx to %u\n", mytolower(c), i);
reach[i].set(mytoupper(c));
reach[i].set(mytolower(c));
} else {
DEBUG_PRINTF("adding %02hhx to %u\n", c, i);
reach[i].set(c);
}
}
}
u32 min_count = ~0U;
u32 min_offset = ~0U;
for (u32 i = 0; i < min_len; i++) {
size_t count = reach[i].count();
DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i,
describeClass(reach[i]).c_str(), count);
if (count < min_count) {
min_count = (u32)count;
min_offset = i;
}
}
assert(min_offset <= min_len);
if (min_count > MAX_SHUFTI_WIDTH) {
DEBUG_PRINTF("min shufti with %u chars is too wide\n", min_count);
return;
}
const CharReach &cr = reach[min_offset];
if (shuftiBuildMasks(cr, &aux->shufti.lo, &aux->shufti.hi) != -1) {
DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n",
describeClass(cr).c_str(), cr.count(), min_offset);
aux->shufti.accel_type = ACCEL_SHUFTI;
aux->shufti.offset = verify_u8(min_offset);
return;
}
DEBUG_PRINTF("fail\n");
}
/** Fill in both forward acceleration schemes of \p h: accel1 is built from the
 * literals sharing a group with \p expected_groups, accel0 from the literals
 * of all groups. \p expected_groups is recorded in accel1_groups.
 */
static
void buildForwardAccel(HWLM *h, const vector<hwlmLiteral> &lits,
                       hwlm_group_t expected_groups) {
    AccelAux *const expected_scheme = &h->accel1;
    AccelAux *const general_scheme = &h->accel0;

    findForwardAccelScheme(lits, expected_groups, expected_scheme);
    findForwardAccelScheme(lits, HWLM_ALL_GROUPS, general_scheme);

    h->accel1_groups = expected_groups;
}
/** Debug aid: print one line per literal (id, groups, escaped string and a
 * nocase marker). Compiles to an empty function unless DEBUG is defined. */
static
void dumpLits(UNUSED const vector<hwlmLiteral> &lits) {
#ifdef DEBUG
    DEBUG_PRINTF("building lit table for:\n");
    for (size_t idx = 0; idx < lits.size(); idx++) {
        const hwlmLiteral &entry = lits[idx];
        const char *nocase_tag = entry.nocase ? " (nc)" : "";
        printf("\t%u:%016llx %s%s\n", entry.id, entry.groups,
               escapeString(entry.s).c_str(), nocase_tag);
    }
#endif
}
#ifndef NDEBUG
// Called by an assertion.
static
bool everyoneHasGroups(const vector<hwlmLiteral> &lits) {
for (const auto &lit : lits) {
if (!lit.groups) {
return false;
}
}
return true;
}
#endif
static
bool isNoodleable(const vector<hwlmLiteral> &lits,
const hwlmStreamingControl *stream_control,
const CompileContext &cc) {
if (!cc.grey.allowNoodle) {
return false;
}
if (lits.size() != 1) {
DEBUG_PRINTF("too many literals for noodle\n");
return false;
}
if (stream_control) { // nullptr if in block mode
if (lits.front().s.length() + 1 > stream_control->history_max) {
DEBUG_PRINTF("length of %zu too long for history max %zu\n",
lits.front().s.length(),
stream_control->history_max);
return false;
}
}
if (!lits.front().msk.empty()) {
DEBUG_PRINTF("noodle can't handle supplementary masks\n");
return false;
}
return true;
}
/** \brief Build an HWLM literal matcher for the given literals.
 *
 * The result is an HWLM header followed (at offset ROUNDUP_CL(sizeof(HWLM)))
 * by the bytes of the chosen engine: Noodle if isNoodleable() accepts the
 * literal set, FDR otherwise.
 *
 * \param lits Literals to match; must be non-empty, and each literal must be
 *        a non-empty string with a non-zero group mask (asserted).
 * \param stream_control Streaming parameters, or nullptr in block mode. On
 *        the Noodle path the OUT fields are written here; on the FDR path the
 *        pointer is handed on to fdrBuildTable().
 * \param make_small Passed through to fdrBuildTable().
 * \param cc Compile context supplying the grey box limits and target info.
 * \param expected_groups Group mask used for the accel1 forward acceleration
 *        scheme (FDR only, and only if cc.grey.hamsterAccelForward is set).
 *
 * \return The built matcher, or nullptr if the engine builder returned null.
 *
 * Throws ResourceLimitError if the literal count, any literal length, the
 * total character count or the engine size exceeds its grey box limit, and
 * CompileError if a literal uses the reserved ID 0xffffffff.
 */
aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
hwlmStreamingControl *stream_control,
bool make_small, const CompileContext &cc,
hwlm_group_t expected_groups) {
assert(!lits.empty());
dumpLits(lits);
if (stream_control) {
assert(stream_control->history_min <= stream_control->history_max);
}
// Check that we haven't exceeded the maximum number of literals.
if (lits.size() > cc.grey.limitLiteralCount) {
throw ResourceLimitError();
}
// Safety and resource limit checks.
u64a total_chars = 0;
for (const auto &lit : lits) {
assert(!lit.s.empty());
if (lit.s.length() > cc.grey.limitLiteralLength) {
throw ResourceLimitError();
}
// The running total is checked after every literal, so we bail out as
// soon as the limit is crossed.
total_chars += lit.s.length();
if (total_chars > cc.grey.limitLiteralMatcherChars) {
throw ResourceLimitError();
}
// We do not allow the all-ones ID, as we reserve that for internal use
// within literal matchers.
if (lit.id == 0xffffffffu) {
assert(!"reserved id 0xffffffff used");
throw CompileError("Internal error.");
}
}
// Engine type tag, engine size in bytes, and the engine itself; the engine
// is held type-erased so that both builders can share the code below.
u8 engType = 0;
size_t engSize = 0;
shared_ptr<void> eng;
DEBUG_PRINTF("building table with %zu strings\n", lits.size());
assert(everyoneHasGroups(lits));
if (isNoodleable(lits, stream_control, cc)) {
DEBUG_PRINTF("build noodle table\n");
engType = HWLM_ENGINE_NOOD;
const hwlmLiteral &lit = lits.front();
auto noodle = noodBuildTable((const u8 *)lit.s.c_str(), lit.s.length(),
lit.nocase, lit.id);
if (noodle) {
engSize = noodSize(noodle.get());
}
if (stream_control) {
// For now, a single literal still goes to noodle and asks
// for a great big history
stream_control->literal_history_required = lit.s.length() - 1;
assert(stream_control->literal_history_required
<= stream_control->history_max);
stream_control->literal_stream_state_required = 0;
}
eng = move(noodle);
} else {
DEBUG_PRINTF("building a new deal\n");
engType = HWLM_ENGINE_FDR;
auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey,
stream_control);
if (fdr) {
engSize = fdrSize(fdr.get());
}
eng = move(fdr);
}
// A null engine means the builder failed; report that to the caller.
if (!eng) {
return nullptr;
}
assert(engSize);
if (engSize > cc.grey.limitLiteralMatcherSize) {
throw ResourceLimitError();
}
// Allocate the zeroed header plus room for the engine, then copy the
// engine bytes in directly after the rounded-up header.
auto h = aligned_zmalloc_unique<HWLM>(ROUNDUP_CL(sizeof(HWLM)) + engSize);
h->type = engType;
memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
// Forward acceleration is only built for FDR, and only when enabled.
if (engType == HWLM_ENGINE_FDR && cc.grey.hamsterAccelForward) {
buildForwardAccel(h.get(), lits, expected_groups);
}
if (stream_control) {
DEBUG_PRINTF("requires %zu (of max %zu) bytes of history\n",
stream_control->literal_history_required,
stream_control->history_max);
assert(stream_control->literal_history_required
<= stream_control->history_max);
}
return h;
}
size_t hwlmSize(const HWLM *h) {
size_t engSize = 0;
switch (h->type) {
case HWLM_ENGINE_NOOD:
engSize = noodSize((const noodTable *)HWLM_C_DATA(h));
break;
case HWLM_ENGINE_FDR:
engSize = fdrSize((const FDR *)HWLM_C_DATA(h));
break;
}
if (!engSize) {
return 0;
}
return engSize + ROUNDUP_CL(sizeof(*h));
}
/** \brief Estimate the repeated-suffix length at which a literal set of
 * \a numLiterals literals becomes flood-prone.
 *
 * Returns the all-ones size_t (no limit) when a single literal would go to
 * Noodle, and 3 in every other case. */
size_t hwlmFloodProneSuffixLen(size_t numLiterals, const CompileContext &cc) {
    // NOTE: this function contains a number of magic numbers which are
    // conservative estimates of flood-proneness based on internal details of
    // the various literal engines that fall under the HWLM aegis. If you
    // change those engines, you might need to change this function too.
    const size_t NO_LIMIT = ~(size_t)0;
    const size_t CONSERVATIVE_LEN = 3;
    const size_t TEDDY_MAX_LITERALS = 48;
    const size_t AVX2_TEDDY_MAX_LITERALS = 96;

    DEBUG_PRINTF("%zu literals\n", numLiterals);

    if (cc.grey.allowNoodle && numLiterals <= 1) {
        DEBUG_PRINTF("noodle\n");
        return NO_LIMIT;
    }

    if (cc.grey.fdrAllowTeddy && numLiterals <= TEDDY_MAX_LITERALS) {
        DEBUG_PRINTF("teddy\n");
        return CONSERVATIVE_LEN;
    }

    if (cc.grey.fdrAllowTeddy && cc.target_info.has_avx2()
        && numLiterals <= AVX2_TEDDY_MAX_LITERALS) {
        DEBUG_PRINTF("avx2 teddy\n");
        return CONSERVATIVE_LEN;
    }

    // TODO: we had thought we could push this value up to 9, but it seems that
    // hurts performance on floods in some FDR models. Super-conservative for
    // now.
    DEBUG_PRINTF("fdr\n");
    return CONSERVATIVE_LEN;
}
} // namespace ue2

104
src/hwlm/hwlm_build.h Normal file
View File

@@ -0,0 +1,104 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: build API.
*/
#ifndef HWLM_BUILD_H
#define HWLM_BUILD_H
#include "hwlm.h"
#include "hwlm_literal.h"
#include "ue2common.h"
#include "util/alloc.h"
#include <memory>
#include <vector>
struct HWLM;
namespace ue2 {
struct CompileContext;
struct Grey;
struct target_t;
/** \brief Structure gathering together the input/output parameters related to
* streaming mode operation. */
struct hwlmStreamingControl {
/** \brief IN parameter: Upper limit on the amount of history that can be
* requested. */
size_t history_max;
/** \brief IN parameter: History already known to be used before literal
* analysis. */
size_t history_min;
/** \brief OUT parameter: History required by the literal matcher to
* correctly match all literals. */
size_t literal_history_required;
/** \brief OUT parameter: Stream state required by literal matcher in bytes.
* Can be zero, and generally will be small (0-8 bytes). */
size_t literal_stream_state_required;
};
/** \brief Build an \ref HWLM literal matcher runtime structure for a group of
* literals.
*
* \param lits The group of literals.
* \param stream_control Streaming control parameters. If the matcher will
* operate in non-streaming (block) mode, this pointer should be NULL.
* \param make_small Optimise matcher for small size.
* \param cc Compile context.
* \param expected_groups Mask of groups used to build the primary forward
* acceleration scheme: only literals belonging to at least one of these
* groups contribute to it. A fallback scheme covering all groups is built
* alongside it. Defaults to \ref HWLM_ALL_GROUPS.
*
* Build failures are generally a result of memory allocation failure. These
* may result in a nullptr return value, or a std::bad_alloc exception being
* thrown. A ResourceLimitError is thrown if a literal matcher resource limit
* from the compile context is exceeded, and a CompileError if a literal uses
* the reserved ID 0xFFFFFFFF.
*/
aligned_unique_ptr<HWLM>
hwlmBuild(const std::vector<hwlmLiteral> &lits,
hwlmStreamingControl *stream_control, bool make_small,
const CompileContext &cc,
hwlm_group_t expected_groups = HWLM_ALL_GROUPS);
/**
* \brief Returns an estimate of the number of repeated characters on the end
* of a literal that will make a literal set of size \a numLiterals suffer
* performance degradation.
*
* The all-ones size_t value is returned when there is no such limit.
*/
size_t hwlmFloodProneSuffixLen(size_t numLiterals, const CompileContext &cc);
/** \brief Return the size in bytes of an HWLM structure, including the
* engine that follows the header. Returns 0 if the engine size cannot be
* determined. */
size_t hwlmSize(const HWLM *h);
} // namespace
#endif // HWLM_BUILD_H

70
src/hwlm/hwlm_dump.cpp Normal file
View File

@@ -0,0 +1,70 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: dump code.
*/
#include "config.h"
#include "hwlm_dump.h"
#include "hwlm_internal.h"
#include "noodle_build.h"
#include "ue2common.h"
#include "fdr/fdr_dump.h"
#include "nfa/accel_dump.h"
#include <cstdio>
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
namespace ue2 {
void hwlmPrintStats(const HWLM *h, FILE *f) {
switch (h->type) {
case HWLM_ENGINE_NOOD:
noodPrintStats((const noodTable *)HWLM_C_DATA(h), f);
break;
case HWLM_ENGINE_FDR:
fdrPrintStats((const FDR *)HWLM_C_DATA(h), f);
break;
default:
fprintf(f, "<unknown hwlm subengine>\n");
}
fprintf(f, "accel1_groups: %016llx\n", h->accel1_groups);
fprintf(f, "accel1:");
dumpAccelInfo(f, h->accel1);
fprintf(f, "accel0:");
dumpAccelInfo(f, h->accel0);
}
} // namespace ue2

50
src/hwlm/hwlm_dump.h Normal file
View File

@@ -0,0 +1,50 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: dump API.
*/
#ifndef HWLM_DUMP_H
#define HWLM_DUMP_H
#ifdef DUMP_SUPPORT
#include <cstdio>
struct HWLM;
namespace ue2 {
/** \brief Dump some information about the given HWLM structure to \a f. */
void hwlmPrintStats(const HWLM *h, FILE *f);
} // namespace ue2
#endif
#endif

62
src/hwlm/hwlm_internal.h Normal file
View File

@@ -0,0 +1,62 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: data structures.
*/
#ifndef HWLM_INTERNAL_H
#define HWLM_INTERNAL_H
#include "hwlm.h"
#include "ue2common.h"
#include "nfa/accel.h"
/** \brief Underlying engine is FDR. */
#define HWLM_ENGINE_FDR 12
/** \brief Underlying engine is Noodle. */
#define HWLM_ENGINE_NOOD 16
/** \brief Main Hamster Wheel Literal Matcher header. Followed by
* engine-specific structure.
*
* The engine data begins ROUNDUP_CL(sizeof(struct HWLM)) bytes after the start
* of this header; use \ref HWLM_DATA / \ref HWLM_C_DATA to reach it. */
struct HWLM {
u8 type; /**< HWLM_ENGINE_NOOD or HWLM_ENGINE_FDR */
hwlm_group_t accel1_groups; /**< accelerable groups. */
union AccelAux accel1; /**< used if group mask is subset of accel1_groups */
union AccelAux accel0; /**< fallback accel scheme */
};
/** \brief Fetch a const pointer to the underlying engine, which is stored
* ROUNDUP_CL(sizeof(struct HWLM)) bytes after the start of the header \a p. */
#define HWLM_C_DATA(p) ((const void *)((const char *)(p) \
+ ROUNDUP_CL(sizeof(struct HWLM))))
/** \brief Fetch a pointer to the underlying engine (non-const counterpart of
* HWLM_C_DATA). */
#define HWLM_DATA(p) ((void *)((char *)(p) + ROUNDUP_CL(sizeof(struct HWLM))))
#endif

111
src/hwlm/hwlm_literal.cpp Normal file
View File

@@ -0,0 +1,111 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: literal representation at build time.
*/
#include "hwlm_literal.h"
#include "util/bitutils.h" // for CASE_BIT
#include "util/compare.h" // for ourisalpha
#include "util/ue2string.h" // for escapeString
#include <iomanip>
#include <sstream>
#include <boost/algorithm/cxx11/all_of.hpp>
using namespace std;
using namespace boost::algorithm;
namespace ue2 {
#ifdef DEBUG
/** \brief Debug helper: render a byte vector as a string of two-digit
 * lowercase hex values with no separators. */
static UNUSED
std::string dumpMask(const vector<u8> &v) {
    ostringstream out;
    // Fill character and base persist on the stream; only the field width
    // has to be re-applied for each byte.
    out << setfill('0') << hex;
    for (const u8 byte : v) {
        out << setw(2) << (unsigned int)byte;
    }
    return out.str();
}
#endif
/** \brief Consistency test for a msk/cmp pair against a literal.
 *
 * Walks the literal, the mask and the compare vector backwards from their
 * last bytes (the last mask byte corresponds to the last literal byte) and
 * checks that each literal byte satisfies (c & msk) == cmp. For caseless
 * literals, the case bit is ignored on alphabetic characters.
 *
 * \param s The literal string.
 * \param nocase True if the literal is case-insensitive.
 * \param msk Supplementary mask.
 * \param cmp Supplementary compare value; must be the same length as \a msk.
 *
 * \return false if the msk/cmp test can never match the literal \a s, or if
 * \a cmp is too short to cover the mask bytes that need checking; true
 * otherwise.
 */
bool maskIsConsistent(const std::string &s, bool nocase, const vector<u8> &msk,
                      const vector<u8> &cmp) {
    string::const_reverse_iterator si = s.rbegin();
    vector<u8>::const_reverse_iterator mi = msk.rbegin(), ci = cmp.rbegin();

    for (; si != s.rend() && mi != msk.rend(); ++si, ++mi, ++ci) {
        // Validate ci BEFORE dereferencing it: the loop condition only bounds
        // si and mi, so a cmp shorter than msk would otherwise be read past
        // its end.
        assert(ci != cmp.rend());
        if (ci == cmp.rend()) {
            // Malformed pair in a build without assertions: it cannot be
            // applied to the literal, so report it as inconsistent.
            return false;
        }

        u8 c = *si, m = *mi, v = *ci;
        if (nocase && ourisalpha(c)) {
            // Case is not significant here, so ignore the case bit.
            m &= ~CASE_BIT;
            v &= ~CASE_BIT;
        }

        if ((c & m) != v) {
            DEBUG_PRINTF("c = %02hhx; *ci = %02hhx m =%02hhx\n", c, *ci, m);
            DEBUG_PRINTF("s = %s; dist = %zd\n", s.c_str(), si - s.rbegin());
            return false;
        }
    }

    return true;
}
/** \brief Complete constructor, takes group information and msk/cmp.
 *
 * The msk/cmp pair must consist of two vectors of equal length, no longer
 * than \ref HWLM_MASKLEN, that are consistent with the literal string. A mask
 * made up entirely of zero bytes is discarded along with its compare value. */
hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
                         bool noruns_in, u32 id_in, hwlm_group_t groups_in,
                         const vector<u8> &msk_in, const vector<u8> &cmp_in)
    : s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in),
      groups(groups_in), msk(msk_in), cmp(cmp_in) {
    // Size constraints: bounded by HWLM_MASKLEN, and msk/cmp of equal length.
    assert(msk.size() <= HWLM_MASKLEN);
    assert(msk.size() == cmp.size());

    DEBUG_PRINTF("literal '%s', msk=%s, cmp=%s\n",
                 escapeString(s).c_str(), dumpMask(msk).c_str(),
                 dumpMask(cmp).c_str());

    // The msk/cmp we were given must be applicable to s.
    assert(maskIsConsistent(s, nocase, msk, cmp));

    // An all-zero mask adds no constraint; drop it for good hygiene.
    const bool mask_is_noop = all_of_equal(msk.begin(), msk.end(), 0);
    if (mask_is_noop) {
        msk.clear();
        cmp.clear();
    }
}
} // namespace ue2

121
src/hwlm/hwlm_literal.h Normal file
View File

@@ -0,0 +1,121 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Hamster Wheel Literal Matcher: literal representation at build time.
*/
#ifndef HWLM_LITERAL_H
#define HWLM_LITERAL_H
#include "hwlm.h"
#include "ue2common.h"
#include <string>
#include <vector>
namespace ue2 {
/** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
#define HWLM_MASKLEN 8
/** \brief Class representing a literal, fed to \ref hwlmBuild. */
struct hwlmLiteral {
std::string s; //!< \brief The literal itself.
/** \brief The ID to pass to the callback if this literal matches.
*
* Note that the special value 0xFFFFFFFF is reserved for internal use and
* should not be used. */
u32 id;
bool nocase; //!< \brief True if literal is case-insensitive.
/** \brief Matches for runs of this literal can be quashed.
*
* Advisory flag meaning that there is no value in returning runs of
* additional matches for a literal after the first one, so such matches
* can be quashed by the literal matcher. */
bool noruns;
/** \brief Set of groups that literal belongs to.
*
* Use \ref HWLM_ALL_GROUPS for a literal that could match regardless of
* the groups that are switched on. */
hwlm_group_t groups;
/** \brief Supplementary comparison mask.
*
* These two values add a supplementary comparison that is done over the
* final 8 bytes of the string -- if v is those bytes, then the string must
* match as well as (v & msk) == cmp.
*
* An empty msk is the safe way of not adding any comparison to the string;
* unnecessarily filling in msk may turn off optimizations.
*
* The msk/cmp mechanism must NOT place a value into the literal that
* conflicts with the contents of the string, but can be allowed to add
* additional power within the string -- for example, to allow some case
* sensitivity within a case-insensitive string.
*
* Values are stored in memory order -- i.e. the last byte of the mask
* corresponds to the last byte of the string. Both vectors must be the
* same size, and must not exceed \ref HWLM_MASKLEN in length.
*/
std::vector<u8> msk;
/** \brief Supplementary comparison value.
*
* See documentation for \ref msk.
*/
std::vector<u8> cmp;
/** \brief Simple constructor: no group information, no msk/cmp.
*
* The literal is placed in \ref HWLM_ALL_GROUPS, noruns is false, and both
* msk and cmp are left empty. */
hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in)
: s(s_in), id(id_in), nocase(nocase_in), noruns(false),
groups(HWLM_ALL_GROUPS), msk(0), cmp(0) {}
/** \brief Complete constructor, takes group information and msk/cmp.
*
* This constructor takes a msk/cmp pair. Both must be vectors of length <=
* \ref HWLM_MASKLEN. */
hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in,
u32 id_in, hwlm_group_t groups_in,
const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in);
};
/**
* \brief Consistency test; returns false if the given msk/cmp test can never
* match the literal string s.
*
* The mask is aligned with the end of the string: the last byte of \a msk and
* \a cmp is compared with the last byte of \a s, and so on backwards. If
* \a nocase is set, the case bit is ignored for alphabetic characters.
*/
bool maskIsConsistent(const std::string &s, bool nocase,
const std::vector<u8> &msk, const std::vector<u8> &cmp);
} // namespace ue2
#endif // HWLM_LITERAL_H

110
src/hwlm/noodle_build.cpp Normal file
View File

@@ -0,0 +1,110 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Noodle literal matcher: build code.
*/
#include <cstring> // for memcpy
#include "noodle_build.h"
#include "noodle_internal.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/compare.h"
#include "util/verify_types.h"
namespace ue2 {
/** \brief Pick the key offset for a Noodle literal.
 *
 * Returns the index of the first character that differs from its successor
 * (compared caselessly when \a nocase is set and the character is
 * alphabetic). If no such pair exists, returns the second-to-last index, or
 * 0 for literals shorter than two characters. */
static
size_t findNoodFragOffset(const u8 *lit, size_t len, bool nocase) {
    if (len < 2) {
        return 0;
    }

    // Index of the final adjacent pair; used when nothing earlier differs.
    const size_t last_pair = len - 2;

    for (size_t pos = 0; pos < last_pair; pos++) {
        const char cur = lit[pos];
        const char next = lit[pos + 1];
        const bool differs = (nocase && ourisalpha(cur))
                                 ? (mytoupper(cur) != mytoupper(next))
                                 : (cur != next);
        if (differs) {
            return pos;
        }
    }

    return last_pair;
}
/** \brief Construct a Noodle matcher for the given literal.
 *
 * Allocates a zeroed noodTable with \a len trailing bytes, fills in the
 * header fields and copies the literal into the trailing storage. */
aligned_unique_ptr<noodTable> noodBuildTable(const u8 *lit, size_t len,
                                             bool nocase, u32 id) {
    auto table = aligned_zmalloc_unique<noodTable>(sizeof(noodTable) + len);
    assert(table);

    const size_t frag_offset = findNoodFragOffset(lit, len, nocase);

    table->id = id;
    table->len = verify_u32(len);
    table->key_offset = verify_u32(frag_offset);
    table->nocase = nocase ? 1 : 0;
    memcpy(table->str, lit, len);

    return table;
}
size_t noodSize(const noodTable *n) {
assert(n); // shouldn't call with null
return sizeof(*n) + n->len;
}
} // namespace ue2
#ifdef DUMP_SUPPORT
#include <cctype>
namespace ue2 {
void noodPrintStats(const noodTable *n, FILE *f) {
fprintf(f, "Noodle table\n");
fprintf(f, "Len: %u Key Offset: %u\n", n->len, n->key_offset);
fprintf(f, "String: ");
for (u32 i = 0; i < n->len; i++) {
if (isgraph(n->str[i]) && n->str[i] != '\\') {
fprintf(f, "%c", n->str[i]);
} else {
fprintf(f, "\\x%02hhx", n->str[i]);
}
}
fprintf(f, "\n");
}
} // namespace ue2
#endif

64
src/hwlm/noodle_build.h Normal file
View File

@@ -0,0 +1,64 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Noodle literal matcher: build code.
*/
#ifndef NOODLE_BUILD_H_048A1A6D585A9A
#define NOODLE_BUILD_H_048A1A6D585A9A
#include "ue2common.h"
#include "util/alloc.h"
struct noodTable;
namespace ue2 {
/** \brief Construct a Noodle matcher for the given literal.
*
* \param lit Pointer to the literal bytes.
* \param len Length of the literal in bytes.
* \param nocase True if the literal is case-insensitive.
* \param id ID stored in the table for this literal.
*/
ue2::aligned_unique_ptr<noodTable> noodBuildTable(const u8 *lit, size_t len,
bool nocase, u32 id);
/** \brief Return the size in bytes of a Noodle table, including the literal
* stored after the header. \a n must not be null. */
size_t noodSize(const noodTable *n);
} // namespace ue2
#ifdef DUMP_SUPPORT
#include <cstdio>
namespace ue2 {
/** \brief Print the length, key offset and literal of a Noodle table to
* \a f. Only available when DUMP_SUPPORT is defined. */
void noodPrintStats(const noodTable *n, FILE *f);
} // namespace ue2
#endif // DUMP_SUPPORT
#endif /* NOODLE_BUILD_H_048A1A6D585A9A */

364
src/hwlm/noodle_engine.c Normal file
View File

@@ -0,0 +1,364 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Noodle literal matcher: runtime.
*/
#include "hwlm.h"
#include "noodle_engine.h"
#include "noodle_internal.h"
#include "ue2common.h"
#include "util/bitutils.h"
#include "util/compare.h"
#include "util/masked_move.h"
#include "util/simd_utils.h"
#include <ctype.h>
#include <stdbool.h>
#include <string.h>
/** \brief Noodle runtime context. */
struct cb_info {
HWLMCallback cb; //!< callback function called on match
u32 id; //!< ID to pass to callback on match
void *ctx; //!< caller-supplied context to pass to callback
size_t offsetAdj; //!< added to match offsets before they are reported to
//!< the callback; used in streaming mode
};
/** \brief Return HWLM_TERMINATED from the enclosing function if \a x is
 * HWLM_TERMINATED.
 *
 * NOTE(review): this expands to a bare braced block rather than
 * do { } while (0), so take care when using it as the body of an unbraced
 * if/else. */
#define RETURN_IF_TERMINATED(x) \
{ \
if ((x) == HWLM_TERMINATED) { \
return HWLM_TERMINATED; \
} \
}
/** \brief Report every candidate in the bitmask z for the single-character
 * scanner.
 *
 * Each set bit of z is taken in turn via findAndClearLSB_32() and converted
 * to a buffer offset relative to the current scan pointer d; final() is then
 * called with a key length of 1 and no confirmation step. Expects z, d, buf,
 * len, key, noCase and cbi to be in scope at the point of expansion, and
 * returns from the enclosing function if matching is terminated. */
#define SINGLE_ZSCAN() \
do { \
while (unlikely(z)) { \
u32 pos = findAndClearLSB_32(&z); \
size_t matchPos = d - buf + pos; \
hwlmcb_rv_t rv = final(buf, len, key, 1, 0, 0, noCase, cbi, \
matchPos); \
RETURN_IF_TERMINATED(rv); \
} \
} while (0)
/** \brief Report every candidate in the bitmask z for the double-character
 * scanner.
 *
 * As SINGLE_ZSCAN, except that the candidate offset is moved back by one
 * byte and final() is asked to confirm the whole literal (keyLen bytes,
 * starting keyOffset bytes before the candidate). Additionally expects keyLen
 * and keyOffset to be in scope. */
#define DOUBLE_ZSCAN() \
do { \
while (unlikely(z)) { \
u32 pos = findAndClearLSB_32(&z); \
size_t matchPos = d - buf + pos - 1; \
hwlmcb_rv_t rv = final(buf, len, key, keyLen, keyOffset, 1, \
noCase, cbi, matchPos); \
RETURN_IF_TERMINATED(rv); \
} \
} while (0)
/** \brief Clear bit 0x20 of \a x (mask 0xdf) when \a noCase is set;
 * otherwise return \a x unchanged. */
static really_inline
u8 caseClear8(u8 x, bool noCase) {
    if (noCase) {
        return (u8)(x & (u8)0xdf);
    }
    return x;
}
// Confirm a candidate match and report it. For the double-character scanner
// (is_double set) the whole literal is checked against the buffer first, and
// candidates that run off the end of the buffer or fail the comparison are
// dropped silently. The single-character scanner is used only for single
// chars with case insensitivity handled correctly, so its candidates go
// straight to the callback.
static really_inline
hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
                   size_t keyOffset, bool is_double, bool noCase,
                   const struct cb_info *cbi, size_t pos) {
    // Start of the literal within buf.
    const size_t litStart = pos - keyOffset;

    // cmpForward returns non-zero on mismatch; it is only consulted when the
    // literal fits inside the buffer.
    if (is_double
        && (litStart + keyLen > len
            || cmpForward(buf + litStart, key, keyLen, noCase))) {
        return HWLM_SUCCESS;
    }

    const size_t from = litStart + cbi->offsetAdj;
    const size_t to = from + keyLen - 1;
    DEBUG_PRINTF("match @ %zu->%zu\n", from, to);

    const hwlmcb_rv_t verdict = cbi->cb(from, to, cbi->id, cbi->ctx);
    return verdict == HWLM_TERMINATE_MATCHING ? HWLM_TERMINATED : HWLM_SUCCESS;
}
#if defined(__AVX2__)
#define CHUNKSIZE 32
#define MASK_TYPE m256
#include "noodle_engine_avx2.c"
#else
#define CHUNKSIZE 16
#define MASK_TYPE m128
#include "noodle_engine_sse.c"
#endif
/*
 * Scan buf[0, len) for the single byte key[0] and report matches through cbi.
 *
 * Buffers shorter than CHUNKSIZE go to scanSingleShort(); a buffer of exactly
 * CHUNKSIZE bytes is handled with one unaligned load. Longer buffers are
 * processed in up to three stages:
 *   1. one unaligned load at the start of the buffer, reporting [0, s2Start);
 *   2. scanSingleFast() over [s2Start, s2End), whose bounds are rounded to
 *      CHUNKSIZE-aligned addresses;
 *   3. one unaligned load of the last CHUNKSIZE bytes, reporting [s2End, len).
 *
 * Returns HWLM_TERMINATED as soon as a stage does; otherwise the result of the
 * last stage run, or HWLM_SUCCESS when nothing is left for stage 3.
 */
static really_inline
hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key,
bool noCase, const struct cb_info *cbi) {
hwlm_error_t rv;
size_t end = len;
const MASK_TYPE mask1 = getMask(key[0], noCase);
const MASK_TYPE caseMask = getCaseMask();
if (len < CHUNKSIZE) {
rv = scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0, len);
return rv;
}
if (len == CHUNKSIZE) {
rv = scanSingleUnaligned(buf, len, 0, key, noCase, caseMask, mask1, cbi,
0, len);
return rv;
}
// All s* values below are offsets relative to buf.
uintptr_t data = (uintptr_t)buf;
// first offset at which buf + offset is CHUNKSIZE-aligned
uintptr_t s2Start = ROUNDUP_N(data, CHUNKSIZE) - data;
uintptr_t last = data + end;
// last CHUNKSIZE-aligned offset that is not past the end of the buffer
uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
// the stage 3 load starts here so that it ends exactly at len
uintptr_t s3Start = len - CHUNKSIZE;
if (s2Start) {
// first scan out to the fast scan starting point
DEBUG_PRINTF("stage 1: -> %zu\n", s2Start);
rv = scanSingleUnaligned(buf, len, 0, key, noCase, caseMask, mask1, cbi,
0, s2Start);
RETURN_IF_TERMINATED(rv);
}
if (likely(s2Start != s2End)) {
// scan as far as we can, bounded by the last point this key can
// possibly match
DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s2End);
rv = scanSingleFast(buf, len, key, noCase, caseMask, mask1, cbi,
s2Start, s2End);
RETURN_IF_TERMINATED(rv);
}
// if we are done bail out
if (s2End == end) {
return HWLM_SUCCESS;
}
// The load at s3Start overlaps bytes already scanned; only [s2End, end) is
// reported because scanSingleUnaligned masks to its start/end arguments.
DEBUG_PRINTF("stage 3: %zu -> %zu\n", s2End, end);
rv = scanSingleUnaligned(buf, len, s3Start, key, noCase, caseMask, mask1,
cbi, s2End, end);
return rv;
}
/**
 * Scan for a key by looking for the two-byte fragment key[keyOffset],
 * key[keyOffset + 1]; every candidate is confirmed against the whole key in
 * final() (via DOUBLE_ZSCAN in the stage helpers).
 *
 * Scanning covers offsets [keyOffset, end), where end is the last point at
 * which the fragment can sit with the rest of the key still inside the buffer.
 * Ranges shorter than CHUNKSIZE go to scanDoubleShort(); a range of exactly
 * CHUNKSIZE bytes is handled with one unaligned load. Longer ranges are
 * processed in up to three stages:
 *   1. one unaligned load at keyOffset, reporting up to s2Start + 1 (one byte
 *      past the aligned boundary, to catch a pair that straddles it);
 *   2. scanDoubleFast() over [s2Start, s2End), whose bounds are rounded to
 *      CHUNKSIZE-aligned addresses;
 *   3. one unaligned load of the last CHUNKSIZE bytes before end, reporting
 *      [off, end).
 *
 * Returns HWLM_TERMINATED as soon as a stage does; otherwise the result of the
 * last stage run, or HWLM_SUCCESS when nothing is left to scan.
 */
static really_inline
hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key,
                            size_t keyLen, size_t keyOffset, bool noCase,
                            const struct cb_info *cbi) {
    hwlm_error_t rv;
    // we stop scanning for the key-fragment when the rest of the key can't
    // possibly fit in the remaining buffer
    size_t end = len - keyLen + keyOffset + 2;

    const MASK_TYPE caseMask = getCaseMask();
    const MASK_TYPE mask1 = getMask(key[keyOffset + 0], noCase);
    const MASK_TYPE mask2 = getMask(key[keyOffset + 1], noCase);

    if (end - keyOffset < CHUNKSIZE) {
        rv = scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask,
                             mask1, mask2, cbi, keyOffset, end);
        return rv;
    }
    if (end - keyOffset == CHUNKSIZE) {
        rv = scanDoubleUnaligned(buf, len, keyOffset, key, keyLen, keyOffset,
                                 noCase, caseMask, mask1, mask2, cbi, keyOffset,
                                 end);
        return rv;
    }

    // All s* values below are offsets relative to buf.
    uintptr_t data = (uintptr_t)buf;
    uintptr_t s2Start = ROUNDUP_N(data + keyOffset, CHUNKSIZE) - data;
    uintptr_t s1End = s2Start + 1;
    uintptr_t last = data + end;
    uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
    uintptr_t s3Start = end - CHUNKSIZE;
    uintptr_t off = keyOffset;

    if (s2Start != keyOffset) {
        // first scan out to the fast scan starting point plus one char past to
        // catch the key on the overlap
        DEBUG_PRINTF("stage 1: -> %zu\n", s2Start);
        rv = scanDoubleUnaligned(buf, len, keyOffset, key, keyLen, keyOffset,
                                 noCase, caseMask, mask1, mask2, cbi, off,
                                 s1End);
        RETURN_IF_TERMINATED(rv);
    }
    off = s1End;

    if (s2Start >= end) {
        DEBUG_PRINTF("s2 == mL %zu\n", end);
        return HWLM_SUCCESS;
    }

    if (likely(s2Start != s2End)) {
        // scan as far as we can, bounded by the last point this key can
        // possibly match
        // (the trace reports s2End, the bound actually passed to the scan)
        DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s2End);
        rv = scanDoubleFast(buf, len, key, keyLen, keyOffset, noCase, caseMask,
                            mask1, mask2, cbi, s2Start, s2End);
        RETURN_IF_TERMINATED(rv);
        off = s2End;
    }

    // if there isn't enough data left to match the key, bail out
    if (s2End == end) {
        return HWLM_SUCCESS;
    }

    // The load at s3Start overlaps bytes already scanned; only [off, end) is
    // reported because scanDoubleUnaligned masks to its start/end arguments.
    DEBUG_PRINTF("stage 3: %zu -> %zu\n", s3Start, end);
    rv = scanDoubleUnaligned(buf, len, s3Start, key, keyLen, keyOffset, noCase,
                             caseMask, mask1, mask2, cbi, off, end);
    return rv;
}
/** \brief scanSingleMain() with the caseless flag fixed to on. */
static really_inline
hwlm_error_t scanSingleNoCase(const u8 *buf, size_t len, const u8 *key,
                              const struct cb_info *cbi) {
    const bool noCase = true;
    return scanSingleMain(buf, len, key, noCase, cbi);
}
/** \brief scanSingleMain() with the caseless flag fixed to off. */
static really_inline
hwlm_error_t scanSingleCase(const u8 *buf, size_t len, const u8 *key,
                            const struct cb_info *cbi) {
    const bool noCase = false;
    return scanSingleMain(buf, len, key, noCase, cbi);
}
// Single-character specialisation, used when keyLen = 1
static really_inline
hwlm_error_t scanSingle(const u8 *buf, size_t len, const u8 *key, bool noCase,
                        const struct cb_info *cbi) {
    // caseless matching is forced off unless the character is alphabetic
    const bool caseless = ourisalpha(key[0]) ? noCase : false;

    // kinda ugly, but dispatching to the fixed-flag wrappers forces constant
    // propagation of the flag
    return caseless ? scanSingleNoCase(buf, len, key, cbi)
                    : scanSingleCase(buf, len, key, cbi);
}
/** \brief scanDoubleMain() with the caseless flag fixed to on. */
static really_inline
hwlm_error_t scanDoubleNoCase(const u8 *buf, size_t len, const u8 *key,
                              size_t keyLen, size_t keyOffset,
                              const struct cb_info *cbi) {
    const bool noCase = true;
    return scanDoubleMain(buf, len, key, keyLen, keyOffset, noCase, cbi);
}
/** \brief scanDoubleMain() with the caseless flag fixed to off. */
static really_inline
hwlm_error_t scanDoubleCase(const u8 *buf, size_t len, const u8 *key,
                            size_t keyLen, size_t keyOffset,
                            const struct cb_info *cbi) {
    const bool noCase = false;
    return scanDoubleMain(buf, len, key, keyLen, keyOffset, noCase, cbi);
}
/**
 * \brief Two-character scan entry point: picks the fixed-flag wrapper that
 * matches \a noCase.
 */
static really_inline
hwlm_error_t scanDouble(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
                        size_t keyOffset, bool noCase,
                        const struct cb_info *cbi) {
    // kinda ugly, but dispatching to the fixed-flag wrappers forces constant
    // propagation of the flag
    return noCase ? scanDoubleNoCase(buf, len, key, keyLen, keyOffset, cbi)
                  : scanDoubleCase(buf, len, key, keyLen, keyOffset, cbi);
}
// main entry point for the scan code: picks the single- or double-character
// scanner based on keyLen
static really_inline
hwlm_error_t scan(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
                  size_t keyOffset, bool noCase, const struct cb_info *cbi) {
    // can't find string of length keyLen in a shorter buffer
    if (len < keyLen) {
        return HWLM_SUCCESS;
    }

    if (keyLen != 1) {
        return scanDouble(buf, len, key, keyLen, keyOffset, noCase, cbi);
    }

    assert(keyOffset == 0);
    return scanSingle(buf, len, key, noCase, cbi);
}
/**
 * \brief Block-mode scanner.
 *
 * Scans buf[0, len) for the literal described by \a n and calls \a cb for each
 * match. \a offset_adj is added to every reported match position and \a ctxt
 * is handed to the callback unchanged.
 *
 * \return HWLM_TERMINATED if the callback asked for matching to stop,
 *         HWLM_SUCCESS otherwise.
 */
hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len,
                      size_t offset_adj, HWLMCallback cb, void *ctxt) {
    assert(n && buf);

    struct cb_info cbi = { cb, n->id, ctxt, offset_adj };
    // n->str is a counted byte string (n->len bytes) that is not known to be
    // NUL-terminated, so bound the print with a precision ("%.*s") rather than
    // a minimum field width ("%*s"), and pass the int that '*' requires.
    DEBUG_PRINTF("nood scan of %zu bytes for %.*s\n", len, (int)n->len,
                 (const char *)n->str);
    return scan(buf, len, n->str, n->len, n->key_offset, n->nocase, &cbi);
}
/**
 * \brief Streaming-mode scanner.
 *
 * When history is available (\a hlen non-zero), the tail of the history and
 * the head of the new data (at most n->len - 1 bytes of each) are first joined
 * in \a temp_buf and scanned, so that matches straddling the boundary are
 * found. For that scan the offset adjustment is set to minus the number of
 * history bytes copied. The new data in \a buf is then scanned on its own with
 * no offset adjustment.
 *
 * \return HWLM_TERMINATED if the callback asked for matching to stop during
 *         either scan, otherwise the result of scanning \a buf.
 */
hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
                               size_t hlen, const u8 *buf, size_t len,
                               HWLMCallback cb, void *ctxt, u8 *temp_buf,
                               UNUSED size_t temp_buffer_size) {
    assert(n);

    struct cb_info cbi = {cb, n->id, ctxt, 0};

    if (hlen) {
        assert(hbuf);

        // neither side contributes enough bytes to hold a whole match alone
        const size_t histBytes = MIN(n->len - 1, hlen);
        const size_t newBytes = MIN(n->len - 1, len);
        const size_t joinedLen = histBytes + newBytes;
        assert(joinedLen < temp_buffer_size);

        memcpy(temp_buf, hbuf + hlen - histBytes, histBytes);
        memcpy(temp_buf + histBytes, buf, newBytes);

        cbi.offsetAdj = -histBytes;
        hwlm_error_t boundary = scan(temp_buf, joinedLen, n->str, n->len,
                                     n->key_offset, n->nocase, &cbi);
        if (boundary == HWLM_TERMINATED) {
            return HWLM_TERMINATED;
        }
    }

    assert(buf);

    cbi.offsetAdj = 0;
    return scan(buf, len, n->str, n->len, n->key_offset, n->nocase, &cbi);
}

59
src/hwlm/noodle_engine.h Normal file
View File

@@ -0,0 +1,59 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Noodle literal matcher: runtime API.
*/
#ifndef NOODLE_ENGINE_H
#define NOODLE_ENGINE_H
#include "hwlm.h"
#ifdef __cplusplus
extern "C"
{
#endif
struct noodTable;
/**
 * \brief Block-mode scanner.
 *
 * Scans buf[0, len) for the literal described by \a n and calls \a cb for each
 * match, passing \a ctxt through to it. \a offset_adj is added to every match
 * position reported to the callback.
 *
 * \return HWLM_TERMINATED if the callback asked for matching to stop,
 *         HWLM_SUCCESS otherwise.
 */
hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len,
size_t offset_adj, HWLMCallback cb, void *ctxt);
/**
 * \brief Streaming-mode scanner.
 *
 * \a hbuf / \a hlen describe the history (data preceding \a buf); \a buf /
 * \a len describe the new data. When \a hlen is non-zero, the end of the
 * history and the start of the new data are joined in \a temp_buf and scanned
 * first, then \a buf is scanned on its own. The implementation asserts that
 * the joined region is smaller than \a temp_buffer_size.
 *
 * \return HWLM_TERMINATED if the callback asked for matching to stop,
 *         otherwise the result of scanning \a buf.
 */
hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
size_t hlen, const u8 *buf, size_t len,
HWLMCallback cb, void *ctxt, u8 *temp_buf,
size_t temp_buffer_size);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif

View File

@@ -0,0 +1,234 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/* noodle scan parts for AVX2 (256-bit vectors) */
/**
 * \brief Build the 256-bit comparison mask for character \a c: the byte
 * (case-folded with caseClear8() when \a noCase is set) replicated by
 * set32x8().
 */
static really_inline m256 getMask(u8 c, bool noCase) {
    return set32x8(caseClear8(c, noCase));
}
/**
 * \brief 256-bit mask with every byte set to 0xdf; ANDing data with it clears
 * bit 0x20 of each byte, matching what caseClear8() does to the key.
 */
static really_inline m256 getCaseMask(void) {
    const u8 foldByte = 0xdf;
    return set32x8(foldByte);
}
/*
 * Single-character scan of one 32-byte chunk loaded (unaligned) from
 * buf + offset, reporting only candidates whose position lies in
 * [start, end).
 *
 * The caller must make sure 32 bytes are readable at buf + offset and that
 * [start, end) lies inside the loaded chunk. Returns HWLM_TERMINATED if the
 * callback stops matching (via SINGLE_ZSCAN), HWLM_SUCCESS otherwise.
 */
static really_inline
hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset,
const u8 *key, bool noCase, m256 caseMask,
m256 mask1, const struct cb_info *cbi,
size_t start, size_t end) {
const u8 *d = buf + offset;
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
const size_t l = end - start;
m256 v = loadu256(d);
if (noCase) {
v = and256(v, caseMask);
}
// bit i of z <-> byte d[i] equals the key character
u32 z = movemask256(eq256(mask1, v));
// keep l bits starting at the chunk-relative position of start; the
// 64-bit intermediate keeps the shift valid when l == 32
u32 buf_off = start - offset;
u32 mask = (u32)((u64a)(1ULL << l) - 1) << buf_off;
DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
z &= mask;
SINGLE_ZSCAN();
return HWLM_SUCCESS;
}
/*
 * Two-character scan of one 32-byte chunk loaded (unaligned) from
 * buf + offset, reporting only candidates whose bit position lies in
 * [start, end). Candidates are confirmed against the whole key by
 * DOUBLE_ZSCAN -> final().
 *
 * The caller must make sure 32 bytes are readable at buf + offset and that
 * [start, end) lies inside the loaded chunk. Returns HWLM_TERMINATED if the
 * callback stops matching, HWLM_SUCCESS otherwise.
 */
static really_inline
hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset,
const u8 *key, size_t keyLen, size_t keyOffset,
bool noCase, m256 caseMask, m256 mask1,
m256 mask2, const struct cb_info *cbi,
size_t start, size_t end) {
const u8 *d = buf + offset;
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
size_t l = end - start;
m256 v = loadu256(d);
if (noCase) {
v = and256(v, caseMask);
}
// z0: bytes equal to the first character; z1: bytes equal to the second
u32 z0 = movemask256(eq256(mask1, v));
u32 z1 = movemask256(eq256(mask2, v));
// bit i of z is set when d[i - 1] is the first character and d[i] the
// second, i.e. the bit marks the second byte of the pair
u32 z = (z0 << 1) & z1;
// mask out where we can't match
u32 buf_off = start - offset;
u32 mask = (u32)((u64a)(1ULL << l) - 1) << buf_off;
DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
z &= mask;
DOUBLE_ZSCAN();
return HWLM_SUCCESS;
}
// The short scan routine. It is used both to scan data up to an
// alignment boundary if needed and to finish off data that the aligned scan
// function can't handle (due to small/unaligned chunk at end)
//
// Scans buf[start, end) (at most 32 bytes) for a single character. Returns
// HWLM_TERMINATED if the callback stops matching, HWLM_SUCCESS otherwise.
static really_inline
hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
bool noCase, m256 caseMask, m256 mask1,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + start;
size_t l = end - start;
DEBUG_PRINTF("l %zu\n", l);
assert(l <= 32);
if (!l) {
return HWLM_SUCCESS;
}
m256 v;
if (l < 4) {
// NOTE(review): only the first l bytes of v are written on this path;
// the remaining bytes are left uninitialised. The corresponding bits are
// discarded by the mask below.
u8 *vp = (u8*)&v;
switch (l) {
case 3: vp[2] = d[2]; // fallthrough
case 2: vp[1] = d[1]; // fallthrough
case 1: vp[0] = d[0];
}
} else {
// presumably loads exactly l bytes without touching memory beyond them;
// confirm in util/masked_move.h
v = masked_move256_len(d, l);
}
if (noCase) {
v = and256(v, caseMask);
}
// mask out where we can't match
// (l is in 1..32 here, so the shift count is in 0..31)
u32 mask = (0xFFFFFFFF >> (32 - l));
u32 z = mask & movemask256(eq256(mask1, v));
SINGLE_ZSCAN();
return HWLM_SUCCESS;
}
/*
 * Two-character counterpart of scanSingleShort: scans buf[start, end) (at most
 * 32 bytes) for the character pair described by mask1/mask2 and confirms
 * candidates against the whole key via DOUBLE_ZSCAN -> final().
 *
 * Returns HWLM_TERMINATED if the callback stops matching, HWLM_SUCCESS
 * otherwise.
 */
static really_inline
hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
size_t keyLen, size_t keyOffset, bool noCase,
m256 caseMask, m256 mask1, m256 mask2,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + start;
size_t l = end - start;
if (!l) {
return HWLM_SUCCESS;
}
assert(l <= 32);
m256 v;
DEBUG_PRINTF("d %zu\n", d - buf);
if (l < 4) {
// NOTE(review): only the first l bytes of v are written on this path;
// the remaining bytes are left uninitialised. The corresponding bits are
// discarded by the mask below.
u8 *vp = (u8*)&v;
switch (l) {
case 3: vp[2] = d[2]; // fallthrough
case 2: vp[1] = d[1]; // fallthrough
case 1: vp[0] = d[0];
}
} else {
// presumably loads exactly l bytes without touching memory beyond them;
// confirm in util/masked_move.h
v = masked_move256_len(d, l);
}
if (noCase) {
v = and256(v, caseMask);
}
// z0: bytes equal to the first character; z1: bytes equal to the second
u32 z0 = movemask256(eq256(mask1, v));
u32 z1 = movemask256(eq256(mask2, v));
// bit i of z marks the second byte of a first/second character pair
u32 z = (z0 << 1) & z1;
// mask out where we can't match
u32 mask = (0xFFFFFFFF >> (32 - l));
z &= mask;
DOUBLE_ZSCAN();
return HWLM_SUCCESS;
}
/*
 * Main loop of the single-character scan: walks buf[start, end) 32 bytes at a
 * time and reports every byte that equals the key character.
 *
 * scanSingleMain() calls this with start and end rounded to CHUNKSIZE-aligned
 * addresses; load256 is presumably the aligned load (confirm in
 * util/simd_utils.h). Returns HWLM_TERMINATED if the callback stops matching,
 * HWLM_SUCCESS otherwise.
 */
static really_inline
hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key,
bool noCase, m256 caseMask, m256 mask1,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + start, *e = buf + end;
assert(d < e);
for (; d < e; d += 32) {
m256 v = noCase ? and256(load256(d), caseMask) : load256(d);
// bit i of z <-> byte d[i] equals the key character
u32 z = movemask256(eq256(mask1, v));
// On large packet buffers, this prefetch appears to get us about 2%.
__builtin_prefetch(d + 128);
SINGLE_ZSCAN();
}
return HWLM_SUCCESS;
}
/*
 * Main loop of the two-character scan: walks buf[start, end) 32 bytes at a
 * time looking for the first character immediately followed by the second,
 * including pairs that straddle two consecutive chunks. Candidates are
 * confirmed against the whole key via DOUBLE_ZSCAN -> final().
 *
 * scanDoubleMain() calls this with start and end rounded to CHUNKSIZE-aligned
 * addresses; load256 is presumably the aligned load (confirm in
 * util/simd_utils.h). Returns HWLM_TERMINATED if the callback stops matching,
 * HWLM_SUCCESS otherwise.
 */
static really_inline
hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key,
size_t keyLen, size_t keyOffset, bool noCase,
m256 caseMask, m256 mask1, m256 mask2,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + start, *e = buf + end;
DEBUG_PRINTF("start %zu end %zu \n", start, end);
assert(d < e);
// 1 if the last byte of the previous chunk matched the first character;
// starts at 0, so a pair ending on the very first byte is not reported here
u8 lastz0 = 0;
for (; d < e; d += 32) {
m256 v = noCase ? and256(load256(d), caseMask) : load256(d);
// we have to pull the masks out of the AVX registers because we can't
// byte shift between the lanes
u32 z0 = movemask256(eq256(mask1, v));
u32 z1 = movemask256(eq256(mask2, v));
// bit i of z marks the second byte of a pair; bit 0 uses the carry from
// the previous chunk
u32 z = (lastz0 | (z0 << 1)) & z1;
// carry bit 31 of z0 (first character in the last byte) to the next chunk
lastz0 = (z0 & 0x80000000) >> 31;
// On large packet buffers, this prefetch appears to get us about 2%.
__builtin_prefetch(d + 128);
DOUBLE_ZSCAN();
}
return HWLM_SUCCESS;
}

View File

@@ -0,0 +1,202 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/* noodle scan parts for SSE */
/**
 * \brief Build the 128-bit comparison mask for character \a c: the byte
 * (case-folded with caseClear8() when \a noCase is set) replicated by
 * set16x8().
 */
static really_inline m128 getMask(u8 c, bool noCase) {
    return set16x8(caseClear8(c, noCase));
}
/**
 * \brief 128-bit mask with every byte set to 0xdf; ANDing data with it clears
 * bit 0x20 of each byte, matching what caseClear8() does to the key.
 */
static really_inline m128 getCaseMask(void) {
    const u8 foldByte = 0xdf;
    return set16x8(foldByte);
}
/*
 * Single-character scan of a short range: buf[start, end) must be at most 16
 * bytes. The bytes are copied into a zeroed vector, compared against the key
 * character, and every hit inside the range is reported via SINGLE_ZSCAN.
 *
 * Returns HWLM_TERMINATED if the callback stops matching, HWLM_SUCCESS
 * otherwise.
 */
static really_inline
hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
bool noCase, m128 caseMask, m128 mask1,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + start;
size_t l = end - start;
DEBUG_PRINTF("l %zu\n", l);
assert(l <= 16);
if (!l) {
return HWLM_SUCCESS;
}
m128 v = zeroes128();
// we don't have a clever way of doing this move yet
memcpy(&v, d, l);
if (noCase) {
v = and128(v, caseMask);
}
// mask out where we can't match
// (l is in 1..16 here, so the shift count is in 0..15)
u32 mask = (0xFFFF >> (16 - l));
u32 z = mask & movemask128(eq128(mask1, v));
SINGLE_ZSCAN();
return HWLM_SUCCESS;
}
/*
 * Single-character scan of one 16-byte chunk loaded (unaligned) from
 * buf + offset, reporting only candidates whose position lies in
 * [start, end).
 *
 * The caller must make sure 16 bytes are readable at buf + offset and that
 * [start, end) lies inside the loaded chunk. Returns HWLM_TERMINATED if the
 * callback stops matching (via SINGLE_ZSCAN), HWLM_SUCCESS otherwise.
 */
static really_inline
hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset,
const u8 *key, bool noCase, m128 caseMask,
m128 mask1, const struct cb_info *cbi,
size_t start, size_t end) {
const u8 *d = buf + offset;
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
const size_t l = end - start;
m128 v = loadu128(d);
if (noCase) {
v = and128(v, caseMask);
}
// keep l bits starting at the chunk-relative position of start
u32 buf_off = start - offset;
u32 mask = ((1 << l) - 1) << buf_off;
// bit i of z <-> byte d[i] equals the key character (already masked)
u32 z = mask & movemask128(eq128(mask1, v));
DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
// redundant: z was already ANDed with mask when it was computed above
z &= mask;
SINGLE_ZSCAN();
return HWLM_SUCCESS;
}
/**
 * Two-character scan of a short range: buf[start, end) must be at most 16
 * bytes. The bytes are copied into a zeroed vector, positions where the first
 * character is immediately followed by the second are collected, and each
 * candidate is confirmed against the whole key via DOUBLE_ZSCAN -> final().
 *
 * Returns HWLM_TERMINATED if the callback stops matching, HWLM_SUCCESS
 * otherwise.
 */
static really_inline
hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
                             size_t keyLen, size_t keyOffset, bool noCase,
                             m128 caseMask, m128 mask1, m128 mask2,
                             const struct cb_info *cbi, size_t start,
                             size_t end) {
    const u8 *d = buf + start;
    size_t l = end - start;
    if (!l) {
        return HWLM_SUCCESS;
    }
    // v is a single 16-byte vector: a longer range would overrun the memcpy
    // below and give the mask computation an out-of-range shift count
    assert(l <= 16);

    DEBUG_PRINTF("d %zu\n", d - buf);
    m128 v = zeroes128();
    memcpy(&v, d, l);
    if (noCase) {
        v = and128(v, caseMask);
    }

    // bit i of z is set when d[i - 1] is the first character and d[i] the
    // second, i.e. the bit marks the second byte of the pair
    u32 z = movemask128(and128(shiftLeft8Bits(eq128(mask1, v)), eq128(mask2, v)));

    // mask out where we can't match
    u32 mask = (0xFFFF >> (16 - l));
    z &= mask;

    DOUBLE_ZSCAN();

    return HWLM_SUCCESS;
}
/*
 * Two-character scan of one 16-byte chunk loaded (unaligned) from
 * buf + offset, reporting only candidates whose bit position lies in
 * [start, end). Candidates are confirmed against the whole key by
 * DOUBLE_ZSCAN -> final().
 *
 * The caller must make sure 16 bytes are readable at buf + offset and that
 * [start, end) lies inside the loaded chunk. Returns HWLM_TERMINATED if the
 * callback stops matching, HWLM_SUCCESS otherwise.
 */
static really_inline
hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset,
const u8 *key, size_t keyLen, size_t keyOffset,
bool noCase, m128 caseMask, m128 mask1,
m128 mask2, const struct cb_info *cbi,
size_t start, size_t end) {
const u8 *d = buf + offset;
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
size_t l = end - start;
m128 v = loadu128(d);
if (noCase) {
v = and128(v, caseMask);
}
// bit i of z is set when d[i - 1] is the first character and d[i] the
// second, i.e. the bit marks the second byte of the pair
u32 z = movemask128(and128(shiftLeft8Bits(eq128(mask1, v)), eq128(mask2, v)));
// mask out where we can't match
u32 buf_off = start - offset;
u32 mask = ((1 << l) - 1) << buf_off;
DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
z &= mask;
DOUBLE_ZSCAN();
return HWLM_SUCCESS;
}
/*
 * Main loop of the single-character scan: walks buf[start, end) 16 bytes at a
 * time and reports every byte that equals the key character.
 *
 * scanSingleMain() calls this with start and end rounded to CHUNKSIZE-aligned
 * addresses; load128 is presumably the aligned load (confirm in
 * util/simd_utils.h). Returns HWLM_TERMINATED if the callback stops matching,
 * HWLM_SUCCESS otherwise.
 */
static really_inline
hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key,
bool noCase, m128 caseMask, m128 mask1,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + start, *e = buf + end;
assert(d < e);
for (; d < e; d += 16) {
m128 v = noCase ? and128(load128(d), caseMask) : load128(d);
// bit i of z <-> byte d[i] equals the key character
u32 z = movemask128(eq128(mask1, v));
// On large packet buffers, this prefetch appears to get us about 2%.
__builtin_prefetch(d + 128);
SINGLE_ZSCAN();
}
return HWLM_SUCCESS;
}
/*
 * Main loop of the two-character scan: walks buf[start, end) 16 bytes at a
 * time looking for the first character immediately followed by the second,
 * including pairs that straddle two consecutive chunks. Candidates are
 * confirmed against the whole key via DOUBLE_ZSCAN -> final().
 *
 * scanDoubleMain() calls this with start and end rounded to CHUNKSIZE-aligned
 * addresses; load128 is presumably the aligned load (confirm in
 * util/simd_utils.h). Returns HWLM_TERMINATED if the callback stops matching,
 * HWLM_SUCCESS otherwise.
 */
static really_inline
hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key,
size_t keyLen, size_t keyOffset, bool noCase,
m128 caseMask, m128 mask1, m128 mask2,
const struct cb_info *cbi, size_t start,
size_t end) {
const u8 *d = buf + start, *e = buf + end;
assert(d < e);
// first-character result for the last byte of the previous chunk, moved to
// byte 0; starts at zero, so a pair ending on the very first byte is not
// reported here
m128 lastz1 = zeroes128();
for (; d < e; d += 16) {
m128 v = noCase ? and128(load128(d), caseMask) : load128(d);
// z1: bytes equal to the first character; z2: bytes equal to the second
m128 z1 = eq128(mask1, v);
m128 z2 = eq128(mask2, v);
// bit i of z marks the second byte of a pair; byte 0 uses the carry from
// the previous chunk
u32 z = movemask128(and128(or128(lastz1, shiftLeft8Bits(z1)), z2));
// shift right by 15 bytes: byte 15 of z1 becomes byte 0 for the next chunk
lastz1 = _mm_srli_si128(z1, 15);
// On large packet buffers, this prefetch appears to get us about 2%.
__builtin_prefetch(d + 128);
DEBUG_PRINTF("z 0x%08x\n", z);
DOUBLE_ZSCAN();
}
return HWLM_SUCCESS;
}

View File

@@ -0,0 +1,47 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Data structures for Noodle literal matcher engine.
*/
#ifndef NOODLE_INTERNAL_H_25D751C42E34A6
#define NOODLE_INTERNAL_H_25D751C42E34A6
#include "ue2common.h"
/**
 * \brief Description of the single literal a Noodle engine scans for.
 *
 * The runtime (noodExec / noodExecStreaming) reads these fields and passes
 * them to its scan routine as the key, key length, key offset and caseless
 * flag.
 */
struct noodTable {
u32 id; //!< ID handed to the match callback
u32 len; //!< length of str in bytes
u32 key_offset; //!< offset within str of the fragment the scanners look for
u8 nocase; //!< non-zero for caseless matching
u8 str[]; //!< the literal bytes (flexible array member)
};
#endif /* NOODLE_INTERNAL_H_25D751C42E34A6 */

131
src/nfa/accel.c Normal file
View File

@@ -0,0 +1,131 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "accel.h"
#include "shufti.h"
#include "truffle.h"
#include "vermicelli.h"
#include "ue2common.h"
/**
 * \brief Run the acceleration scheme described by \a accel over [c, c_end).
 *
 * Dispatches on accel->accel_type to the matching scanner. Each scheme has a
 * minimum amount of data it needs; when less is available the function returns
 * \a c unchanged. ACCEL_NONE always returns \a c, and ACCEL_RED_TAPE skips
 * straight to \a c_end. Types that have no case below hit the assertion and
 * return \a c.
 *
 * The scanner's result is finally moved back by accel->generic.offset, but
 * never to a position before \a c.
 *
 * The length guards compare (c_end - c) instead of forming c + N, so that no
 * pointer beyond the end of the buffer is ever computed.
 */
const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
    assert(ISALIGNED_N(accel, alignof(union AccelAux)));
    const u8 *rv;

    switch (accel->accel_type) {
    case ACCEL_NONE:
        DEBUG_PRINTF("accel none %p %p\n", c, c_end);
        return c;

    case ACCEL_VERM:
        DEBUG_PRINTF("accel verm %p %p\n", c, c_end);
        if (c_end - c <= 15) {
            return c;
        }

        rv = vermicelliExec(accel->verm.c, 0, c, c_end);
        break;

    case ACCEL_VERM_NOCASE:
        DEBUG_PRINTF("accel verm nc %p %p\n", c, c_end);
        if (c_end - c <= 15) {
            return c;
        }

        rv = vermicelliExec(accel->verm.c, 1, c, c_end);
        break;

    case ACCEL_DVERM:
        DEBUG_PRINTF("accel dverm %p %p\n", c, c_end);
        if (c_end - c <= 16 + 1) {
            return c;
        }

        /* need to stop one early to get an accurate end state */
        rv = vermicelliDoubleExec(accel->dverm.c1, accel->dverm.c2, 0, c,
                                  c_end - 1);
        break;

    case ACCEL_DVERM_NOCASE:
        DEBUG_PRINTF("accel dverm nc %p %p\n", c, c_end);
        if (c_end - c <= 16 + 1) {
            return c;
        }

        /* need to stop one early to get an accurate end state */
        rv = vermicelliDoubleExec(accel->dverm.c1, accel->dverm.c2, 1, c,
                                  c_end - 1);
        break;

    case ACCEL_SHUFTI:
        DEBUG_PRINTF("accel shufti %p %p\n", c, c_end);
        if (c_end - c <= 15) {
            return c;
        }

        rv = shuftiExec(accel->shufti.lo, accel->shufti.hi, c, c_end);
        break;

    case ACCEL_TRUFFLE:
        DEBUG_PRINTF("accel Truffle %p %p\n", c, c_end);
        if (c_end - c <= 15) {
            return c;
        }

        rv = truffleExec(accel->truffle.mask1, accel->truffle.mask2, c, c_end);
        break;

    case ACCEL_DSHUFTI:
        DEBUG_PRINTF("accel dshufti %p %p\n", c, c_end);
        if (c_end - c <= 15 + 1) {
            return c;
        }

        /* need to stop one early to get an accurate end state */
        rv = shuftiDoubleExec(accel->dshufti.lo1,
                              accel->dshufti.hi1,
                              accel->dshufti.lo2,
                              accel->dshufti.hi2, c, c_end - 1);
        break;

    case ACCEL_RED_TAPE:
        DEBUG_PRINTF("accel red tape %p %p\n", c, c_end);
        rv = c_end;
        break;

    default:
        assert(!"not here");
        return c;
    }

    DEBUG_PRINTF("adjusting for offset %u\n", accel->generic.offset);
    /* move the result back by the scheme's offset, clamping first so that
     * the returned pointer is never before c */
    rv = MAX(c + accel->generic.offset, rv);
    rv -= accel->generic.offset;

    return rv;
}

112
src/nfa/accel.h Normal file
View File

@@ -0,0 +1,112 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Acceleration: data structures and common definitions.
*/
#ifndef ACCEL_H
#define ACCEL_H
#include "ue2common.h"
/* run time defs */
/* NOTE(review): the three constants below are not referenced by run_accel();
 * their exact meaning should be confirmed against the engines that use them. */
#define BAD_ACCEL_DIST 4
#define SMALL_ACCEL_PENALTY 8
#define BIG_ACCEL_PENALTY 32
/// Minimum length of the scan buffer for us to attempt acceleration.
/// NOTE(review): run_accel() uses literal 15/16 byte thresholds rather than
/// this constant; confirm they are meant to stay in step.
#define ACCEL_MIN_LEN 16
/**
 * \brief Identifies an acceleration scheme; stored in the accel_type byte of
 * union AccelAux.
 *
 * run_accel() implements NONE, VERM, VERM_NOCASE, DVERM, DVERM_NOCASE, SHUFTI,
 * DSHUFTI, TRUFFLE and RED_TAPE. The remaining values fall into its default
 * case. The per-value notes give the name used in dump output.
 */
enum AccelType {
ACCEL_NONE, // "none": run_accel() returns its start pointer
ACCEL_VERM, // "vermicelli": single character, case-sensitive
ACCEL_VERM_NOCASE, // "vermicelli nocase"
ACCEL_DVERM, // "double-vermicelli": character pair, case-sensitive
ACCEL_DVERM_NOCASE, // "double-vermicelli nocase"
ACCEL_RVERM, // "reverse vermicelli"
ACCEL_RVERM_NOCASE, // "reverse vermicelli nocase"
ACCEL_RDVERM, // "reverse double-vermicelli"
ACCEL_RDVERM_NOCASE, // "reverse double-vermicelli nocase"
ACCEL_REOD, // "reverse eod"
ACCEL_REOD_NOCASE, // "reverse eod nocase"
ACCEL_RDEOD, // "reverse double-eod"
ACCEL_RDEOD_NOCASE, // "reverse double-eod nocase"
ACCEL_SHUFTI, // "shufti": uses the lo/hi masks
ACCEL_DSHUFTI, // "double-shufti": uses two lo/hi mask pairs
ACCEL_TRUFFLE, // "truffle": uses mask1/mask2
ACCEL_RED_TAPE // "red tape": run_accel() skips to the end of the buffer
};
/**
 * \brief Structure for accel framework.
 *
 * Every member begins with the same accel_type byte (an enum AccelType value),
 * and every struct member follows it with the same offset byte, so the type
 * can be read through the bare accel_type member and the offset through
 * generic regardless of which scheme is stored. run_accel() subtracts offset
 * from the position found by the scheme.
 */
union AccelAux {
u8 accel_type;
// fields common to all schemes
struct {
u8 accel_type;
u8 offset;
} generic;
// ACCEL_VERM / ACCEL_VERM_NOCASE
struct {
u8 accel_type;
u8 offset;
u8 c; // uppercase if nocase
} verm;
// ACCEL_DVERM / ACCEL_DVERM_NOCASE
struct {
u8 accel_type;
u8 offset;
u8 c1; // uppercase if nocase
u8 c2; // uppercase if nocase
} dverm;
// ACCEL_SHUFTI: masks passed to shuftiExec()
struct {
u8 accel_type;
u8 offset;
m128 lo;
m128 hi;
} shufti;
// ACCEL_DSHUFTI: masks passed to shuftiDoubleExec()
struct {
u8 accel_type;
u8 offset;
m128 lo1;
m128 hi1;
m128 lo2;
m128 hi2;
} dshufti;
// ACCEL_TRUFFLE: masks passed to truffleExec()
struct {
u8 accel_type;
u8 offset;
m128 mask1;
m128 mask2;
} truffle;
};
/**
 * Runs the specified acceleration scheme between c and c_end and returns a
 * pointer before which the acceleration scheme does not match.
 *
 * The result is c itself when the scheme is ACCEL_NONE or when too little
 * data is available for the scheme to run.
 */
const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end);
#endif

152
src/nfa/accel_dump.cpp Normal file
View File

@@ -0,0 +1,152 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Acceleration: dump code.
*/
#include "config.h"
#include "accel.h"
#include "accel_dump.h"
#include "shufticompile.h"
#include "trufflecompile.h"
#include "ue2common.h"
#include "util/charreach.h"
#include "util/dump_charclass.h"
#include "util/dump_mask.h"
#include <cstdio>
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
namespace ue2 {
/** \brief Human-readable name of an AccelType value, as used in dump output. */
static
const char *accelName(u8 accel_type) {
    // anything that is not a known AccelType keeps this fallback
    const char *name = "unknown!";

    switch (accel_type) {
    case ACCEL_NONE:
        name = "none";
        break;
    case ACCEL_VERM:
        name = "vermicelli";
        break;
    case ACCEL_VERM_NOCASE:
        name = "vermicelli nocase";
        break;
    case ACCEL_DVERM:
        name = "double-vermicelli";
        break;
    case ACCEL_DVERM_NOCASE:
        name = "double-vermicelli nocase";
        break;
    case ACCEL_RVERM:
        name = "reverse vermicelli";
        break;
    case ACCEL_RVERM_NOCASE:
        name = "reverse vermicelli nocase";
        break;
    case ACCEL_RDVERM:
        name = "reverse double-vermicelli";
        break;
    case ACCEL_RDVERM_NOCASE:
        name = "reverse double-vermicelli nocase";
        break;
    case ACCEL_REOD:
        name = "reverse eod";
        break;
    case ACCEL_REOD_NOCASE:
        name = "reverse eod nocase";
        break;
    case ACCEL_RDEOD:
        name = "reverse double-eod";
        break;
    case ACCEL_RDEOD_NOCASE:
        name = "reverse double-eod nocase";
        break;
    case ACCEL_SHUFTI:
        name = "shufti";
        break;
    case ACCEL_DSHUFTI:
        name = "double-shufti";
        break;
    case ACCEL_TRUFFLE:
        name = "truffle";
        break;
    case ACCEL_RED_TAPE:
        name = "red tape";
        break;
    default:
        break;
    }

    return name;
}
void dumpAccelInfo(FILE *f, const AccelAux &accel) {
fprintf(f, " %s", accelName(accel.accel_type));
if (accel.generic.offset) {
fprintf(f, "+%hhu", accel.generic.offset);
}
switch (accel.accel_type) {
case ACCEL_VERM:
case ACCEL_VERM_NOCASE:
case ACCEL_RVERM:
case ACCEL_RVERM_NOCASE:
fprintf(f, " [\\x%02hhx]\n", accel.verm.c);
break;
case ACCEL_DVERM:
case ACCEL_DVERM_NOCASE:
case ACCEL_RDVERM:
case ACCEL_RDVERM_NOCASE:
fprintf(f, " [\\x%02hhx\\x%02hhx]\n", accel.dverm.c1, accel.dverm.c2);
break;
case ACCEL_SHUFTI: {
fprintf(f, "\n");
fprintf(f, "lo %s\n",
dumpMask((const u8 *)&accel.shufti.lo, 128).c_str());
fprintf(f, "hi %s\n",
dumpMask((const u8 *)&accel.shufti.hi, 128).c_str());
CharReach cr = shufti2cr(accel.shufti.lo, accel.shufti.hi);
fprintf(f, "count %zu class %s\n", cr.count(),
describeClass(cr).c_str());
break;
}
case ACCEL_DSHUFTI:
fprintf(f, "\n");
fprintf(f, "lo1 %s\n",
dumpMask((const u8 *)&accel.dshufti.lo1, 128).c_str());
fprintf(f, "hi1 %s\n",
dumpMask((const u8 *)&accel.dshufti.hi1, 128).c_str());
fprintf(f, "lo2 %s\n",
dumpMask((const u8 *)&accel.dshufti.lo2, 128).c_str());
fprintf(f, "hi2 %s\n",
dumpMask((const u8 *)&accel.dshufti.hi2, 128).c_str());
break;
case ACCEL_TRUFFLE: {
fprintf(f, "\n");
fprintf(f, "lo %s\n",
dumpMask((const u8 *)&accel.truffle.mask1, 128).c_str());
fprintf(f, "hi %s\n",
dumpMask((const u8 *)&accel.truffle.mask2, 128).c_str());
CharReach cr = truffle2cr(accel.truffle.mask1, accel.truffle.mask2);
fprintf(f, "count %zu class %s\n", cr.count(),
describeClass(cr).c_str());
break;
}
default:
fprintf(f, "\n");
break;
}
}
} // namespace ue2

49
src/nfa/accel_dump.h Normal file
View File

@@ -0,0 +1,49 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Acceleration: dump code.
*/
#ifndef ACCEL_DUMP_H
#define ACCEL_DUMP_H
#if defined(DUMP_SUPPORT)
#include <cstdio>
union AccelAux;
namespace ue2 {
/**
 * \brief Write a textual description of the acceleration scheme held in
 * \a accel to the stream \a f.
 *
 * Only declared when DUMP_SUPPORT is defined.
 */
void dumpAccelInfo(FILE *f, const AccelAux &accel);
} // namespace ue2
#endif // DUMP_SUPPORT
#endif // ACCEL_DUMP_H

191
src/nfa/accelcompile.cpp Normal file
View File

@@ -0,0 +1,191 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "accel.h"
#include "accelcompile.h"
#include "shufticompile.h"
#include "trufflecompile.h"
#include "nfagraph/ng_limex_accel.h" /* for constants */
#include "util/bitutils.h"
#include "util/verify_types.h"
#include <map>
#include <set>
#include <vector>
using namespace std;
namespace ue2 {
static
void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
assert(aux->accel_type == ACCEL_NONE);
if (info.single_stops.all()) {
return;
}
size_t outs = info.single_stops.count();
DEBUG_PRINTF("%zu outs\n", outs);
assert(outs && outs < 256);
u32 offset = info.single_offset;
if (outs == 1) {
aux->accel_type = ACCEL_VERM;
aux->verm.offset = offset;
aux->verm.c = info.single_stops.find_first();
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
return;
}
if (outs == 2 && info.single_stops.isCaselessChar()) {
aux->accel_type = ACCEL_VERM_NOCASE;
aux->verm.offset = offset;
aux->verm.c = info.single_stops.find_first() & CASE_CLEAR;
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
aux->verm.c);
return;
}
DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
if (-1 != shuftiBuildMasks(info.single_stops, &aux->shufti.lo,
&aux->shufti.hi)) {
aux->accel_type = ACCEL_SHUFTI;
aux->shufti.offset = offset;
DEBUG_PRINTF("shufti built OK\n");
return;
} else {
DEBUG_PRINTF("shufti build failed, falling through\n");
}
if (outs <= ACCEL_MAX_STOP_CHAR) {
DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
aux->accel_type = ACCEL_TRUFFLE;
aux->truffle.offset = offset;
truffleBuildMasks(info.single_stops, &aux->truffle.mask1,
&aux->truffle.mask2);
return;
}
DEBUG_PRINTF("unable to accelerate case with %zu outs\n", outs);
}
static
bool isCaselessDouble(const flat_set<pair<u8, u8>> &stop) {
// test for vector containing <A,Z> <A,z> <a,Z> <a,z>
if (stop.size() != 4) {
return false;
}
const u8 a = stop.begin()->first & CASE_CLEAR;
const u8 b = stop.begin()->second & CASE_CLEAR;
flat_set<pair<u8, u8>>::const_iterator it, ite;
for (it = stop.begin(), ite = stop.end(); it != ite; ++it) {
if ((it->first & CASE_CLEAR) != a || (it->second & CASE_CLEAR) != b) {
return false;
}
}
return true;
}
static
void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
size_t outs1 = info.double_stop1.count();
size_t outs2 = info.double_stop2.size();
u8 offset = verify_u8(info.double_offset);
DEBUG_PRINTF("outs1=%zu, outs2=%zu\n", outs1, outs2);
assert(aux->accel_type == ACCEL_NONE);
if (!outs2) {
/* no double byte accel available */
return;
}
// double-byte accel
if (outs1 == 0 && outs2 == 1) {
aux->accel_type = ACCEL_DVERM;
aux->dverm.offset = offset;
aux->dverm.c1 = info.double_stop2.begin()->first;
aux->dverm.c2 = info.double_stop2.begin()->second;
DEBUG_PRINTF("building double-vermicelli caseful for 0x%02hhx%02hhx\n",
aux->dverm.c1, aux->dverm.c2);
return;
}
if (outs1 == 0 && isCaselessDouble(info.double_stop2)) {
aux->accel_type = ACCEL_DVERM_NOCASE;
aux->dverm.offset = offset;
aux->dverm.c1 = info.double_stop2.begin()->first & CASE_CLEAR;
aux->dverm.c2 = info.double_stop2.begin()->second & CASE_CLEAR;
DEBUG_PRINTF("building double-vermicelli caseless for 0x%02hhx%02hhx\n",
aux->dverm.c1, aux->dverm.c2);
return;
}
if (outs1 + outs2 <= 8) {
if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438.
DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu"
" two-byte literals\n", outs1, outs2);
aux->accel_type = ACCEL_DSHUFTI;
aux->dshufti.offset = offset;
shuftiBuildDoubleMasks(info.double_stop1, info.double_stop2,
&aux->dshufti.lo1,
&aux->dshufti.hi1,
&aux->dshufti.lo2,
&aux->dshufti.hi2);
return;
}
}
// drop back to attempt single-byte accel
DEBUG_PRINTF("dropping back to single-byte acceleration\n");
aux->accel_type = ACCEL_NONE;
}
/**
 * \brief Fill \a aux with the acceleration scheme chosen for \a info.
 *
 * \a aux must arrive with accel_type == ACCEL_NONE. An empty single_stops set
 * selects "red tape"; otherwise double-byte schemes are tried first and
 * single-byte schemes are the fallback.
 *
 * \return true if a scheme other than ACCEL_NONE was selected.
 */
bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
    assert(aux->accel_type == ACCEL_NONE);

    if (!info.single_stops.none()) {
        buildAccelDouble(info, aux);
    } else {
        DEBUG_PRINTF("picked red tape\n");
        aux->accel_type = ACCEL_RED_TAPE;
        aux->generic.offset = info.single_offset;
    }

    if (aux->accel_type == ACCEL_NONE) {
        buildAccelSingle(info, aux);
    }

    const bool accelerated = aux->accel_type != ACCEL_NONE;

    // Any chosen scheme must carry one of the two offsets we were given.
    assert(!accelerated
           || aux->generic.offset == info.single_offset
           || aux->generic.offset == info.double_offset);
    return accelerated;
}
} // namespace ue2

56
src/nfa/accelcompile.h Normal file
View File

@@ -0,0 +1,56 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ACCEL_COMPILE_H
#define ACCEL_COMPILE_H
#include "ue2common.h"
#include "util/charreach.h"
#include "util/ue2_containers.h"
union AccelAux;
namespace ue2 {
/**
 * \brief Input to buildAccelAux(): the stop sets and offsets from which an
 * acceleration scheme is chosen.
 *
 * A default-constructed AccelInfo has both offsets set to zero and
 * single_stops set to CharReach::dot(); the double-byte stop members are
 * default-constructed.
 */
struct AccelInfo {
AccelInfo() : single_offset(0U), double_offset(0U),
single_stops(CharReach::dot()) {}
u32 single_offset; /**< offset correction to apply to single schemes */
u32 double_offset; /**< offset correction to apply to double schemes */
CharReach double_stop1; /**< single-byte accel stop literals for double
* schemes */
flat_set<std::pair<u8, u8>> double_stop2; /**< double-byte accel stop
* literals */
CharReach single_stops; /**< escapes for single byte acceleration */
};
/**
 * \brief Choose an acceleration scheme for \a info and write it into \a aux.
 *
 * The implementation asserts that aux->accel_type is ACCEL_NONE on entry.
 *
 * \return true if aux->accel_type is something other than ACCEL_NONE on
 * return, i.e. a scheme was selected.
 */
bool buildAccelAux(const AccelInfo &info, AccelAux *aux);
} // namespace ue2
#endif

76
src/nfa/callback.h Normal file
View File

@@ -0,0 +1,76 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief NFA Callback definitions, used at runtime.
*/
#ifndef NFA_CALLBACK_H
#define NFA_CALLBACK_H
#include "ue2common.h"
/** \brief The type for an NFA callback.
*
* This is a function that takes as arguments the current offset where the
* match occurs, the id of the match and the context pointer that was passed
* into the NFA API function that executed the NFA.
*
* The offset where the match occurs will be the offset after the character
* that caused the match. Thus, if we have a buffer containing 'abc', then a
* pattern that matches an empty string will have an offset of 0, a pattern
* that matches 'a' will have an offset of 1, and a pattern that matches 'abc'
* will have an offset of 3, which will be a value that is 'beyond' the size of
* the buffer. That is, if we have n characters in the buffer, there are n+1
* different potential offsets for matches.
*
* This function should return an int - currently the possible return values
* are 0, which means 'stop running the engine' or non-zero, which means
* 'continue matching'.
*/
typedef int (*NfaCallback)(u64a offset, ReportID id, void *context);
/** \brief The type for an NFA callback which also tracks start of match.
*
* see \ref NfaCallback
*
* NOTE(review): from_offset and to_offset are presumably the start and end
* offsets of the match respectively; confirm against the engines that invoke
* this callback.
*/
typedef int (*SomNfaCallback)(u64a from_offset, u64a to_offset, ReportID id,
void *context);
/**
* standard \ref NfaCallback return value indicating that engine execution
* should continue. (any non-zero value will serve this purpose)
*/
#define MO_CONTINUE_MATCHING 1
/**
* \ref NfaCallback return value indicating that engine execution should halt.
*/
#define MO_HALT_MATCHING 0
#endif // NFA_CALLBACK_H

1016
src/nfa/castle.c Normal file

File diff suppressed because it is too large Load Diff

64
src/nfa/castle.h Normal file
View File

@@ -0,0 +1,64 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef NFA_CASTLE_H
#define NFA_CASTLE_H
#ifdef __cplusplus
extern "C" {
#endif
#include "ue2common.h"
struct mq;
struct NFA;
/* Castle engine entry points; each takes the NFA and, for most entries, the
 * queue (struct mq) it operates on. */
char nfaExecCastle0_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecCastle0_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
struct mq *q);
char nfaExecCastle0_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecCastle0_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
char nfaExecCastle0_queueCompressState(const struct NFA *nfa,
const struct mq *q, s64a loc);
char nfaExecCastle0_expandState(const struct NFA *nfa, void *dest,
const void *src, u64a offset, u8 key);
/* Entry points with no Castle implementation: these names expand to
 * NFA_API_NO_IMPL, which is defined elsewhere. */
#define nfaExecCastle0_testEOD NFA_API_NO_IMPL
#define nfaExecCastle0_B_Reverse NFA_API_NO_IMPL
#define nfaExecCastle0_zombie_status NFA_API_NO_IMPL
#ifdef __cplusplus
}
#endif // __cplusplus
#endif

116
src/nfa/castle_dump.cpp Normal file
View File

@@ -0,0 +1,116 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Castle: multi-tenant repeat engine, dump code.
*/
#include "config.h"
#include "castle_dump.h"
#include "castle_internal.h"
#include "nfa_dump_internal.h"
#include "nfa_internal.h"
#include "shufticompile.h"
#include "trufflecompile.h"
#include "util/charreach.h"
#include "util/dump_charclass.h"
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
namespace ue2 {
/** \brief GraphViz dump hook for Castle engines; intentionally writes
 * nothing. Both arguments are ignored. */
void nfaExecCastle0_dumpDot(const struct NFA *, FILE *) {
// No GraphViz output for Castles.
}
static
void dumpTextSubCastle(const SubCastle &sub, FILE *f) {
const RepeatInfo *info =
(const RepeatInfo *)((const char *)&sub + sub.repeatInfoOffset);
fprintf(f, " repeat model: %s\n", repeatTypeName(info->type));
fprintf(f, " repeat bounds: {%u, %u}\n", info->repeatMin,
info->repeatMax);
fprintf(f, " min period: %u\n", info->minPeriod);
fprintf(f, " report: %u\n", sub.report);
fprintf(f, " full state offset: %u\n", sub.fullStateOffset);
fprintf(f, " stream state offset: %u\n", sub.streamStateOffset);
fprintf(f, "\n");
}
void nfaExecCastle0_dumpText(const struct NFA *nfa, FILE *f) {
const Castle *c = (const Castle *)getImplNfa(nfa);
fprintf(f, "Castle multi-tenant repeat engine\n");
fprintf(f, "\n");
fprintf(f, "Number of repeat tenants: %u\n", c->numRepeats);
fprintf(f, "Scan type: ");
switch (c->type) {
case CASTLE_DOT:
fprintf(f, "dot\n");
break;
case CASTLE_VERM:
fprintf(f, "verm, scanning for 0x%02x\n", c->u.verm.c);
break;
case CASTLE_NVERM:
fprintf(f, "negated verm, scanning for 0x%02x\n", c->u.verm.c);
break;
case CASTLE_SHUFTI: {
const CharReach cr = shufti2cr(c->u.shuf.mask_lo, c->u.shuf.mask_hi);
fprintf(f, "shufti, scanning for %s (%zu chars)\n",
describeClass(cr).c_str(), cr.count());
break;
}
case CASTLE_TRUFFLE: {
const CharReach cr = truffle2cr(c->u.truffle.mask1, c->u.truffle.mask2);
fprintf(f, "truffle, scanning for %s (%zu chars)\n",
describeClass(cr).c_str(), cr.count());
break;
}
default:
fprintf(f, "unknown type %u\n", c->type);
break;
}
fprintf(f, "\n");
dumpTextReverse(nfa, f);
fprintf(f, "\n");
const SubCastle *sub =
(const SubCastle *)((const char *)c + sizeof(Castle));
for (u32 i = 0; i < c->numRepeats; i++) {
fprintf(f, "Sub %u:\n", i);
dumpTextSubCastle(sub[i], f);
}
}
} // namespace ue2

47
src/nfa/castle_dump.h Normal file
View File

@@ -0,0 +1,47 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CASTLE_DUMP_H
#define CASTLE_DUMP_H
#if defined(DUMP_SUPPORT)
#include <cstdio>
struct NFA;
namespace ue2 {
/** \brief GraphViz dump hook for Castle engines; the implementation produces
 * no output. */
void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file);
/** \brief Write a human-readable description of the Castle engine \a nfa to
 * \a file. */
void nfaExecCastle0_dumpText(const NFA *nfa, FILE *file);
} // namespace ue2
#endif // DUMP_SUPPORT
#endif

101
src/nfa/castle_internal.h Normal file
View File

@@ -0,0 +1,101 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Castle: multi-tenant repeat engine, data structures.
*/
#ifndef NFA_CASTLE_INTERNAL_H
#define NFA_CASTLE_INTERNAL_H
#include "ue2common.h"
#include "repeat_internal.h"
/**
 * \brief Per-repeat record of a Castle engine.
 *
 * An array of these (one per repeat) follows struct Castle in the engine's
 * memory layout.
 */
struct SubCastle {
ReportID report; //!< report to raise on match
u32 fullStateOffset; //!< offset within full state (scratch)
u32 streamStateOffset; //!< offset within stream state
u32 repeatInfoOffset; //!< offset of RepeatInfo structure
// relative to the start of SubCastle
char exclusive; //!< exclusive info of this SubCastle
};
/* Scan mechanism identifiers, stored in Castle::type. */
#define CASTLE_DOT 0
#define CASTLE_VERM 1
#define CASTLE_NVERM 2
#define CASTLE_SHUFTI 3
#define CASTLE_TRUFFLE 4
/**
* \brief Castle engine structure.
*
* A Castle is a collection of repeats that all share the same character
* reachability.
*
* The whole engine is laid out in memory as:
*
* - struct NFA
* - struct Castle
* - struct SubCastle[numRepeats]
* - tables for sparse model repeats
*
* Castle stores an "active repeats" multibit in stream state, followed by the
* packed repeat state for each SubCastle. If all SubCastles are mutually
* exclusive, we store the currently active SubCastle id instead of the
* "active repeats" multibit in stream state. If there are both exclusive and
* non-exclusive SubCastle groups, we use an active id for the exclusive group
* and a multibit for the non-exclusive group.
*
* In full state (stored in scratch space) it stores a temporary multibit over
* the repeats (used by \ref castleMatchLoop), followed by the repeat control
* blocks for each SubCastle. If all SubCastles are mutually exclusive, we
* only need to store the repeat control blocks for each SubCastle.
*/
struct ALIGN_AVX_DIRECTIVE Castle {
u32 numRepeats; //!< number of SubCastle entries that follow this structure
u8 type; //!< tells us which scanning mechanism (below) to use
char exclusive; //!< tells us if there are mutually exclusive SubCastles
char pureExclusive; //!< tells us if all SubCastles are mutually exclusive
u8 activeIdxSize; //!< number of bytes in stream state to store
// active SubCastle id for exclusive mode
// Scan parameters; which member is meaningful depends on \ref type.
union {
struct {
char c; //!< byte scanned for by the verm / negated verm scan types
} verm;
struct {
m128 mask_lo;
m128 mask_hi;
} shuf;
struct {
m128 mask1;
m128 mask2;
} truffle;
} u;
};
#endif // NFA_CASTLE_INTERNAL_H

1029
src/nfa/castlecompile.cpp Normal file

File diff suppressed because it is too large Load Diff

146
src/nfa/castlecompile.h Normal file
View File

@@ -0,0 +1,146 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Castle: multi-tenant repeat engine, compiler code.
*/
#ifndef NFA_CASTLECOMPILE_H
#define NFA_CASTLECOMPILE_H
#include "nfa_kind.h"
#include "ue2common.h"
#include "nfagraph/ng_repeat.h"
#include "util/alloc.h"
#include "util/depth.h"
#include <map>
#include <memory>
#include <set>
#include <vector>
struct NFA;
namespace ue2 {
class CharReach;
class NGHolder;
struct CompileContext;
/**
* \brief Prototype for a Castle engine: contains at least one CastleRepeat.
*
* Currently, all repeats in a Castle must have the same character
* reachability.
*
* A CastleProto is converted into a single NFA, with each top triggering a
* unique repeat. A CastleProto can contain at most CastleProto::max_occupancy
* elements.
*/
struct CastleProto {
/** \brief Upper bound on the number of repeats in one CastleProto; also the
 * value merge() returns to signal that capacity would be exceeded. */
static constexpr size_t max_occupancy = 65536; // arbitrary limit
/** \brief Construct from a single repeat.
 * NOTE(review): presumably seeds \ref repeats with \a pr; the implementation
 * is not visible here. */
explicit CastleProto(const PureRepeat &pr);
/** \brief Character reachability of this castle.
 * NOTE(review): presumably the reach shared by all repeats; confirm against
 * the implementation. */
const CharReach &reach() const;
/** \brief Add the given repeat.
 * NOTE(review): presumably returns the top assigned to it; confirm against
 * the implementation. */
u32 add(const PureRepeat &pr);
/**
* \brief Merge in the given repeat, returning the top used.
*
* If the repeat already exists in this castle, we will re-use (and return)
* the old top. If it doesn't, it will be added and assigned a new top.
* Returns \ref max_occupancy if capacity would be exceeded.
*/
u32 merge(const PureRepeat &pr);
/** \brief Mapping from unique top id to repeat. */
std::map<u32, PureRepeat> repeats;
};
/** \brief Set of report IDs associated with the given CastleProto.
 * NOTE(review): presumably gathered from every repeat in the proto; confirm
 * against the implementation. */
std::set<ReportID> all_reports(const CastleProto &proto);
/** \brief Minimum width of the given CastleProto, as a depth.
 * NOTE(review): exact definition lives in the implementation; confirm. */
depth findMinWidth(const CastleProto &proto);
/** \brief Maximum width of the given CastleProto, as a depth.
 * NOTE(review): exact definition lives in the implementation; confirm. */
depth findMaxWidth(const CastleProto &proto);
/**
* \brief Remap tops to be contiguous.
*
* Remap the tops in the given CastleProto so that they're contiguous in the
* range [0 .. N-1].
*/
void remapCastleTops(CastleProto &proto, std::map<u32, u32> &top_map);
/**
* \brief Construct an NFA from a CastleProto.
*
* NOTE: Tops must be contiguous, i.e. \ref remapCastleTops must have been run
* first.
*/
ue2::aligned_unique_ptr<NFA>
buildCastle(const CastleProto &proto,
const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
const CompileContext &cc);
/**
* \brief Merge two CastleProto prototypes together, if possible.
*
* Returns true if merge of all repeats in c2 into c1 succeeds, and fills
* mapping with the repeat indices.
*/
bool mergeCastle(CastleProto &c1, const CastleProto &c2,
std::map<u32, u32> &top_map);
/**
* \brief True if the two castles are identical with respect to the reports
* given; i.e. the same tops lead to the same repeats, just with report1 in c1
* and report2 in c2.
*
* Repeats leading to other reports are ignored.
*/
bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2,
ReportID report2);
/**
* \brief True if the two castles given are identical.
*/
bool is_equal(const CastleProto &c1, const CastleProto &c2);
/**
* \brief True if the given castle contains more than a single instance of any
* of the reports in the given set.
*/
bool requiresDedupe(const CastleProto &proto, const std::set<ReportID> &reports);
/**
* \brief Build an NGHolder from a CastleProto.
*/
std::unique_ptr<NGHolder> makeHolder(const CastleProto &castle, nfa_kind kind,
const CompileContext &cc);
} // namespace ue2
#endif // NFA_CASTLECOMPILE_H

351
src/nfa/dfa_min.cpp Normal file
View File

@@ -0,0 +1,351 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Build code for DFA minimization
*/
/**
* Summary of Hopcroft's algorithm:
*
* partition := {F, Q \ F};
* work_queue := {F};
* while (work_queue is not empty) do
*     choose and remove a set A from work_queue
*     for each c in Sigma (the input alphabet) do
*         let X be the set of states for which a transition on c
*         leads to a state in A
*         for each set Y in partition for which (X intersect Y) is nonempty
*         and Y \ X is nonempty do
*             replace Y in partition by the two sets (X intersect Y) and Y \ X
*             if Y is in work_queue
*                 replace Y in work_queue by the same two sets
*             else
*                 if |X intersect Y| <= |Y \ X|
*                     add (X intersect Y) to work_queue
*                 else
*                     add Y \ X to work_queue
*         end;
*     end;
* end;
*/
#include "dfa_min.h"
#include "grey.h"
#include "nfa/rdfa.h"
#include "nfagraph/ng_mcclellan.h"
#include "ue2common.h"
#include "util/partitioned_set.h"
#include "util/container.h"
#include "util/ue2_containers.h"
#include <algorithm>
#include <functional>
#include <map>
#include <set>
#include <vector>
#include <iterator>
#include <boost/core/noncopyable.hpp>
#include <boost/dynamic_bitset.hpp>
using namespace std;
namespace ue2 {
namespace {
/* Per-state reverse transition table used by the minimiser. */
struct hopcroft_state_info {
/* prev[c] lists the states whose transition on input c leads to this state
* (filled in by the DFA_components constructor). */
vector<vector<dstate_id_t> > prev;
};
/* Working state for one run of the minimiser over a raw_dfa. */
struct DFA_components : boost::noncopyable {
/* number of states in the input dfa */
dstate_id_t nstates;
/* number of entries in a state's next table (read from the last state) */
size_t inp_size;
/* indices of partition subsets still to be used as splitters.
* Must be declared before partition: the constructor hands work_queue to
* create_map() while initialising partition. */
set<size_t> work_queue;
/*Partition contains reduced states*/
partitioned_set<dstate_id_t> partition;
/* reverse transitions, indexed by state id */
vector<hopcroft_state_info> states;
explicit DFA_components(const raw_dfa &rdfa);
};
} //namespace
/**
 * create_map:
 * Builds the initial state -> subset assignment and seeds the work queue.
 *
 * Unlike textbook Hopcroft, accepting states are not lumped into a single
 * subset: states are only allowed to share a subset if they raise exactly the
 * same reports, so there is one subset per distinct (reports, reports_eod)
 * pair. Each such subset is numbered in order of first appearance and pushed
 * onto work_queue.
 *
 * Every state with neither reports nor reports_eod is assigned to one final
 * subset whose index follows the accepting subsets; that subset is not added
 * to work_queue.
 *
 * Returns a vector mapping each state index to its subset index.
 */
static
vector<size_t> create_map(const raw_dfa &rdfa, set<size_t> &work_queue) {
    using ReportKey = pair<flat_set<ReportID>, flat_set<ReportID>>;
    map<ReportKey, size_t> subset_map;
    vector<size_t> state_to_subset(rdfa.states.size(), INVALID_SUBSET);

    for (size_t i = 0; i < rdfa.states.size(); i++) {
        const auto &ds = rdfa.states[i];
        if (ds.reports.empty() && ds.reports_eod.empty()) {
            continue; /* non-accepting; handled below */
        }

        ReportKey key(ds.reports, ds.reports_eod);
        auto it = subset_map.find(key);
        if (it == subset_map.end()) {
            /* first state with this reporting behaviour: open a new subset */
            const size_t sub = subset_map.size();
            it = subset_map.emplace(key, sub).first;
            work_queue.insert(sub);
        }
        state_to_subset[i] = it->second;
    }

    /* everything still unassigned is a non-accept state */
    const size_t non_accept_sub = subset_map.size();
    for (size_t &sub : state_to_subset) {
        if (sub == INVALID_SUBSET) {
            sub = non_accept_sub;
        }
    }

    return state_to_subset;
}
/**
 * Sets up the minimiser state for rdfa: the initial partition (and work queue)
 * come from create_map(), and the per-state reverse transition tables are
 * built by inverting rdfa's next tables.
 */
DFA_components::DFA_components(const raw_dfa &rdfa)
    : nstates(rdfa.states.size()),
      inp_size(rdfa.states[nstates - 1].next.size()),
      partition(create_map(rdfa, work_queue)) {
    /* one (initially empty) predecessor list per state, per input */
    states.resize(nstates);
    for (auto &info : states) {
        info.prev.resize(inp_size);
    }

    /* invert the transition table: record `from` as a predecessor of the
     * state it reaches on each input */
    for (size_t from = 0; from < nstates; from++) {
        for (size_t sym = 0; sym < inp_size; sym++) {
            const dstate_id_t to = rdfa.states[from].next[sym];
            states[to].prev[sym].push_back(from);
            DEBUG_PRINTF("rdfa.states[%zu].next[%zu] %hu \n", from, sym,
                         rdfa.states[from].next[sym]);
        }
    }
}
/**
 * Pops the first subset index off the work queue and fills A with the members
 * of that subset. The work queue must not be empty.
 */
static
void get_work_item(DFA_components &mdfa, ue2::flat_set<dstate_id_t> &A) {
    auto &queue = mdfa.work_queue;
    assert(!queue.empty());

    A.clear();
    const set<size_t>::iterator first = queue.begin();
    insert(&A, mdfa.partition[*first]);
    queue.erase(first);
}
/**
 * Fills X with every state that has a transition on input inp into some state
 * of A, i.e. the union of the recorded predecessors (on inp) of A's members.
 */
static
void create_X(const DFA_components &mdfa, const ue2::flat_set<dstate_id_t> &A,
              size_t inp, ue2::flat_set<dstate_id_t> &X) {
    X.clear();
    for (const dstate_id_t member : A) {
        const auto &preds = mdfa.states[member].prev[inp];
        insert(&X, preds);
    }
}
/**
 * Attempts to split the subset S at part_index against the given splitter set
 * (into S intersect splitter, and S minus splitter).
 *
 * If both halves are nonempty, the partition is updated and the index returned
 * by partitioned_set::split() is added to the work queue. The other half keeps
 * S's index, so if S was already queued it remains queued and both halves end
 * up in the work queue; otherwise only the returned one is queued.
 *
 * Nothing happens if S is a singleton or split() reports INVALID_SUBSET.
 */
static
void split_and_replace_set(const size_t part_index, DFA_components &mdfa,
                           const ue2::flat_set<dstate_id_t> &splitter) {
    auto &partition = mdfa.partition;

    /* a single state cannot be divided any further */
    if (partition[part_index].size() == 1) {
        return;
    }

    const size_t small_index = partition.split(part_index, splitter);
    if (small_index != INVALID_SUBSET) {
        /* the other half stays at part_index (and in the queue, if it was
         * there); the newly created subset always needs processing */
        mdfa.work_queue.insert(small_index);
    }
}
/**
 * Main refinement loop of Hopcroft's algorithm.
 *
 * Repeatedly takes a splitter set off the work queue; for every input symbol
 * computes the set of predecessors of the splitter on that symbol, and uses it
 * to split each partition subset it overlaps. Terminates when the work queue
 * is empty.
 */
static
void dfa_min(DFA_components &mdfa) {
    ue2::flat_set<dstate_id_t> splitter;
    ue2::flat_set<dstate_id_t> preds;
    vector<size_t> overlapping;

    while (!mdfa.work_queue.empty()) {
        get_work_item(mdfa, splitter);

        for (size_t sym = 0; sym < mdfa.inp_size; sym++) {
            create_X(mdfa, splitter, sym, preds);
            if (preds.empty()) {
                continue; /* nothing reaches the splitter on this input */
            }

            /* only subsets sharing a member with preds can be split by it */
            overlapping.clear();
            mdfa.partition.find_overlapping(preds, &overlapping);
            for (const size_t subset : overlapping) {
                split_and_replace_set(subset, mdfa, preds);
            }
        }
    }
}
/**
* Creating new dfa table
* Map ordering contains key being an equivalence classes first state
* and the value being the equivalence class index.
* Eq_state[i] tells us new state id the equivalence class located at
* partition[i].
*/
static
void mapping_new_states(const DFA_components &mdfa,
vector<dstate_id_t> &old_to_new,
raw_dfa &rdfa) {
const size_t num_partitions = mdfa.partition.size();
// Mapping from equiv class's first state to equiv class index.
map<dstate_id_t, size_t> ordering;
// New state id for each equiv class.
vector<dstate_id_t> eq_state(num_partitions);
for (size_t i = 0; i < num_partitions; i++) {
ordering[*mdfa.partition[i].begin()] = i;
}
dstate_id_t new_id = 0;
for (const auto &m : ordering) {
eq_state[m.second] = new_id++;
}
for (size_t t = 0; t < mdfa.partition.size(); t++) {
for (dstate_id_t id : mdfa.partition[t]) {
old_to_new[id] = eq_state[t];
}
}
vector<dstate> new_states;
new_states.reserve(num_partitions);
for (size_t i = 0; i < mdfa.nstates; i++) {
if (contains(ordering, i)) {
new_states.push_back(rdfa.states[i]);
}
}
rdfa.states.swap(new_states);
}
static
void renumber_new_states(const DFA_components &mdfa,
const vector<dstate_id_t> &old_to_new,
raw_dfa &rdfa) {
for (size_t i = 0; i < mdfa.partition.size(); i++) {
for (size_t j = 0; j < mdfa.inp_size; j++) {
dstate_id_t output = rdfa.states[i].next[j];
rdfa.states[i].next[j] = old_to_new[output];
}
dstate_id_t dad = rdfa.states[i].daddy;
rdfa.states[i].daddy = old_to_new[dad];
}
rdfa.start_floating = old_to_new[rdfa.start_floating];
rdfa.start_anchored = old_to_new[rdfa.start_anchored];
}
/**
 * Replaces rdfa's state table with the minimised one described by mdfa.
 * Leaves rdfa untouched if no two states were found to be equivalent.
 */
static
void new_dfa(raw_dfa &rdfa, const DFA_components &mdfa) {
    if (mdfa.partition.size() == mdfa.nstates) {
        return; /* every state is its own class: nothing to reduce */
    }

    vector<dstate_id_t> old_to_new(mdfa.nstates);
    mapping_new_states(mdfa, old_to_new, rdfa);
    renumber_new_states(mdfa, old_to_new, rdfa);
}
/**
 * Entry point: minimises rdfa in place using Hopcroft's algorithm.
 * Does nothing when grey.minimizeDFA is disabled.
 */
void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) {
    if (grey.minimizeDFA) {
        UNUSED const size_t states_before = rdfa.states.size();

        DFA_components mdfa(rdfa);
        dfa_min(mdfa);
        new_dfa(rdfa, mdfa);

        DEBUG_PRINTF("reduced from %zu to %zu states\n", states_before,
                     rdfa.states.size());
    }
}
} // namespace ue2

45
src/nfa/dfa_min.h Normal file
View File

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Build code for DFA minimization.
*/
#ifndef DFA_MIN_H
#define DFA_MIN_H
namespace ue2 {
struct raw_dfa;
struct Grey;
/**
* \brief Minimise the given DFA in place using Hopcroft's algorithm.
*
* Has no effect if grey.minimizeDFA is false.
*/
void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey);
} // namespace ue2
#endif

1153
src/nfa/gough.c Normal file

File diff suppressed because it is too large Load Diff

82
src/nfa/gough.h Normal file
View File

@@ -0,0 +1,82 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef GOUGH_H
#define GOUGH_H
#include "callback.h"
#include "ue2common.h"
struct NFA;
struct mq;
// 8-bit Gough
// Entry points of the 8-bit Gough engine, named nfaExecGough8_<operation>.
// NOTE(review): the contract of each operation is presumably that of the
// common NFA API, which is not visible in this header -- confirm there.
char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb,
void *context);
char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
char nfaExecGough8_queueCompressState(const struct NFA *nfa, const struct mq *q,
s64a loc);
char nfaExecGough8_expandState(const struct NFA *nfa, void *dest,
const void *src, u64a offset, u8 key);
// These two operations have no function of their own; the names expand to
// NFA_API_NO_IMPL (presumably marking them as unsupported -- confirm).
#define nfaExecGough8_B_Reverse NFA_API_NO_IMPL
#define nfaExecGough8_zombie_status NFA_API_NO_IMPL
// 16-bit Gough
// Entry points of the 16-bit Gough engine, named nfaExecGough16_<operation>;
// the set of operations mirrors the 8-bit engine's.
// NOTE(review): the contract of each operation is presumably that of the
// common NFA API, which is not visible in this header -- confirm there.
char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
const char *streamState, u64a offset,
NfaCallback callback, SomNfaCallback som_cb,
void *context);
char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end);
char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end);
char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report);
char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q);
char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q);
char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q);
char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset,
void *state, u8 key);
char nfaExecGough16_queueCompressState(const struct NFA *nfa,
const struct mq *q, s64a loc);
char nfaExecGough16_expandState(const struct NFA *nfa, void *dest,
const void *src, u64a offset, u8 key);
// These two operations have no function of their own; the names expand to
// NFA_API_NO_IMPL (presumably marking them as unsupported -- confirm).
#define nfaExecGough16_B_Reverse NFA_API_NO_IMPL
#define nfaExecGough16_zombie_status NFA_API_NO_IMPL
#endif

134
src/nfa/gough_internal.h Normal file
View File

@@ -0,0 +1,134 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef GOUGH_INTERNAL_H
#define GOUGH_INTERNAL_H
#include "accel.h"
#include "mcclellan_internal.h"
#include "ue2common.h"
#define INVALID_SLOT (~0U)
#define GOUGH_INS_END 0
#define GOUGH_INS_MOV 1
#define GOUGH_INS_NEW 2
#define GOUGH_INS_MIN 3
/* todo: add instructions targeting acc reg? */
/* A single instruction of a gough program. */
struct gough_ins {
/* NOTE(review): presumably one of the GOUGH_INS_* opcodes defined above --
* confirm against the interpreter */
u32 op; /* u32 to avoid padding */
/* NOTE(review): presumably the som slot written by the instruction --
* confirm against the interpreter */
u32 dest;
u32 src; /* for GOUGH_INS_NEW, this specifies the adjustment to apply to the
* current offset */
};
/*
* HAPPY FUN ASCII ART TIME
*
* ----
* | | struct NFA
* ----
* ~~~~ normal(ish) mcclellan engine
* ~~~~
* ~~~~
* ~~~~
* ~~~~
* ~~~~
* ~~~~
* ~~~~
* ---- = m->haig_offset
* | | } struct gough_info
* ----
* | | }
* | | } edge prog table -> provides the offset of the start of the program
* | | } to run when the edge is taken. 0 indicates no
* | | } work to do
* ---- = h->top_prog_offset
* | | }
* | | } top prog table -> provides the offset of the start of the program
* | | } to run when a top is taken from this state. 0
* | | } indicates nothing to do
* ---- = h->prog_base_offset
* | | }
* | | } programs to run
* | | }
* | | }
* ----
*/
/* Gough-specific header. get_gough() finds it m->haig_offset bytes from the
* start of the struct NFA that precedes the mcclellan header. */
struct gough_info {
u32 top_prog_offset; /**< offset to the base of the top prog table, measured
* from the start of the struct NFA; 0 means there is
* no table (see get_gough_top_offsets) */
u32 prog_base_offset; /**< not used at runtime */
u32 stream_som_loc_count; /**< number of som locs in the stream state */
u8 stream_som_loc_width; /**< number of bytes per som loc */
};
/* Returns the gough_info header belonging to the given mcclellan engine.
 * The header sits m->haig_offset bytes from the start of the struct NFA, which
 * itself immediately precedes m. haig_offset must be non-zero. */
static really_inline
const struct gough_info *get_gough(const struct mcclellan *m) {
    assert(m->haig_offset);
    const char *nfa_base = (const char *)m - sizeof(struct NFA);
    const char *info = nfa_base + m->haig_offset;
    return (const struct gough_info *)info;
}
static really_inline
const u32 *get_gough_top_offsets(const struct mcclellan *m) {
const struct gough_info *g = get_gough(m);
if (!g->top_prog_offset) {
return NULL;
}
const char *n = (const char *)m - sizeof(struct NFA);
return (const u32 *)(n + g->top_prog_offset);
}
/* Gough state representation in scratch.
*
* During execution, gough tracks a number of variables containing potential
* starts of match. These are all stored in a large array of u64a slots.
*/
struct gough_som_info {
/* Declared with a single element but intended to be indexed past it.
* NOTE(review): the real number of slots presumably comes from the compiled
* engine -- confirm where this structure is sized. */
u64a slots[1]; /* 'flexible' member array */
};
/* A report together with the som slot whose value accompanies it. */
struct gough_report {
ReportID r; /* report to raise */
u32 som; /* som slot to report */
};
/* Variable-length list of gough reports. */
struct gough_report_list {
u32 count; /* presumably the number of entries in report[] -- confirm */
struct gough_report report[]; /* flexible array member */
};
/* Acceleration information for a gough state. */
struct gough_accel {
union AccelAux accel; /* acceleration scheme description (see accel.h) */
/* NOTE(review): presumably the margin computed by the compiler's
* can_accel_over_selfloop(), narrowed to u8 -- confirm */
u8 margin_dist;
/* NOTE(review): presumably the offset of the gough program associated with
* the accelerated state -- confirm against the runtime */
u32 prog_offset;
};
#endif

1320
src/nfa/goughcompile.cpp Normal file

File diff suppressed because it is too large Load Diff

93
src/nfa/goughcompile.h Normal file
View File

@@ -0,0 +1,93 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef GOUGHCOMPILE_H
#define GOUGHCOMPILE_H
#include "mcclellancompile.h"
#include "nfa_kind.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/ue2_containers.h"
#include "util/order_check.h"
#include <map>
#include <memory>
#include <set>
#include <vector>
namespace ue2 {
#define CREATE_NEW_SOM (~0U)
/* dest nfa state -> som info for dest state is min of provided loc idx som
* info */
typedef flat_map<u32, std::vector<u32>> som_tran_info;
/** \brief A report paired with a som slot. */
struct som_report {
som_report(ReportID r, u32 s) : report(r), slot(s) {}
ReportID report;
u32 slot;
/* Ordering so that som_report can be stored in std::set. ORDER_CHECK is
* applied to report first and slot second.
* NOTE(review): ORDER_CHECK comes from util/order_check.h; this presumably
* yields lexicographic order on (report, slot) -- confirm there. */
bool operator<(const som_report &b) const {
const som_report &a = *this;
ORDER_CHECK(report);
ORDER_CHECK(slot);
return false;
}
};
/** \brief Per-state som information kept in raw_som_dfa::state_som. */
struct dstate_som {
std::set<som_report> reports; /* reports, each with its som slot */
std::set<som_report> reports_eod; /* as above, for the reports_eod set */
som_tran_info preds; /* live nfa states mapped back to pred states */
};
/** \brief A raw_dfa extended with the som information needed to build Gough. */
struct raw_som_dfa : public raw_dfa {
    /**
     * \param k kind of the underlying dfa.
     * \param unordered_som_triggers_in may only be set for triggered kinds.
     *
     * stream_som_loc_width and trigger_nfa_state start at zero so that they
     * never hold indeterminate values; callers are expected to fill them in.
     */
    raw_som_dfa(nfa_kind k, bool unordered_som_triggers_in)
        : raw_dfa(k), stream_som_loc_width(0),
          unordered_som_triggers(unordered_som_triggers_in),
          trigger_nfa_state(0) {
        assert(!unordered_som_triggers || is_triggered(kind));
    }

    std::vector<dstate_som> state_som;
    u32 stream_som_loc_width;
    bool unordered_som_triggers;

    void stripExtraEodReports(void) override;

    std::map<u32, u32> new_som_nfa_states; /* map nfa vertex id -> offset */
    u32 trigger_nfa_state; /* for triggered cases, slot_id that contains a new
                            * som */
};
/**
* \brief Build a Gough NFA from the given raw som dfa.
*
* NOTE(review): \p raw is taken by non-const reference, so it may be modified
* during compilation; \p somPrecision is presumably the som width in bytes;
* the meaning of a null return is not visible here -- confirm against
* goughcompile.cpp.
*/
aligned_unique_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
const CompileContext &cc);
} // namespace ue2
#endif

View File

@@ -0,0 +1,281 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "goughcompile_internal.h"
#include "gough_internal.h"
#include "grey.h"
#include "mcclellancompile.h"
#include "util/container.h"
#include "util/graph.h"
#include "util/graph_range.h"
#include "ue2common.h"
#include <map>
#include <vector>
using namespace std;
namespace ue2 {
/** Adds the edge (u, v) to g, unless it would be a self-loop. */
template<typename Graph>
void add_edge_if_not_selfloop(const typename Graph::vertex_descriptor &u,
                              const typename Graph::vertex_descriptor &v,
                              Graph &g) {
    if (u == v) {
        return;
    }
    add_edge(u, v, g);
}
static
bool can_accel_over_selfloop(const GoughVertexProps &vp, const GoughEdge &e,
const GoughEdgeProps &ep, u32 *margin) {
if (vp.vars.empty() && ep.vars.empty()) {
/* if we update no som information, then it is trivial to accelerate */
*margin = 0;
return true;
}
/* if the effect of running a self loop stabilises after a small number of
* iterations, it is possible to accelerate over the state and only then run
* the block N times. To model this we create a graph which shows how the
* value for a variable at the end of a self loop block is related to values
* at the start */
typedef boost::adjacency_list<boost::vecS, boost::vecS,
boost::bidirectionalS> basic_graph;
typedef basic_graph::vertex_descriptor basic_vertex;
basic_graph bg;
map<const GoughSSAVar *, basic_vertex> verts;
/* create verts */
for (const auto &var : ep.vars) {
verts[var.get()] = add_vertex(bg);
}
for (const auto &var : vp.vars) {
verts[var.get()] = add_vertex(bg);
}
/* wire edges */
set<basic_vertex> done;
for (const auto &var : ep.vars) {
assert(contains(verts, var.get()));
basic_vertex v = verts[var.get()];
for (GoughSSAVar *pred : var->get_inputs()) {
if (!contains(verts, pred)) {
continue;
}
basic_vertex u = verts[pred];
if (contains(done, u)) { /* u has already taken on new values this
* iteration */
for (auto p : inv_adjacent_vertices_range(u, bg)) {
add_edge_if_not_selfloop(p, v, bg);
}
} else {
add_edge_if_not_selfloop(u, v, bg);
}
}
done.insert(v);
}
for (const auto &var : vp.vars) {
GoughSSAVar *pred = var->get_input(e);
assert(contains(verts, var.get()));
basic_vertex v = verts[var.get()];
if (!contains(verts, pred)) {
continue;
}
basic_vertex u = verts[pred];
if (contains(done, u)) { /* u has already taken on new values this
* iteration */
for (auto p : inv_adjacent_vertices_range(u, bg)) {
add_edge_if_not_selfloop(p, v, bg);
}
} else {
add_edge_if_not_selfloop(u, v, bg);
}
/* do not add v to done as all joins happen in parallel */
}
/* check for loops - non self loops may prevent settling */
if (!is_dag(bg)) {
DEBUG_PRINTF("can not %u accel as large loops\n", vp.state_id);
return false;
}
*margin = num_vertices(bg); /* TODO: be less conservative */
if (*margin > 50) {
return false;
}
return true;
}
static
bool verify_neighbour(const GoughGraph &g, GoughVertex u,
const map<gough_edge_id, vector<gough_ins> > &blocks,
const set<GoughVertex> &succs,
const vector<gough_ins> &block_sl) {
for (const auto &e : out_edges_range(u, g)) {
if (!g[e].reach.any()) { /* ignore top edges */
continue;
}
GoughVertex t = target(e, g);
if (!contains(succs, t)) { /* must be an escape string */
continue;
}
if (!contains(blocks, gough_edge_id(g, e))) {
return false;
}
if (blocks.at(gough_edge_id(g, e)) != block_sl) {
return false;
}
}
return true;
}
static
bool verify_neighbour_no_block(const GoughGraph &g, GoughVertex u,
const map<gough_edge_id, vector<gough_ins> > &blocks,
const set<GoughVertex> &succs) {
for (const auto &e : out_edges_range(u, g)) {
if (!g[e].reach.any()) { /* ignore top edges */
continue;
}
GoughVertex t = target(e, g);
if (!contains(succs, t)) { /* must be an escape string */
continue;
}
if (contains(blocks, gough_edge_id(g, e))) {
return false;
}
}
return true;
}
/* Checks the som aspects of allowing two byte accel - it is expected that the
 * mcclellan logic will identify escape strings.
 *
 * For 2 byte acceleration to be correct we require that any non-escape
 * sequence of characters xy from the accel state has the same effect as just
 * the character y.
 *
 * The current way of ensuring this is to require:
 * (a) all edges out of the cyclic state behave identically to the cyclic self
 *     loop edge
 * (b) edges out of the neighbouring states which do not correspond to an
 *     escape string behave identically to the cyclic state edges.
 *
 * "Behave identically" means: carry the same edge program as the self loop,
 * or carry no edge program if the self loop has none.
 *
 * In both cases the successor set is gathered in full before any neighbour is
 * examined, so each neighbour is verified exactly once.
 *
 * TODO: these restrictions could be relaxed by looking at the effect on
 * relevant (live?) vars only, allowing additions to the escape string set, and
 * considering one byte escapes.
 */
static
bool allow_two_byte_accel(const GoughGraph &g,
                          const map<gough_edge_id, vector<gough_ins> > &blocks,
                          GoughVertex v, const GoughEdge &self_loop) {
    if (contains(blocks, gough_edge_id(g, self_loop))) {
        DEBUG_PRINTF("edge plan on self loop\n");
        const auto &block_sl = blocks.at(gough_edge_id(g, self_loop));

        /* (a): every non-top out-edge must run the self loop's program */
        set<GoughVertex> succs;
        for (const auto &e : out_edges_range(v, g)) {
            if (g[e].reach.none()) { /* ignore top edges */
                continue;
            }

            gough_edge_id ged(g, e);
            if (!contains(blocks, ged) || blocks.at(ged) != block_sl) {
                DEBUG_PRINTF("different out-edge behaviour\n");
                return false;
            }
            succs.insert(target(e, g));
        }

        /* (b): so must the relevant edges of every neighbour */
        for (auto w : adjacent_vertices_range(v, g)) {
            if (w != v && !verify_neighbour(g, w, blocks, succs, block_sl)) {
                return false;
            }
        }
    } else {
        DEBUG_PRINTF("no edge plan on self loop\n");

        /* (a): no non-top out-edge may have a program */
        set<GoughVertex> succs;
        for (const auto &e : out_edges_range(v, g)) {
            if (g[e].reach.none()) { /* ignore top edges */
                continue;
            }

            gough_edge_id ged(g, e);
            if (contains(blocks, ged)) {
                DEBUG_PRINTF("different out-edge behaviour\n");
                return false;
            }
            succs.insert(target(e, g));
        }

        /* (b): nor may the relevant edges of any neighbour. Checking against
         * the complete successor set subsumes checks against any partially
         * built one, as a larger set only makes the check stricter. */
        for (auto w : adjacent_vertices_range(v, g)) {
            if (w != v && !verify_neighbour_no_block(g, w, blocks, succs)) {
                return false;
            }
        }
    }

    DEBUG_PRINTF("allowing two byte accel for %u\n", g[v].state_id);
    return true;
}
void find_allowed_accel_states(const GoughGraph &g,
const map<gough_edge_id, vector<gough_ins> > &blocks,
map<dstate_id_t, gough_accel_state_info> *out) {
for (auto v : vertices_range(g)) {
GoughEdge e;
if (!find_normal_self_loop(v, g, &e)) {
continue; /* not accelerable */
}
u32 margin = 0;
if (!can_accel_over_selfloop(g[v], e, g[e], &margin)) {
continue; /* not accelerable */
}
bool tba = allow_two_byte_accel(g, blocks, v, e);
out->emplace(g[v].state_id, gough_accel_state_info(margin, tba));
}
}
} // namespace ue2

View File

@@ -0,0 +1,334 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "goughcompile_dump.h"
#include "goughcompile_internal.h"
#include "grey.h"
#include "util/container.h"
#include "util/graph_range.h"
#include <string>
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
using namespace std;
namespace ue2 {
/** \brief Name used for a Gough vertex in dump output: "vertex_<state id>". */
string dump_name(const GoughVertexProps &vp) {
    stringstream name;
    name << "vertex_";
    name << vp.state_id;
    return name.str();
}
/** \brief Name used for a graph edge in dump output:
 * "edge_<source state id>_<target state id>". */
static
string dump_name(const GoughGraph &g, const GoughEdge &e) {
    const GoughVertexProps &from = g[source(e, g)];
    const GoughVertexProps &to = g[target(e, g)];

    stringstream name;
    name << "edge_" << from.state_id << "_" << to.state_id;
    return name.str();
}
/** \brief Name used for an edge id in dump output: "edge_<src>_<dest>". */
string dump_name(const gough_edge_id &e) {
    stringstream name;
    name << "edge_" << e.src;
    name << "_" << e.dest;
    return name.str();
}
/**
 * \brief Writes the Gough graph as a Graphviz file named
 * "<dumpPath>gough_<base>.dot".
 *
 * Every vertex becomes a node labelled with its state id (drawn as a double
 * circle when it has reports or EOD reports) and every edge becomes a plain
 * arrow between the two vertex nodes.
 *
 * Dumping is best effort: if the output file cannot be opened, nothing is
 * written and the function returns quietly.
 */
static
void dump_graph(const GoughGraph &g, const string &base, const Grey &grey) {
    stringstream ss;
    ss << grey.dumpPath << "gough_" << base << ".dot";

    FILE *f = fopen(ss.str().c_str(), "w");
    if (!f) {
        /* fopen failed (e.g. bad dump path); passing a null stream to
         * fprintf/fclose would be undefined behaviour */
        return;
    }

    fprintf(f, "digraph NFA {\n");
    fprintf(f, "rankdir=LR;\n");
    fprintf(f, "size=\"11.5,8\"\n");
    fprintf(f, "node [ shape = circle ];\n");
    fprintf(f, "START [style=invis];\n");

    for (auto v : vertices_range(g)) {
        fprintf(f, "%s [ width = 1, fixedsize = true, fontsize = 12, ",
                dump_name(g[v]).c_str());
        if (!g[v].reports.empty() || !g[v].reports_eod.empty()) {
            fprintf(f, "shape = doublecircle ");
        }
        fprintf(f, "label = \"%u\"];\n", g[v].state_id);
    }

    for (const auto &e : edges_range(g)) {
        GoughVertex s = source(e, g);
        GoughVertex t = target(e, g);
        fprintf(f, "%s -> %s\n",
                dump_name(g[s]).c_str(), dump_name(g[t]).c_str());
    }

    fprintf(f, "}\n");
    fclose(f);
}
/**
 * \brief Collects the variables read at a vertex: the non-null variables
 * attached to its reports and EOD reports, plus the inputs of every variable
 * owned by the vertex.
 */
static
set<const GoughSSAVar *> uses(const GoughVertexProps &vp) {
    set<const GoughSSAVar *> used;

    /* a report may carry no variable; skip those */
    for (const auto &report : vp.reports) {
        if (!report.second) {
            continue;
        }
        used.insert(report.second);
    }

    for (const auto &report : vp.reports_eod) {
        if (!report.second) {
            continue;
        }
        used.insert(report.second);
    }

    for (const auto &owned : vp.vars) {
        insert(&used, owned->get_inputs());
    }

    return used;
}
/** \brief Collects the inputs of every variable owned by an edge. */
static
set<const GoughSSAVar *> uses(const GoughEdgeProps &ep) {
    set<const GoughSSAVar *> used;
    for (const auto &owned : ep.vars) {
        insert(&used, owned->get_inputs());
    }
    return used;
}
static
void dump_var_mapping(const GoughGraph &g, const string &base,
const Grey &grey) {
stringstream ss;
ss << grey.dumpPath << "gough_" << base << "_vars.txt";
FILE *f = fopen(ss.str().c_str(), "w");
for (auto v : vertices_range(g)) {
set<const GoughSSAVar *> used = uses(g[v]);
if (g[v].vars.empty() && used.empty()) {
continue;
}
fprintf(f, "%s\n", dump_name(g[v]).c_str());
for (u32 i = 0; i < g[v].vars.size(); i++) {
const GoughSSAVar *vp = g[v].vars[i].get();
fprintf(f, "\t%u: slot %u\n", i, vp->slot);
}
if (!used.empty()) {
fprintf(f, "\tuses:");
vector<u32> used_id;
for (const GoughSSAVar *var : used) {
used_id.push_back(var->slot);
}
for (const u32 &id : used_id) {
fprintf(f, " %u", id);
}
fprintf(f, "\n");
}
}
for (const auto &e : edges_range(g)) {
set<const GoughSSAVar *> used = uses(g[e]);
if (g[e].vars.empty() && used.empty()) {
continue;
}
fprintf(f, "%s\n", dump_name(g, e).c_str());
for (u32 i = 0; i < g[e].vars.size(); i++) {
const GoughSSAVar *vp = g[e].vars[i].get();
fprintf(f, "\t%u: slot %u\n", i, vp->slot);
}
if (!used.empty()) {
fprintf(f, "\tuses:");
vector<u32> used_id;
for (const GoughSSAVar *var : used) {
used_id.push_back(var->slot);
}
for (const u32 &id : used_id) {
fprintf(f, " %u", id);
}
fprintf(f, "\n");
}
}
fclose(f);
}
static
void gather_vars(const GoughGraph &g, vector<const GoughSSAVar *> *vars,
map<const GoughSSAVar *, string> *names,
map<const GoughSSAVar *, string> *src_label,
set<const GoughSSAVar *> *reporters) {
for (auto v : vertices_range(g)) {
for (const auto &r : g[v].reports) {
reporters->insert(r.second);
}
for (const auto &r : g[v].reports_eod) {
reporters->insert(r.second);
}
for (u32 i = 0; i < g[v].vars.size(); i++) {
const GoughSSAVar *vp = g[v].vars[i].get();
stringstream ss;
ss << dump_name(g[v]) << "_" << i;
vars->push_back(vp);
names->insert(make_pair(vp, ss.str()));
src_label->insert(make_pair(vp, dump_name(g[v])));
}
}
for (const auto &e : edges_range(g)) {
for (u32 i = 0; i < g[e].vars.size(); i++) {
const GoughSSAVar *vp = g[e].vars[i].get();
stringstream ss;
ss << dump_name(g, e) << "_" << i;
vars->push_back(vp);
names->insert(make_pair(vp, ss.str()));
src_label->insert(make_pair(vp, dump_name(g, e)));
}
}
}
/**
 * \brief Writes the variable dependency graph as a Graphviz file named
 * "<dumpPath>gough_<base>_vars.dot".
 *
 * Each variable becomes a node labelled with its owner and kind (MIN, JOIN,
 * NEW, or ??? for an unrecognised type). Variables attached to reports are
 * drawn as double circles; a report with no variable is drawn as a shared
 * "eps" node. An arrow runs from each input variable to the variable that
 * reads it.
 *
 * Dumping is best effort: if the output file cannot be opened, nothing is
 * written and the function returns quietly.
 */
static
void dump_vars(const GoughGraph &g, const string &base, const Grey &grey) {
    FILE *f;
    {
        stringstream ss;
        ss << grey.dumpPath << "gough_" << base << "_vars.dot";
        f = fopen(ss.str().c_str(), "w");
    }
    if (!f) {
        /* a null stream must not reach fprintf/fclose */
        return;
    }

    fprintf(f, "digraph NFA {\n");
    fprintf(f, "rankdir=LR;\n");
    fprintf(f, "size=\"11.5,8\"\n");
    fprintf(f, "node [ shape = circle ];\n");
    fprintf(f, "START [style=invis];\n");

    vector<const GoughSSAVar *> vars;
    map<const GoughSSAVar *, string> names;
    map<const GoughSSAVar *, string> src_label;
    set<const GoughSSAVar *> reporters;
    gather_vars(g, &vars, &names, &src_label, &reporters);

    for (const GoughSSAVar *vp : vars) {
        fprintf(f, "%s [ width = 1, fixedsize = true, fontsize = 12, ",
                names[vp].c_str());
        fprintf(f, "label = \"%s\\n", src_label[vp].c_str());

        /* the concrete variable type decides the second label line */
        if (dynamic_cast<const GoughSSAVarMin *>(vp)) {
            fprintf(f, "MIN");
        } else if (dynamic_cast<const GoughSSAVarJoin *>(vp)) {
            fprintf(f, "JOIN");
        } else if (dynamic_cast<const GoughSSAVarNew *>(vp)) {
            fprintf(f, "NEW");
        } else {
            fprintf(f, "???");
        }
        fprintf(f, "\"];\n");
    }

    for (const GoughSSAVar *vp : reporters) {
        if (vp) {
            fprintf(f, "%s [ shape = doublecircle]\n", names[vp].c_str());
        } else {
            /* report without an attached variable */
            fprintf(f, "eps [ label = \"eps\" shape = doublecircle]\n");
        }
    }

    for (const GoughSSAVar *vp : vars) {
        const flat_set<GoughSSAVar *> &inputs = vp->get_inputs();
        for (const GoughSSAVar *v_in : inputs) {
            fprintf(f, "%s -> %s\n", names[v_in].c_str(), names[vp].c_str());
        }
    }

    fprintf(f, "}\n");
    fclose(f);
}
/**
 * \brief Emits all Gough graph dumps (graph, variable mapping, variable
 * graph) for \p g, using \p base in the file names. Does nothing unless
 * grey.dumpFlags is set.
 */
void dump(const GoughGraph &g, const string &base, const Grey &grey) {
    if (grey.dumpFlags) {
        dump_graph(g, base, grey);
        dump_var_mapping(g, base, grey);
        dump_vars(g, base, grey);
    }
}
/**
 * \brief Prints the instruction block attached to edge \p e: a "<name>:"
 * header followed by one tab-indented line per instruction.
 */
static
void dump_block(FILE *f, const gough_edge_id &e,
                const vector<gough_ins> &block) {
    fprintf(f, "%s:\n", dump_name(e).c_str());

    for (const gough_ins &ins : block) {
        fprintf(f, "\t");
        if (ins.op == GOUGH_INS_END) {
            fprintf(f, "END");
        } else if (ins.op == GOUGH_INS_MOV) {
            fprintf(f, "MOV %u %u", ins.dest, ins.src);
        } else if (ins.op == GOUGH_INS_NEW) {
            fprintf(f, "NEW %u (+%u)", ins.dest, ins.src);
        } else if (ins.op == GOUGH_INS_MIN) {
            fprintf(f, "MIN %u %u", ins.dest, ins.src);
        } else {
            /* opcode this dumper does not know about */
            fprintf(f, "<UNKNOWN>");
        }
        fprintf(f, "\n");
    }
}
/**
 * \brief Writes every edge program in \p blocks to
 * "<dumpPath>gough_<base>_programs.txt", in map order.
 *
 * Does nothing unless grey.dumpFlags is set. Dumping is best effort: if the
 * output file cannot be opened, nothing is written and the function returns
 * quietly.
 */
void dump_blocks(const map<gough_edge_id, vector<gough_ins> > &blocks,
                 const string &base, const Grey &grey) {
    if (!grey.dumpFlags) {
        return;
    }

    FILE *f;
    {
        stringstream ss;
        ss << grey.dumpPath << "gough_" << base << "_programs.txt";
        f = fopen(ss.str().c_str(), "w");
    }
    if (!f) {
        /* a null stream must not reach fprintf/fclose */
        return;
    }

    for (const auto &m : blocks) {
        dump_block(f, m.first, m.second);
    }

    fclose(f);
}
} // namespace ue2

View File

@@ -0,0 +1,63 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef GOUGHCOMPILE_DUMP_H
#define GOUGHCOMPILE_DUMP_H
#include "goughcompile_internal.h"
#include <map>
#include <string>
namespace ue2 {
struct Grey;
#ifdef DUMP_SUPPORT
/** \brief Dump label for a vertex: "vertex_<state id>". */
std::string dump_name(const GoughVertexProps &vp);
/** \brief Dump label for an edge id: "edge_<src>_<dest>". */
std::string dump_name(const gough_edge_id &e);
/** \brief Writes the Gough graph dump files for \p g; \p base is embedded in
 * the file names. No-op unless grey.dumpFlags is set. */
void dump(const GoughGraph &g, const std::string &base, const Grey &grey);
/** \brief Writes the per-edge instruction blocks to a "_programs.txt" dump
 * file; \p base is embedded in the file name. No-op unless grey.dumpFlags is
 * set. */
void dump_blocks(const std::map<gough_edge_id, std::vector<gough_ins> > &blocks,
const std::string &base, const Grey &grey);
#else
/* Builds without DUMP_SUPPORT get empty stand-ins so that callers can invoke
 * dump()/dump_blocks() unconditionally. */

/** \brief No-op replacement for dump() when DUMP_SUPPORT is not defined. */
static UNUSED
void dump(UNUSED const GoughGraph &g, UNUSED const std::string &base,
UNUSED const Grey &grey) {
}
/** \brief No-op replacement for dump_blocks() when DUMP_SUPPORT is not
 * defined. */
static UNUSED
void dump_blocks(
UNUSED const std::map<gough_edge_id, std::vector<gough_ins> > &blocks,
UNUSED const std::string &base, UNUSED const Grey &grey) {
}
#endif
} // namespace ue2
#endif

View File

@@ -0,0 +1,227 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef GOUGHCOMPILE_INTERNAL_H
#define GOUGHCOMPILE_INTERNAL_H
#include "gough_internal.h"
#include "mcclellancompile.h"
#include "ue2common.h"
#include "util/charreach.h"
#include "util/order_check.h"
#include "util/ue2_containers.h"
#include <map>
#include <memory>
#include <set>
#include <vector>
#include <boost/core/noncopyable.hpp>
#include <boost/graph/adjacency_list.hpp>
namespace ue2 {
struct Grey;
struct GoughSSAVar;
struct GoughSSAVarJoin;
/** \brief Properties bundled with each vertex of a GoughGraph. */
struct GoughVertexProps {
GoughVertexProps() {}
explicit GoughVertexProps(u32 state_in) : state_id(state_in) {}
/* id of the state this vertex stands for; defaults to ~0U */
u32 state_id = ~0U;
std::vector<std::shared_ptr<GoughSSAVarJoin> > vars; /* owns variables */
/* (report id, variable) pairs; the variable pointer may be null */
std::vector<std::pair<ReportID, GoughSSAVar *> > reports; /**< report som,
som variable */
/* same pair layout as reports; presumably the reports raised at end of data,
 * going by the name - TODO confirm */
std::vector<std::pair<ReportID, GoughSSAVar *> > reports_eod;
};
/** \brief Properties bundled with each edge of a GoughGraph. */
struct GoughEdgeProps {
GoughEdgeProps(void) : top(false) {}
/* false by default; presumably marks a TOP (trigger) transition - TODO
 * confirm against the graph builder */
bool top;
/* characters on which this edge is taken */
CharReach reach;
std::vector<std::shared_ptr<GoughSSAVar> > vars; /* owns variables */
};
/** \brief Graph-wide properties of a GoughGraph. */
struct GoughGraphProps {
/* Spelled via adjacency_list_traits because GoughGraph itself is only
 * defined after this struct. */
boost::adjacency_list_traits<boost::vecS, boost::vecS>::vertex_descriptor
initial_vertex; /* for triggered nfas, dead state;
* for others start anchored or start floating
*/
};
/** \brief Bidirectional BGL graph (vector-backed vertex and edge storage)
 * carrying the vertex, edge and graph property bundles above. */
typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS,
GoughVertexProps, GoughEdgeProps, GoughGraphProps> GoughGraph;
/** \brief Vertex handle of a GoughGraph. */
typedef GoughGraph::vertex_descriptor GoughVertex;
/** \brief Edge handle of a GoughGraph. */
typedef GoughGraph::edge_descriptor GoughEdge;
/**
 * \brief Graph-independent key for an edge: source state id, target state id
 * and the first character of the edge's reach. Provides operator< so it can
 * be used as a std::map key.
 */
struct gough_edge_id {
gough_edge_id(const GoughGraph &g, const GoughEdge &e)
: src(g[source(e, g)].state_id), dest(g[target(e, g)].state_id),
first_char(g[e].reach.find_first()) {}
/* Orders by src, then dest, then first_char. ORDER_CHECK comes from
 * util/order_check.h and presumably returns early when the named member of
 * `a` and `b` differs - TODO confirm. Equal keys fall through to false. */
bool operator<(const gough_edge_id &b) const {
const gough_edge_id &a = *this;
ORDER_CHECK(src);
ORDER_CHECK(dest);
ORDER_CHECK(first_char);
return false;
}
const u32 src;
const u32 dest;
const u32 first_char; /* ~0U if only top */
};
struct GoughSSAVarWithInputs;
struct GoughSSAVarMin;
struct GoughSSAVarJoin;
/**
 * \brief Base class for the variables attached to Gough graph vertices and
 * edges. Tracks the variables this one reads (inputs) and the variables that
 * read this one (outputs). Non-copyable.
 */
struct GoughSSAVar : boost::noncopyable {
GoughSSAVar(void) : seen(false), slot(INVALID_SLOT) {}
virtual ~GoughSSAVar();
/* variables this variable reads */
const ue2::flat_set<GoughSSAVar *> &get_inputs() const {
return inputs;
}
/* variables that read this variable */
const ue2::flat_set<GoughSSAVarWithInputs *> &get_outputs() const {
return outputs;
}
/* swap one input for another; semantics defined by each subclass */
virtual void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) = 0;
/* appends to `out`; what is appended is defined by each subclass */
virtual void generate(std::vector<gough_ins> *out) const = 0;
bool seen; /* for temp use by remove_dead alg */
/* slot number; INVALID_SLOT until one is assigned */
u32 slot;
void clear_outputs();
/** remove all inputs and outputs of the vertex, call before
* removing vertex */
virtual void clear_all() {
clear_outputs();
}
protected:
ue2::flat_set<GoughSSAVar *> inputs;
ue2::flat_set<GoughSSAVarWithInputs *> outputs;
/* subclasses maintain each other's inputs/outputs sets directly */
friend struct GoughSSAVarWithInputs;
friend struct GoughSSAVarMin;
friend struct GoughSSAVarJoin;
};
/**
 * \brief Variable constructed from an adjustment value alone. It offers no
 * way to add inputs, so replace_input() is never expected to be called.
 */
struct GoughSSAVarNew : public GoughSSAVar {
explicit GoughSSAVarNew(u32 adjust_in) : adjust(adjust_in) {}
/* must not be called on this variable type */
void replace_input(GoughSSAVar *, GoughSSAVar *) override {
assert(0);
}
void generate(std::vector<gough_ins> *out) const override;
/* adjustment supplied at construction; presumably the operand of the NEW
 * instruction (dumped as "(+src)") - TODO confirm in generate() */
const u32 adjust;
};
/**
 * \brief Abstract base for variables that read other variables; this is the
 * type stored in GoughSSAVar::outputs.
 */
struct GoughSSAVarWithInputs : public GoughSSAVar {
GoughSSAVarWithInputs(void) {}
void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override = 0;
/* drop all inputs of this variable */
virtual void clear_inputs() = 0;
void clear_all() override;
protected:
/* NOTE(review): "raw" suggests this removes v from this variable's inputs
 * without updating v's outputs - confirm in the implementation */
virtual void remove_input_raw(GoughSSAVar *v) = 0;
friend struct GoughSSAVar;
};
/**
 * \brief Variable with a plain set of inputs; going by the name and the MIN
 * instruction, presumably evaluates to the minimum of its inputs - TODO
 * confirm in generate().
 */
struct GoughSSAVarMin : public GoughSSAVarWithInputs {
GoughSSAVarMin(void) {}
void generate(std::vector<gough_ins> *out) const override;
void clear_inputs() override;
void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override;
/* registers v as an input and records this variable among v's outputs */
virtual void add_input(GoughSSAVar *v) {
inputs.insert(v);
v->outputs.insert(this);
}
protected:
void remove_input_raw(GoughSSAVar *v) override;
};
/**
 * \brief Variable whose inputs are associated with graph edges: each input
 * is recorded together with the set of edges it arrives on. This is the
 * variable type owned by vertices (see GoughVertexProps::vars).
 */
struct GoughSSAVarJoin : public GoughSSAVarWithInputs {
GoughSSAVarJoin(void) {}
/* dummy; all joins at a point must be generated simultaneously */
void generate(std::vector<gough_ins> *out) const override;
/* input associated with the edge `prev` */
GoughSSAVar *get_input(const GoughEdge &prev) const;
void clear_inputs() override;
void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override;
/* registers v as the input arriving over edge `prev` */
void add_input(GoughSSAVar *v, GoughEdge prev);
/* edges over which `input` reaches this join */
const ue2::flat_set<GoughEdge> &get_edges_for_input(GoughSSAVar *input)
const;
const std::map<GoughSSAVar *, ue2::flat_set<GoughEdge> > &get_input_map()
const;
protected:
void remove_input_raw(GoughSSAVar *v) override;
private:
/* input variable -> edges it arrives on */
std::map<GoughSSAVar *, ue2::flat_set<GoughEdge>> input_map;
};
/** \brief Acceleration details recorded for a state that may be accelerated
 * (see find_allowed_accel_states). */
struct gough_accel_state_info {
/* margin value produced by the self-loop acceleration check */
u32 margin;
/* whether two-byte acceleration is allowed for the state */
bool two_byte;
gough_accel_state_info(u32 margin_in, bool two_byte_in)
: margin(margin_in), two_byte(two_byte_in) {
}
};
/** \brief Assigns a slot to every variable in \p g and returns the number of
 * slots handed out; the slots are compacted by register allocation when
 * grey.goughRegisterAllocate is set. */
u32 assign_slots(GoughGraph &g, const Grey &grey);
/** \brief Fills \p out with an entry, keyed by state id, for each vertex of
 * \p g that has a normal self loop and passes the acceleration checks. */
void find_allowed_accel_states(const GoughGraph &g,
const std::map<gough_edge_id, std::vector<gough_ins> > &blocks,
std::map<dstate_id_t, gough_accel_state_info> *out);
/** \brief Looks for a "normal" self loop on \p v; returns true and stores the
 * edge in \p out when one is found. NOTE(review): what makes a self loop
 * "normal" is defined by the implementation - confirm there. */
bool find_normal_self_loop(GoughVertex v, const GoughGraph &g, GoughEdge *out);
} // namespace ue2
// Note: C structure, can't be in namespace ue2
/** \brief Two instructions are equal when op, dest and src all match. */
static inline
bool operator==(const gough_ins &a, const gough_ins &b) {
    if (a.op != b.op) {
        return false;
    }
    if (a.dest != b.dest) {
        return false;
    }
    return a.src == b.src;
}
/** \brief Lexicographic ordering of instructions by (op, src, dest). */
static inline
bool operator<(const gough_ins &a, const gough_ins &b) {
    if (a.op != b.op) {
        return a.op < b.op;
    }
    if (a.src != b.src) {
        return a.src < b.src;
    }
    return a.dest < b.dest;
}
#endif

View File

@@ -0,0 +1,502 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "goughcompile.h"
#include "goughcompile_dump.h"
#include "goughcompile_internal.h"
#include "gough_internal.h"
#include "grey.h"
#include "util/container.h"
#include "util/graph.h"
#include "util/graph_range.h"
#include "util/order_check.h"
#include "util/ue2_containers.h"
#include "ue2common.h"
#include <algorithm>
#include <boost/graph/depth_first_search.hpp>
#include <boost/range/adaptor/map.hpp>
using namespace std;
using boost::adaptors::map_values;
namespace ue2 {
/** \brief Appends the raw pointer held by each smart pointer in \p in to
 * \p out, preserving order. */
template<typename VarP, typename VarQ>
void push_back_all_raw(vector<VarP> *out, const vector<VarQ> &in) {
    for (auto it = in.begin(); it != in.end(); ++it) {
        out->push_back(it->get());
    }
}
/** \brief Appends every variable owned by the graph to \p out: all vertex
 * variables first, then all edge variables. */
static
void all_vars(const GoughGraph &g, vector<GoughSSAVar *> *out) {
    for (auto vertex : vertices_range(g)) {
        const auto &owned = g[vertex].vars;
        push_back_all_raw(out, owned);
    }

    for (const auto &edge : edges_range(g)) {
        const auto &owned = g[edge].vars;
        push_back_all_raw(out, owned);
    }
}
namespace {
/** \brief Reverse lookups from variables to their place in the graph; filled
 * in by fill_aux(). */
struct GoughGraphAux {
/* vertex that owns each vertex variable */
map<const GoughSSAVar *, GoughVertex> containing_v;
/* edge that owns each edge variable */
map<const GoughSSAVar *, GoughEdge> containing_e;
/* vertices whose reports / EOD reports refer to each variable */
map<const GoughSSAVar *, set<GoughVertex> > reporters;
};
}
/**
 * \brief Populates \p aux from \p g: which vertex or edge owns each variable,
 * and which vertices report each variable (via reports or EOD reports).
 */
static never_inline
void fill_aux(const GoughGraph &g, GoughGraphAux *aux) {
    for (auto v : vertices_range(g)) {
        const GoughVertexProps &props = g[v];

        for (const auto &var : props.vars) {
            aux->containing_v[var.get()] = v;
            DEBUG_PRINTF("%u is on vertex %u\n", var->slot, g[v].state_id);
        }

        /* only the variable half of each (report, variable) pair matters */
        for (const auto &report : props.reports) {
            aux->reporters[report.second].insert(v);
        }
        for (const auto &report : props.reports_eod) {
            aux->reporters[report.second].insert(v);
        }
    }

    for (const auto &e : edges_range(g)) {
        for (const auto &var : g[e].vars) {
            aux->containing_e[var.get()] = e;
            DEBUG_PRINTF("%u is on edge %u->%u\n", var->slot,
                         g[source(e, g)].state_id, g[target(e, g)].state_id);
        }
    }
}
static
bool is_block_local(const GoughGraph &cfg, GoughSSAVar *var,
const GoughGraphAux &aux) {
/* if var used as a report, it cannot be considered block local */
if (contains(aux.reporters, var)) {
return false;
}
/* (useful) vertex/join vars never local - they are terminal in blocks
* and so should be read by another block. */
if (!contains(aux.containing_e, var)) {
return false;
}
/* for other cases, require that all uses of var are later in the same edge
* or on the target AND if on target it is sole on flow coming from the
* edge in question. */
const GoughEdge &e = aux.containing_e.at(var);
GoughVertex t = target(e, cfg);
size_t seen_outputs = 0;
const flat_set<GoughSSAVarWithInputs *> &out = var->get_outputs();
bool seen_var = false;
for (const auto &e_var : cfg[e].vars) {
if (seen_var) {
GoughSSAVarWithInputs *w
= dynamic_cast<GoughSSAVarWithInputs *>(e_var.get());
if (contains(out, w)) {
seen_outputs++;
}
} else {
seen_var = var == e_var.get();
}
}
assert(seen_var);
for (const auto &t_var : cfg[t].vars) {
if (contains(out, t_var.get())) {
seen_outputs++;
const flat_set<GoughEdge> &flow = t_var->get_edges_for_input(var);
if (flow.size() != 1 || *flow.begin() != e) {
/* this var is used by the target join var BUT on a different
* flow, so this is not a block local variable */
return false;
}
}
}
assert(seen_outputs <= out.size());
return seen_outputs == out.size();
}
/**
 * \brief Marks the variables on edge \p e that are defined while the
 * variable under consideration is live, then queues the edge's source vertex.
 *
 * The edge's variables are walked from last to first. With a null \p start
 * all of them are marked; otherwise only those before \p start in edge order
 * (the walk starts marking once it has passed \p start). Hitting a variable
 * that is already in \p rv stops the walk without queueing the source.
 */
static
void handle_pending_edge(const GoughGraph &g, const GoughEdge &e,
                         GoughSSAVar *start, set<GoughVertex> &pending_vertex,
                         set<const GoughSSAVar *> &rv) {
    const vector<shared_ptr<GoughSSAVar> > &vars = g[e].vars;
    bool marking = !start;
    DEBUG_PRINTF("  ---checking edge %u->%u %s %zu\n", g[source(e, g)].state_id,
                 g[target(e, g)].state_id, marking ? "full" : "partial",
                 vars.size());

    for (size_t i = vars.size(); i > 0; i--) {
        GoughSSAVar *var = vars[i - 1].get();

        if (contains(rv, var)) {
            DEBUG_PRINTF("somebody has already processed this vertex [%u]\n",
                         var->slot);
            return;
        }

        if (var == start) {
            /* everything earlier on the edge is defined during the lifetime */
            assert(!marking);
            marking = true;
        } else if (marking) {
            rv.insert(var);
        }
    }
    assert(marking);

    /* the lifetime reaches back through the source vertex */
    GoughVertex s = source(e, g);
    for (const auto &var : g[s].vars) {
        DEBUG_PRINTF("interferes %u\n", var->slot);
        rv.insert(var.get());
    }
    pending_vertex.insert(s);
}
/**
 * \brief Processes every variable in \p pending_var that uses \p def, walking
 * back over the edge(s) involved in each use.
 */
static
void handle_pending_vars(GoughSSAVar *def, const GoughGraph &g,
                         const GoughGraphAux &aux,
                         const flat_set<GoughSSAVarWithInputs *> &pending_var,
                         set<GoughVertex> &pending_vertex,
                         set<const GoughSSAVar *> &rv) {
    for (GoughSSAVarWithInputs *user : pending_var) {
        if (!contains(aux.containing_v, user)) {
            /* used on an edge: partial walk back from the use */
            const GoughEdge &e = aux.containing_e.at(user);
            handle_pending_edge(g, e, user, pending_vertex, rv);
            continue;
        }

        /* def is used by a join on a vertex: the value only needs to be
         * live on the incoming edges that carry it */
        GoughSSAVarJoin *join = static_cast<GoughSSAVarJoin *>(user);
        const flat_set<GoughEdge> &live_edges = join->get_edges_for_input(def);
        for (const auto &e : live_edges) {
            handle_pending_edge(g, e, nullptr, pending_vertex, rv);
        }
    }
}
/**
 * \brief Continues the backwards walk through vertex \p current by processing
 * each of its in-edges in full, unless \p current is the defining vertex.
 */
static
void handle_pending_vertex(GoughVertex def_v, const GoughGraph &g,
                           GoughVertex current,
                           set<GoughVertex> &pending_vertex,
                           set<const GoughSSAVar *> &rv) {
    DEBUG_PRINTF("---checking vertex %u\n", g[current].state_id);

    if (def_v != current) {
        for (const auto &e : in_edges_range(current, g)) {
            handle_pending_edge(g, e, nullptr, pending_vertex, rv);
        }
        return;
    }

    /* we have reached def */
    DEBUG_PRINTF("contains target vertex\n");
}
/**
 * \brief Drains \p pending_vertex as a worklist, visiting each queued vertex
 * at most once; visits may queue further vertices.
 */
static
void handle_pending_vertices(GoughSSAVar *def, const GoughGraph &g,
                             const GoughGraphAux &aux,
                             set<GoughVertex> &pending_vertex,
                             set<const GoughSSAVar *> &rv) {
    if (pending_vertex.empty()) {
        return;
    }

    /* vertex holding def, if def is a vertex variable */
    GoughVertex def_v = GoughGraph::null_vertex();
    auto owner = aux.containing_v.find(def);
    if (owner != aux.containing_v.end()) {
        def_v = owner->second;
    }

    ue2::unordered_set<GoughVertex> done;
    while (!pending_vertex.empty()) {
        GoughVertex current = *pending_vertex.begin();
        pending_vertex.erase(pending_vertex.begin());

        if (!done.insert(current).second) {
            continue; /* already visited */
        }

        handle_pending_vertex(def_v, g, current, pending_vertex, rv);
    }
}
/* Returns the set of variables defined while the given def is live: starting
 * from the places def is reported or used, the graph is walked backwards
 * towards def and every variable met on the way is collected. def itself is
 * not part of the result. */
static never_inline
set<const GoughSSAVar *> live_during(GoughSSAVar *def, const GoughGraph &g,
                                     const GoughGraphAux &aux) {
    DEBUG_PRINTF("checking who is defined during %u lifetime\n", def->slot);

    set<GoughVertex> pending_vertex;
    set<const GoughSSAVar *> rv;

    /* temporarily present so that the walk stops when it reaches def */
    rv.insert(def);

    auto reported = aux.reporters.find(def);
    if (reported != aux.reporters.end()) {
        DEBUG_PRINTF("--> gets reported\n");
        for (auto v : reported->second) {
            pending_vertex.insert(v);
            for (const auto &var : g[v].vars) {
                DEBUG_PRINTF("interferes %u\n", var->slot);
                rv.insert(var.get());
            }
        }
    }

    handle_pending_vars(def, g, aux, def->get_outputs(), pending_vertex, rv);
    handle_pending_vertices(def, g, aux, pending_vertex, rv);

    rv.erase(def);
    return rv;
}
/** \brief Gives each variable in \p vars the next free slot number, advancing
 * \p next_slot. Every variable must still be unassigned (INVALID_SLOT). */
template<typename VarP>
void set_initial_slots(const vector<VarP> &vars, u32 *next_slot) {
    for (auto &var : vars) {
        assert(var->slot == INVALID_SLOT);
        var->slot = *next_slot;
        ++*next_slot;
    }
}
/* Crude, deterministic first assignment of symbolic register slots: vertex
 * variables are numbered first, then edge variables, each in graph iteration
 * order. Returns the number of slots given out. */
static
u32 initial_slots(const GoughGraph &g) {
    u32 slots_used = 0;

    for (auto vertex : vertices_range(g)) {
        set_initial_slots(g[vertex].vars, &slots_used);
    }

    for (const auto &edge : edges_range(g)) {
        set_initial_slots(g[edge].vars, &slots_used);
    }

    return slots_used;
}
#define NO_COLOUR (~0U)
/** \brief Returns the smallest colour that is not in \p bad_colours, relying
 * on the set being iterated in ascending order. */
static
u32 available_colour(const flat_set<u32> &bad_colours) {
    u32 candidate = 0;

    for (const u32 &taken : bad_colours) {
        if (taken == candidate) {
            /* candidate is in use, try the one after it */
            candidate = taken + 1;
            continue;
        }
        /* found a gap before `taken` */
        assert(taken > candidate);
        break;
    }

    assert(candidate != NO_COLOUR);
    return candidate;
}
/**
 * \brief Forbids colour \p c for every variable in \p live that has not been
 * coloured yet. Already-coloured live variables must not hold \p c.
 */
static
void poison_colours(const set<const GoughSSAVar *> &live, u32 c,
                    const vector<u32> &colour_map,
                    vector<flat_set<u32> > *bad_colour) {
    for (const GoughSSAVar *var : live) {
        const u32 idx = var->slot;
        if (colour_map[idx] == NO_COLOUR) {
            (*bad_colour)[idx].insert(c);
        } else {
            assert(c != colour_map[idx]);
        }
    }
}
/** \brief Adds to \p out the colour of every variable in \p live that has
 * already been coloured. */
static
void find_bad_due_to_live(const set<const GoughSSAVar *> &live,
                          const vector<u32> &colour_map, flat_set<u32> *out) {
    for (const GoughSSAVar *var : live) {
        const u32 colour = colour_map[var->slot];
        if (colour == NO_COLOUR) {
            continue; /* not coloured yet, nothing to avoid */
        }
        out->insert(colour);
    }
}
/**
 * \brief Greedily colours the variables in \p order, one at a time.
 *
 * \param g           graph owning the variables
 * \param aux         reverse lookups for \p g
 * \param order       variables in the order they are to be coloured; each
 *                    variable's current slot is used as its index
 * \param colour_map  output, indexed by current slot: the colour chosen for
 *                    each variable, or NO_COLOUR for block-local variables
 *
 * Each variable receives the lowest colour not held by, or already forbidden
 * by, a variable whose definition overlaps its lifetime.
 */
static
void sequential_vertex_colouring(const GoughGraph &g, const GoughGraphAux &aux,
                                 const vector<GoughSSAVar *> &order,
                                 vector<u32> &colour_map) {
    assert(order.size() < NO_COLOUR);
    colour_map.clear();
    colour_map.resize(order.size(), NO_COLOUR);

    /* per variable: colours ruled out by already-coloured neighbours */
    vector<flat_set<u32> > bad_colour(order.size());

    for (GoughSSAVar *var : order) {
        u32 var_index = var->slot;
        /* slots double as indices into colour_map and bad_colour */
        assert(var_index < colour_map.size());

        if (is_block_local(g, var, aux)) {
            DEBUG_PRINTF("%u is block local\n", var_index);
            /* ignore variable whose lifetime is limited to their local block
             * there is no need to assign stream state to these variables */
            continue;
        }
        assert(colour_map[var_index] == NO_COLOUR);

        set<const GoughSSAVar *> live = live_during(var, g, aux);
        flat_set<u32> &local_bad = bad_colour[var_index];
        find_bad_due_to_live(live, colour_map, &local_bad);

        DEBUG_PRINTF("colouring %u\n", var_index);
        u32 c = available_colour(local_bad);
        colour_map[var_index] = c;
        assert(!contains(bad_colour[var_index], c));
        poison_colours(live, c, colour_map, &bad_colour);

        /* this variable is done: swap with an empty set to drop its list */
        flat_set<u32> temp_set;
        local_bad.swap(temp_set);
        DEBUG_PRINTF("    %u coloured %u\n", var_index, c);
    }
}
template<typename VarP>
void add_to_dom_ordering(const vector<VarP> &vars,
vector<GoughSSAVar *> *out) {
for (const auto &var : vars) {
out->push_back(var.get());
}
}
namespace {
/** \brief DFS visitor that records vertices in the order the search finishes
 * them. */
class FinishVisitor : public boost::default_dfs_visitor {
public:
explicit FinishVisitor(vector<GoughVertex> *o) : out(o) {}
/* called by the DFS once it is done with v; appends v to the output */
void finish_vertex(const GoughVertex v, const GoughGraph &) {
out->push_back(v);
}
/* destination vector; not owned by the visitor */
vector<GoughVertex> *out;
};
}
static
void find_dom_ordering(const GoughGraph &cfg, vector<GoughSSAVar *> *out) {
vector<GoughVertex> g_order;
/* due to construction quirks, default vertex order provides entry points */
depth_first_search(cfg, visitor(FinishVisitor(&g_order))
.root_vertex(cfg[boost::graph_bundle].initial_vertex));
for (auto it = g_order.rbegin(); it != g_order.rend(); ++it) {
add_to_dom_ordering(cfg[*it].vars, out);
for (const auto &e : out_edges_range(*it, cfg)) {
add_to_dom_ordering(cfg[e].vars, out);
}
}
}
/**
 * \brief Computes the mapping from initial slot numbers to colours.
 *
 * \param cfg             graph whose variables already hold initial slots
 * \param old_slot_count  number of initial slots (checked by assertion only)
 * \param old_new         output: colour per initial slot, NO_COLOUR for
 *                        block-local variables
 */
static
void create_slot_mapping(const GoughGraph &cfg, UNUSED u32 old_slot_count,
                         vector<u32> *old_new) {
    /* The interference graph of a program in SSA form is chordal, so it can
     * be coloured optimally in polynomial time.
     *
     * A chordal graph is coloured by visiting its nodes in a perfect
     * elimination order. Iterating over the SSA CFG in a way that respects
     * the dominance relationship visits the interference graph in such an
     * order.
     *
     * The full interference graph is never built: liveness information is
     * derived as each definition is visited and drives the colouring.
     *
     * See S. Hack, various publications, 2006 onwards.
     */
    GoughGraphAux aux;
    fill_aux(cfg, &aux);

    vector<GoughSSAVar *> dom_order;
    find_dom_ordering(cfg, &dom_order);
    assert(dom_order.size() == old_slot_count);

    sequential_vertex_colouring(cfg, aux, dom_order, *old_new);
}
/**
 * \brief Renumbers the block-local variables in \p locals.
 *
 * On every edge, numbering restarts at \p local_base, so locals on different
 * edges reuse the same slot numbers.
 */
static
void update_local_slots(GoughGraph &g, set<GoughSSAVar *> &locals,
                        u32 local_base) {
    DEBUG_PRINTF("%zu local variables\n", locals.size());

    /* local variables only occur on edges (joins are never local) */
    u32 allocated_count = 0;
    for (const auto &e : edges_range(g)) {
        u32 next_slot = local_base;
        for (auto &var : g[e].vars) {
            if (!contains(locals, var.get())) {
                continue;
            }
            DEBUG_PRINTF("updating slot %u using local %u\n", var->slot,
                         next_slot);
            var->slot = next_slot;
            next_slot++;
            allocated_count++;
        }
    }

    assert(allocated_count == locals.size());
}
/**
 * \brief Applies the colouring in \p old_new to every variable of \p g.
 *
 * Coloured variables take their colour as final slot; variables mapped to
 * NO_COLOUR are block local and are renumbered per edge, starting just past
 * the coloured slots.
 *
 * \return the number of non-local slots in use (highest colour + 1)
 */
static never_inline
u32 update_slots(GoughGraph &g, const vector<u32> &old_new,
                 UNUSED u32 old_slot_count) {
    vector<GoughSSAVar *> vars;
    all_vars(g, &vars);

    set<GoughSSAVar *> locals;
    u32 slot_count = 0;
    for (GoughSSAVar *v : vars) {
        assert(v->slot < old_new.size());
        DEBUG_PRINTF("updating slot %u to %u\n", v->slot, old_new[v->slot]);

        if (old_new[v->slot] == NO_COLOUR) {
            locals.insert(v);
            continue;
        }

        /* not local, assign final slot */
        v->slot = old_new[v->slot];
        ENSURE_AT_LEAST(&slot_count, v->slot + 1);
    }

    assert(slot_count <= old_slot_count);
    DEBUG_PRINTF("reduce stream slots from %u to %u\n", old_slot_count,
                 slot_count);

    update_local_slots(g, locals, slot_count);
    return slot_count;
}
/**
 * \brief Assigns slots to all variables of \p cfg and returns how many slots
 * are needed.
 *
 * Every variable first gets a unique slot. When grey.goughRegisterAllocate is
 * set, the graph is dumped ("slots_pre") and the slots are then compacted by
 * colouring; otherwise the initial numbering is final.
 */
u32 assign_slots(GoughGraph &cfg, const Grey &grey) {
    const u32 initial_count = initial_slots(cfg);
    if (!grey.goughRegisterAllocate) {
        return initial_count;
    }

    dump(cfg, "slots_pre", grey);

    vector<u32> old_new;
    create_slot_mapping(cfg, initial_count, &old_new);
    return update_slots(cfg, old_new, initial_count);
}
} // namespace ue2

349
src/nfa/goughdump.cpp Normal file
View File

@@ -0,0 +1,349 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "goughdump.h"
#include "gough_internal.h"
#include "mcclellandump.h"
#include "nfa_dump_internal.h"
#include "nfa_internal.h"
#include "ue2common.h"
#include "util/charreach.h"
#include "util/dump_charclass.h"
#include "util/unaligned.h"
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <map>
#include <set>
#include <vector>
#ifndef DUMP_SUPPORT
#error No dump support!
#endif
using namespace std;
namespace ue2 {
/**
 * \brief Fills \p t with the successor of state \p s for every input byte,
 * and stores the state's TOP successor in t[TOP].
 *
 * \param n  Gough NFA (asserted to be a Gough type)
 * \param s  state to describe
 * \param t  output table; written at indices 0..N_CHARS-1 and at TOP, so it
 *           must have room for both
 *
 * 8-bit engines read the successor table directly. 16-bit engines read the
 * table row of the state itself or, for a Sherman state (s >= sherman_limit),
 * the row of its "daddy" state, and then apply the Sherman state's own list
 * of (symbol, successor) overrides on top.
 */
static
void goughGetTransitions(const NFA *n, u16 s, u16 *t) {
    assert(isGoughType(n->type));
    const mcclellan *m = (const mcclellan *)getImplNfa(n);
    const mstate_aux *aux = getAux(n, s);
    const u32 as = m->alphaShift;
    const char *sher_base
        = (const char *)m - sizeof(struct NFA) + m->sherman_offset;

    if (n->type == GOUGH_NFA_8) {
        const u8 *succ_table = (const u8 *)((const char *)m + sizeof(mcclellan));
        for (u16 c = 0; c < N_CHARS; c++) {
            t[c] = succ_table[((u32)s << as) + m->remap[c]];
        }
    } else {
        u16 base_s = s;

        if (s >= m->sherman_limit) {
            /* Sherman state: start from the daddy state's row */
            const char *state_base
                = findShermanState(m, sher_base, m->sherman_limit, s);
            base_s = *(const u16 *)(state_base + SHERMAN_DADDY_OFFSET);
        }

        const u16 *succ_table = (const u16 *)((const char *)m
                                              + sizeof(mcclellan));
        for (u16 c = 0; c < N_CHARS; c++) {
            const u8 *addr
                = (const u8*)(succ_table + (((u32)base_s << as) + m->remap[c]));
            t[c] = unaligned_load_u16(addr);
            t[c] &= STATE_MASK;
        }

        if (s >= m->sherman_limit) {
            const char *state_base
                = findShermanState(m, sher_base, m->sherman_limit, s);
            u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base);
            const u8 *chars = (const u8 *)state_base + SHERMAN_CHARS_OFFSET;
            const u16 *states
                = (const u16 *)(state_base + SHERMAN_STATES_OFFSET(len));

            for (u8 i = 0; i < len; i++) {
                for (u16 c = 0; c < N_CHARS; c++) {
                    /* override i applies only to bytes that remap to its
                     * symbol; all other bytes keep the daddy's successor */
                    if (m->remap[c] == chars[i]) {
                        t[c] = unaligned_load_u16((const u8*)&states[i])
                             & STATE_MASK;
                    }
                }
            }
        }
    }

    t[TOP] = aux->top & STATE_MASK;
}
static
void describeNode(const NFA *n, const mcclellan *m, u16 i, FILE *f) {
const mstate_aux *aux = getAux(n, i);
bool isSherman = m->sherman_limit && i >= m->sherman_limit;
const char *sher_base
= (const char *)m - sizeof(NFA) + m->sherman_offset;
fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
"label = \"%u%s\" ]; \n", i, i, isSherman ? "w":"");
if (aux->accel_offset) {
dumpAccelDot(f, i,
&((const gough_accel *)((const char *)m + aux->accel_offset))->accel);
}
if (aux->accept_eod) {
fprintf(f, "%u [ color = darkorchid ];\n", i);
}
if (aux->accept) {
fprintf(f, "%u [ shape = doublecircle ];\n", i);
}
if (aux->top && aux->top != i) {
fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i,
aux->top);
}
if (i == m->start_anchored) {
fprintf(f, "STARTA -> %u [color = blue ]\n", i);
}
if (i == m->start_floating) {
fprintf(f, "STARTF -> %u [color = red ]\n", i);
}
if (isSherman) {
const char *sherman_state
= findShermanState(m, sher_base, m->sherman_limit, i);
fprintf(f, "%u [ fillcolor = lightblue style=filled ];\n", i);
u16 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET);
if (daddy) {
fprintf(f, "%u -> %u [ color=royalblue style=dashed weight=0.1]\n",
i, daddy);
}
}
}
static
void dump_program(FILE *f, const pair<u32, u32> &e, const gough_ins *prog) {
fprintf(f, "edge_%u_%u:\n", e.first, e.second);
for (const gough_ins *it = prog;; ++it) {
fprintf(f, "\t");
u32 s = it->src;
u32 d = it->dest;
switch (it->op) {
case GOUGH_INS_END:
fprintf(f, "END");
fprintf(f, "\n");
return;
case GOUGH_INS_MOV:
fprintf(f, "MOV %u %u", d, s);
break;
case GOUGH_INS_NEW:
fprintf(f, "NEW-%u %u", s, d);
break;
case GOUGH_INS_MIN:
fprintf(f, "MIN %u %u", d, s);
break;
default:
fprintf(f, "<UNKNOWN>");
fprintf(f, "\n");
return;
}
fprintf(f, "\n");
}
}
static
void dump_programs(FILE *f, const NFA *nfa,
const set<pair<pair<u32, u32>, u32 > > &prog_dump) {
fprintf(f, "Edge Programs\n");
fprintf(f, "-------------\n");
for (set<pair<pair<u32, u32>, u32 > >::const_iterator it
= prog_dump.begin(); it != prog_dump.end(); ++it) {
assert(it->second);
const gough_ins *p = (const gough_ins *)((const u8 *)nfa + it->second);
dump_program(f, it->first, p);
}
}
static
void dumpTransitions(const NFA *nfa, FILE *f,
set<pair<pair<u32, u32>, u32 > > *prog_dump) {
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
const gough_info *g = get_gough(m);
u32 alphaSize = 1U << m->alphaShift;
const u32 *prog_offset_table = (const u32 *)(g + 1);
for (u16 i = 0; i < m->state_count; i++) {
fprintf(f, "%05hu", i);
const mstate_aux *aux = getAux(nfa, i);
if (aux->accel_offset) {
dumpAccelText(f, (const union AccelAux *)((const char *)m +
aux->accel_offset));
}
u16 trans[ALPHABET_SIZE];
goughGetTransitions(nfa, i, trans);
int rstart = 0;
u16 prev = 0xffff;
for (int j = 0; j < N_CHARS; j++) {
u16 curr = trans[j];
if (curr == prev) {
continue;
}
if (prev != 0xffff) {
if (j == rstart + 1) {
fprintf(f, " %02x->%hu", rstart, prev);
} else {
fprintf(f, " [%02x - %02x]->%hu", rstart, j - 1, prev);
}
}
prev = curr;
rstart = j;
u32 edge_index = i * alphaSize + m->remap[j];
u32 prog_offset = prog_offset_table[edge_index];
if (prog_offset) {
prog_dump->insert(make_pair(make_pair((u32)i, (u32)trans[j]),
prog_offset));
}
}
if (N_CHARS == rstart + 1) {
fprintf(f, " %02x->%hu", rstart, prev);
} else {
fprintf(f, " [%02x - %02x]->%hu", rstart, N_CHARS - 1, prev);
}
fprintf(f, " TOP->%hu\n", trans[TOP]);
fprintf(f, "\n");
}
fprintf(f, "\n");
}
void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f) {
assert(nfa->type == GOUGH_NFA_8);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
dumpDotPreambleDfa(f);
for (u16 i = 1; i < m->state_count; i++) {
describeNode(nfa, m, i, f);
u16 t[ALPHABET_SIZE];
goughGetTransitions(nfa, i, t);
describeEdge(f, t, i);
}
fprintf(f, "}\n");
}
void nfaExecGough8_dumpText(const struct NFA *nfa, FILE *f) {
assert(nfa->type == GOUGH_NFA_8);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
fprintf(f, "gough 8\n");
fprintf(f, "report: %u, states %u, length %u\n", m->arb_report,
m->state_count, m->length);
fprintf(f, "astart: %hu, fstart %hu\n", m->start_anchored,
m->start_floating);
fprintf(f, "accel_limit: %hu, accept_limit %hu\n", m->accel_limit_8,
m->accept_limit_8);
fprintf(f, "\n");
describeAlphabet(f, m);
set<pair<pair<u32, u32>, u32 > > prog_dump;
dumpTransitions(nfa, f, &prog_dump);
dump_programs(f, nfa, prog_dump);
dumpTextReverse(nfa, f);
}
void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f) {
assert(nfa->type == GOUGH_NFA_16);
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
dumpDotPreambleDfa(f);
for (u16 i = 1; i < m->state_count; i++) {
describeNode(nfa, m, i, f);
u16 t[ALPHABET_SIZE];
goughGetTransitions(nfa, i, t);
describeEdge(f, t, i);
}
fprintf(f, "}\n");
}
/**
 * \brief Write a textual description of a 16-bit Gough engine to \p f.
 *
 * Prints the header fields (including whether MCCLELLAN_FLAG_SINGLE is set
 * and the Sherman state bounds), the alphabet, the per-state transitions, the
 * edge programs collected while printing the transitions, and the reverse
 * dump.
 */
void nfaExecGough16_dumpText(const struct NFA *nfa, FILE *f) {
    assert(nfa->type == GOUGH_NFA_16);
    const mcclellan *m = (const mcclellan *)getImplNfa(nfa);

    fprintf(f, "gough 16\n");
    fprintf(f, "report: %u, states: %u, length: %u\n", m->arb_report,
            m->state_count, m->length);
    fprintf(f, "astart: %hu, fstart: %hu\n", m->start_anchored,
            m->start_floating);
    /* Test the SINGLE bit itself. The previous expression,
     * '!!(int)m->flags & MCCLELLAN_FLAG_SINGLE', applied '!!' to the whole
     * flags word before masking (unary '!' binds tighter than '&'), so it
     * reflected "any flag set" rather than this particular flag. */
    fprintf(f, "single accept: %d\n",
            (m->flags & MCCLELLAN_FLAG_SINGLE) ? 1 : 0);
    fprintf(f, "sherman_limit: %u, sherman_end: %u\n", m->sherman_limit,
            m->sherman_end);

    describeAlphabet(f, m);

    /* dumpTransitions() fills in the programs that dump_programs() prints. */
    set<pair<pair<u32, u32>, u32 > > prog_dump;
    dumpTransitions(nfa, f, &prog_dump);
    dump_programs(f, nfa, prog_dump);

    fprintf(f, "\n");
    dumpTextReverse(nfa, f);
}
} // namespace ue2

Some files were not shown because too many files have changed in this diff Show More