mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2026-01-17 16:00:26 +03:00
Initial commit of Hyperscan
This commit is contained in:
109
src/alloc.c
Normal file
109
src/alloc.c
Normal file
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Runtime functions for setting custom allocators.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "allocator.h"
|
||||
|
||||
#define default_malloc malloc
|
||||
#define default_free free
|
||||
|
||||
hs_alloc_t hs_database_alloc = default_malloc;
|
||||
hs_alloc_t hs_misc_alloc = default_malloc;
|
||||
hs_alloc_t hs_scratch_alloc = default_malloc;
|
||||
hs_alloc_t hs_stream_alloc = default_malloc;
|
||||
|
||||
hs_free_t hs_database_free = default_free;
|
||||
hs_free_t hs_misc_free = default_free;
|
||||
hs_free_t hs_scratch_free = default_free;
|
||||
hs_free_t hs_stream_free = default_free;
|
||||
|
||||
static
|
||||
hs_alloc_t normalise_alloc(hs_alloc_t a) {
|
||||
if (!a) {
|
||||
return default_malloc;
|
||||
} else {
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
hs_free_t normalise_free(hs_free_t f) {
|
||||
if (!f) {
|
||||
return default_free;
|
||||
} else {
|
||||
return f;
|
||||
}
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_set_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
||||
hs_set_database_allocator(allocfunc, freefunc);
|
||||
hs_set_misc_allocator(allocfunc, freefunc);
|
||||
hs_set_stream_allocator(allocfunc, freefunc);
|
||||
hs_set_scratch_allocator(allocfunc, freefunc);
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_set_database_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
||||
hs_database_alloc = normalise_alloc(allocfunc);
|
||||
hs_database_free = normalise_free(freefunc);
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_set_misc_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
||||
hs_misc_alloc = normalise_alloc(allocfunc);
|
||||
hs_misc_free = normalise_free(freefunc);
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_set_scratch_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
||||
hs_scratch_alloc = normalise_alloc(allocfunc);
|
||||
hs_scratch_free = normalise_free(freefunc);
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_set_stream_allocator(hs_alloc_t allocfunc, hs_free_t freefunc) {
|
||||
hs_stream_alloc = normalise_alloc(allocfunc);
|
||||
hs_stream_free = normalise_free(freefunc);
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
66
src/allocator.h
Normal file
66
src/allocator.h
Normal file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ALLOCATOR_H
|
||||
#define ALLOCATOR_H
|
||||
|
||||
#include "hs_common.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
extern hs_alloc_t hs_database_alloc;
|
||||
extern hs_alloc_t hs_misc_alloc;
|
||||
extern hs_alloc_t hs_scratch_alloc;
|
||||
extern hs_alloc_t hs_stream_alloc;
|
||||
|
||||
extern hs_free_t hs_database_free;
|
||||
extern hs_free_t hs_misc_free;
|
||||
extern hs_free_t hs_scratch_free;
|
||||
extern hs_free_t hs_stream_free;
|
||||
#ifdef __cplusplus
|
||||
} /* extern C */
|
||||
#endif
|
||||
/** \brief Check the results of an alloc done with hs_alloc for alignment.
|
||||
*
|
||||
* If we have incorrect alignment, return an error. Caller should free the
|
||||
* offending block. */
|
||||
static really_inline
|
||||
hs_error_t hs_check_alloc(const void *mem) {
|
||||
hs_error_t ret = HS_SUCCESS;
|
||||
if (!mem) {
|
||||
ret = HS_NOMEM;
|
||||
} else if (!ISALIGNED_N(mem, alignof(unsigned long long))) {
|
||||
ret = HS_BAD_ALLOC;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif
|
||||
310
src/compiler/asserts.cpp
Normal file
310
src/compiler/asserts.cpp
Normal file
@@ -0,0 +1,310 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Convert temporary assert vertices (from construction method) to
|
||||
* edge-based flags.
|
||||
*
|
||||
* This pass converts the temporary assert vertices created by the Glushkov
|
||||
* construction process above (vertices with special assertions flags) into
|
||||
* edges between those vertices' neighbours in the graph.
|
||||
*
|
||||
* These edges have the appropriate flags applied to them -- a path (u,t,v)
|
||||
* through an assert vertex t will be replaced with the edge (u,v) with the
|
||||
* assertion flags from t.
|
||||
*
|
||||
* Edges with mutually incompatible flags (such as the conjunction of
|
||||
* word-to-word and word-to-nonword) are dropped.
|
||||
*/
|
||||
#include "asserts.h"
|
||||
#include "nfagraph/ng.h"
|
||||
#include "nfagraph/ng_prune.h"
|
||||
#include "nfagraph/ng_redundancy.h"
|
||||
#include "nfagraph/ng_util.h"
|
||||
#include "parser/position.h" // for POS flags
|
||||
#include "util/compile_error.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include <queue>
|
||||
#include <set>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** Hard limit on the maximum number of edges we'll clone before we throw up
|
||||
* our hands and report 'Pattern too large.' */
|
||||
static const size_t MAX_ASSERT_EDGES = 300000;
|
||||
|
||||
/** Flags representing the word-boundary assertions, \\b or \\B. */
|
||||
static const int WORDBOUNDARY_FLAGS = POS_FLAG_ASSERT_WORD_TO_WORD
|
||||
| POS_FLAG_ASSERT_WORD_TO_NONWORD
|
||||
| POS_FLAG_ASSERT_NONWORD_TO_WORD
|
||||
| POS_FLAG_ASSERT_NONWORD_TO_NONWORD
|
||||
| POS_FLAG_ASSERT_WORD_TO_WORD_UCP
|
||||
| POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP
|
||||
| POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP
|
||||
| POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP;
|
||||
|
||||
#define OPEN_EDGE 0U
|
||||
#define DEAD_EDGE (~0U)
|
||||
|
||||
static
|
||||
u32 disjunct(u32 flags1, u32 flags2) {
|
||||
/* from two asserts in parallel */
|
||||
DEBUG_PRINTF("disjunct %x %x\n", flags1, flags2);
|
||||
u32 rv;
|
||||
if (flags1 == DEAD_EDGE) {
|
||||
rv = flags2;
|
||||
} else if (flags2 == DEAD_EDGE) {
|
||||
rv = flags1;
|
||||
} else if (flags1 == OPEN_EDGE || flags2 == OPEN_EDGE) {
|
||||
rv = OPEN_EDGE;
|
||||
} else {
|
||||
rv = flags1 | flags2;
|
||||
}
|
||||
DEBUG_PRINTF("--> %x\n", rv);
|
||||
return rv;
|
||||
}
|
||||
|
||||
static
|
||||
u32 conjunct(u32 flags1, u32 flags2) {
|
||||
/* from two asserts in series */
|
||||
DEBUG_PRINTF("conjunct %x %x\n", flags1, flags2);
|
||||
u32 rv;
|
||||
if (flags1 == OPEN_EDGE) {
|
||||
rv = flags2;
|
||||
} else if (flags2 == OPEN_EDGE) {
|
||||
rv = flags1;
|
||||
} else if (flags1 & flags2) {
|
||||
rv = flags1 & flags2;
|
||||
} else {
|
||||
rv = DEAD_EDGE; /* the conjunction of two different word boundary
|
||||
* assertion is impassable */
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("--> %x\n", rv);
|
||||
return rv;
|
||||
}
|
||||
|
||||
/** Lookup table from a (source, target) vertex pair to an edge joining them. */
using edge_cache_t = map<pair<NFAVertex, NFAVertex>, NFAEdge>;
|
||||
|
||||
static
|
||||
void replaceAssertVertex(NGWrapper &g, NFAVertex t, edge_cache_t &edge_cache,
|
||||
u32 &assert_edge_count) {
|
||||
DEBUG_PRINTF("replacing assert vertex %u\n", g[t].index);
|
||||
|
||||
const u32 flags = g[t].assert_flags;
|
||||
DEBUG_PRINTF("consider assert vertex %u with flags %u\n",
|
||||
g[t].index, flags);
|
||||
|
||||
// Wire up all the predecessors to all the successors.
|
||||
|
||||
for (const auto &inEdge : in_edges_range(t, g)) {
|
||||
NFAVertex u = source(inEdge, g);
|
||||
if (u == t) {
|
||||
continue; // ignore self-loops
|
||||
}
|
||||
|
||||
const u32 flags_inc_in = conjunct(g[inEdge].assert_flags,
|
||||
flags);
|
||||
if (flags_inc_in == DEAD_EDGE) {
|
||||
DEBUG_PRINTF("fail, in-edge has bad flags %d\n",
|
||||
g[inEdge].assert_flags);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (const auto &outEdge : out_edges_range(t, g)) {
|
||||
NFAVertex v = target(outEdge, g);
|
||||
|
||||
DEBUG_PRINTF("consider path [%u,%u,%u]\n", g[u].index,
|
||||
g[t].index, g[v].index);
|
||||
|
||||
if (v == t) {
|
||||
continue; // ignore self-loops
|
||||
}
|
||||
|
||||
const u32 flags_final = conjunct(g[outEdge].assert_flags,
|
||||
flags_inc_in);
|
||||
|
||||
if (flags_final == DEAD_EDGE) {
|
||||
DEBUG_PRINTF("fail, out-edge has bad flags %d\n",
|
||||
g[outEdge].assert_flags);
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((g[u].assert_flags & POS_FLAG_MULTILINE_START)
|
||||
&& v == g.acceptEod) {
|
||||
DEBUG_PRINTF("fail, (?m)^ does not match \\n at eod\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Replace path (u,t,v) with direct edge (u,v), unless the edge
|
||||
* already exists, in which case we just need to edit its
|
||||
* properties.
|
||||
*
|
||||
* Use edge_cache to prevent us going O(N).
|
||||
*/
|
||||
auto cache_key = make_pair(u, v);
|
||||
auto ecit = edge_cache.find(cache_key);
|
||||
if (ecit == edge_cache.end()) {
|
||||
DEBUG_PRINTF("adding edge %u %u\n", g[u].index,
|
||||
g[v].index);
|
||||
NFAEdge e = add_edge(u, v, g).first;
|
||||
edge_cache.emplace(cache_key, e);
|
||||
g[e].assert_flags = flags;
|
||||
if (++assert_edge_count > MAX_ASSERT_EDGES) {
|
||||
throw CompileError(g.expressionIndex,
|
||||
"Pattern is too large.");
|
||||
}
|
||||
} else {
|
||||
NFAEdge e = ecit->second;
|
||||
DEBUG_PRINTF("updating edge %u %u [a %u]\n", g[u].index,
|
||||
g[v].index, g[t].index);
|
||||
// Edge already exists.
|
||||
u32 &e_flags = g[e].assert_flags;
|
||||
e_flags = disjunct(e_flags, flags_final);
|
||||
assert(e_flags != DEAD_EDGE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Clear vertex t to remove all the old edges.
|
||||
/* no need to clear the cache, as we will never look up its edge as it is
|
||||
* unreachable */
|
||||
clear_vertex(t, g);
|
||||
}
|
||||
|
||||
static
|
||||
void setReportId(ReportManager &rm, NGWrapper &g, NFAVertex v, s32 adj) {
|
||||
// Don't try and set the report ID of a special vertex.
|
||||
assert(!is_special(v, g));
|
||||
|
||||
// There should be no reports set already.
|
||||
assert(g[v].reports.empty());
|
||||
|
||||
Report r = rm.getBasicInternalReport(g, adj);
|
||||
|
||||
g[v].reports.insert(rm.getInternalId(r));
|
||||
DEBUG_PRINTF("set report id for vertex %u, adj %d\n",
|
||||
g[v].index, adj);
|
||||
}
|
||||
|
||||
static
|
||||
void checkForMultilineStart(ReportManager &rm, NGWrapper &g) {
|
||||
vector<NFAEdge> dead;
|
||||
for (auto v : adjacent_vertices_range(g.start, g)) {
|
||||
if (!(g[v].assert_flags & POS_FLAG_MULTILINE_START)) {
|
||||
continue;
|
||||
}
|
||||
DEBUG_PRINTF("mls %u %08x\n", g[v].index,
|
||||
g[v].assert_flags);
|
||||
|
||||
/* we have found a multi-line start (maybe more than one) */
|
||||
|
||||
/* we need to interpose a dummy dot vertex between v and accept if
|
||||
* required so that ^ doesn't match trailing \n */
|
||||
for (const auto &e : out_edges_range(v, g)) {
|
||||
if (target(e, g) == g.accept) {
|
||||
dead.push_back(e);
|
||||
}
|
||||
}
|
||||
/* assert has been resolved; clear flag */
|
||||
g[v].assert_flags &= ~POS_FLAG_MULTILINE_START;
|
||||
}
|
||||
|
||||
for (const auto &e : dead) {
|
||||
NFAVertex dummy = add_vertex(g);
|
||||
g[dummy].char_reach.setall();
|
||||
setReportId(rm, g, dummy, -1);
|
||||
add_edge(source(e, g), dummy, g[e], g);
|
||||
add_edge(dummy, g.accept, g);
|
||||
}
|
||||
|
||||
remove_edges(dead, g);
|
||||
}
|
||||
|
||||
static
|
||||
bool hasAssertVertices(const NGHolder &g) {
|
||||
for (auto v : vertices_range(g)) {
|
||||
int flags = g[v].assert_flags;
|
||||
if (flags & WORDBOUNDARY_FLAGS) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** \brief Convert temporary assert vertices (from construction method) to
|
||||
* edge-based flags.
|
||||
*
|
||||
* Remove the horrors that are the temporary assert vertices which arise from
|
||||
* our construction method. Allows the rest of our code base to live in
|
||||
* blissful ignorance of their existence. */
|
||||
void removeAssertVertices(ReportManager &rm, NGWrapper &g) {
|
||||
size_t num = 0;
|
||||
|
||||
DEBUG_PRINTF("before: graph has %zu vertices\n", num_vertices(g));
|
||||
|
||||
// Sweep over the graph and ascertain that we do actually have vertices
|
||||
// with assertion flags set. Otherwise, we're done.
|
||||
if (!hasAssertVertices(g)) {
|
||||
DEBUG_PRINTF("no assert vertices, done\n");
|
||||
return;
|
||||
}
|
||||
|
||||
u32 assert_edge_count = 0;
|
||||
|
||||
// Build a cache of (u, v) vertex pairs to edge descriptors.
|
||||
edge_cache_t edge_cache;
|
||||
for (const auto &e : edges_range(g)) {
|
||||
edge_cache[make_pair(source(e, g), target(e, g))] = e;
|
||||
}
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
if (g[v].assert_flags & WORDBOUNDARY_FLAGS) {
|
||||
replaceAssertVertex(g, v, edge_cache, assert_edge_count);
|
||||
num++;
|
||||
}
|
||||
}
|
||||
|
||||
checkForMultilineStart(rm, g);
|
||||
|
||||
if (num) {
|
||||
DEBUG_PRINTF("resolved %zu assert vertices\n", num);
|
||||
pruneUseless(g);
|
||||
pruneEmptyVertices(g);
|
||||
g.renumberVertices();
|
||||
g.renumberEdges();
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("after: graph has %zu vertices\n", num_vertices(g));
|
||||
assert(!hasAssertVertices(g));
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
51
src/compiler/asserts.h
Normal file
51
src/compiler/asserts.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Convert temporary assert vertices (from construction method) to
|
||||
* edge-based flags.
|
||||
*/
|
||||
#ifndef ASSERTS_H
|
||||
#define ASSERTS_H
|
||||
|
||||
namespace ue2 {

class NGWrapper;
class ReportManager;

/** \brief Convert temporary assert vertices (from construction method) to
 * edge-based flags.
 *
 * Gets rid of the temporary assert vertices that our construction method
 * leaves in the graph, so that the rest of the code base can stay unaware
 * that they ever existed. */
void removeAssertVertices(ReportManager &rm, NGWrapper &g);

} // namespace ue2
|
||||
|
||||
#endif // ASSERTS_H
|
||||
459
src/compiler/compiler.cpp
Normal file
459
src/compiler/compiler.cpp
Normal file
@@ -0,0 +1,459 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Compiler front-end interface.
|
||||
*/
|
||||
#include "asserts.h"
|
||||
#include "compiler.h"
|
||||
#include "database.h"
|
||||
#include "grey.h"
|
||||
#include "hs_internal.h"
|
||||
#include "hs_runtime.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfagraph/ng_builder.h"
|
||||
#include "nfagraph/ng_dump.h"
|
||||
#include "nfagraph/ng.h"
|
||||
#include "nfagraph/ng_util.h"
|
||||
#include "parser/buildstate.h"
|
||||
#include "parser/dump.h"
|
||||
#include "parser/Component.h"
|
||||
#include "parser/parse_error.h"
|
||||
#include "parser/Parser.h" // for flags
|
||||
#include "parser/position.h"
|
||||
#include "parser/position_dump.h"
|
||||
#include "parser/position_info.h"
|
||||
#include "parser/prefilter.h"
|
||||
#include "parser/shortcut_literal.h"
|
||||
#include "parser/unsupported.h"
|
||||
#include "parser/utf8_validate.h"
|
||||
#include "smallwrite/smallwrite_build.h"
|
||||
#include "rose/rose_build.h"
|
||||
#include "rose/rose_build_dump.h"
|
||||
#include "som/slot_manager_dump.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
|
||||
static
|
||||
void validateExt(const hs_expr_ext &ext) {
|
||||
static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET |
|
||||
HS_EXT_FLAG_MAX_OFFSET |
|
||||
HS_EXT_FLAG_MIN_LENGTH;
|
||||
if (ext.flags & ~ALL_EXT_FLAGS) {
|
||||
throw CompileError("Invalid hs_expr_ext flag set.");
|
||||
}
|
||||
|
||||
if ((ext.flags & HS_EXT_FLAG_MIN_OFFSET) &&
|
||||
(ext.flags & HS_EXT_FLAG_MAX_OFFSET) &&
|
||||
(ext.min_offset > ext.max_offset)) {
|
||||
throw CompileError("In hs_expr_ext, min_offset must be less than or "
|
||||
"equal to max_offset.");
|
||||
}
|
||||
|
||||
if ((ext.flags & HS_EXT_FLAG_MIN_LENGTH) &&
|
||||
(ext.flags & HS_EXT_FLAG_MAX_OFFSET) &&
|
||||
(ext.min_length > ext.max_offset)) {
|
||||
throw CompileError("In hs_expr_ext, min_length must be less than or "
|
||||
"equal to max_offset.");
|
||||
}
|
||||
}
|
||||
|
||||
/** Parse \a expression and record its flags and extended parameters.
 *
 * Throws ParseError if the expression is declared UTF-8 but is not valid
 * UTF-8 or if parsing yields no component tree, and CompileError for
 * unrecognised or unsupported flag combinations or inconsistent \a ext
 * values. \a ext may be null. */
ParsedExpression::ParsedExpression(unsigned index_in, const char *expression,
                                   unsigned flags, ReportID actionId,
                                   const hs_expr_ext *ext)
    : utf8(false),
      allow_vacuous(flags & HS_FLAG_ALLOWEMPTY),
      highlander(flags & HS_FLAG_SINGLEMATCH),
      prefilter(flags & HS_FLAG_PREFILTER),
      som(SOM_NONE),
      index(index_in),
      id(actionId),
      min_offset(0),
      max_offset(MAX_OFFSET),
      min_length(0) {
    ParseMode mode(flags);

    component = parse(expression, mode);

    /* parse() may have switched UTF-8 mode on */
    utf8 = mode.utf8;
    if (utf8 && !isValidUtf8(expression)) {
        throw ParseError("Expression is not valid UTF-8.");
    }

    if (!component) {
        assert(0); // parse() should have thrown a ParseError.
        throw ParseError("Parse error.");
    }

    if (flags & ~HS_FLAG_ALL) {
        DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags);
        throw CompileError("Unrecognised flag.");
    }

    const bool som_leftmost = (flags & HS_FLAG_SOM_LEFTMOST) != 0;

    // FIXME: we disallow highlander + SOM, see UE-1850.
    if (som_leftmost && (flags & HS_FLAG_SINGLEMATCH)) {
        throw CompileError("HS_FLAG_SINGLEMATCH is not supported in "
                           "combination with HS_FLAG_SOM_LEFTMOST.");
    }

    // FIXME: we disallow prefilter + SOM, see UE-1899.
    if (som_leftmost && (flags & HS_FLAG_PREFILTER)) {
        throw CompileError("HS_FLAG_PREFILTER is not supported in "
                           "combination with HS_FLAG_SOM_LEFTMOST.");
    }

    // Set SOM type.
    if (som_leftmost) {
        som = SOM_LEFT;
    }

    // Pick up whichever extended parameters the caller supplied.
    if (ext) {
        // Reject inconsistent parameters before using any of them.
        validateExt(*ext);

        if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) {
            min_offset = ext->min_offset;
        }
        if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) {
            max_offset = ext->max_offset;
        }
        if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) {
            min_length = ext->min_length;
        }
    }

    // validateExt has already thrown if either of these could fail.
    assert(max_offset >= min_offset);
    assert(max_offset >= min_length);

    // Since prefiltering and SOM aren't supported together, we must squash any
    // min_length constraint as well.
    if (min_length && (flags & HS_FLAG_PREFILTER)) {
        DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n");
        min_length = 0;
    }
}
|
||||
|
||||
#if defined(DUMP_SUPPORT) || defined(DEBUG)
/**
 * \brief Dumps the parse tree to screen in debug mode and to disk in dump
 * mode.
 *
 * In dump mode the tree is written only when Grey::DUMP_PARSE is set in
 * grey.dumpFlags, to a file under grey.dumpPath whose name contains the
 * expression index and \a stage.
 */
void dumpExpression(UNUSED const ParsedExpression &expr,
                    UNUSED const char *stage, UNUSED const Grey &grey) {
#if defined(DEBUG)
    DEBUG_PRINTF("===== Rule ID: %u (internalID: %u) =====\n", expr.id,
                 expr.index);
    ostringstream tree_text;
    dumpTree(tree_text, expr.component.get());
    printf("%s\n", tree_text.str().c_str());
#endif // DEBUG

#if defined(DUMP_SUPPORT)
    if (grey.dumpFlags & Grey::DUMP_PARSE) {
        stringstream name_builder;
        name_builder << grey.dumpPath << "Expr_" << expr.index
                     << "_componenttree_" << stage << ".txt";
        const string filename = name_builder.str();

        ofstream out(filename.c_str());
        out << "Component Tree for " << expr.id << endl;
        dumpTree(out, expr.component.get());
        if (expr.utf8) {
            out << "UTF8 mode" << endl;
        }
    }
#endif // DUMP_SUPPORT
}
#endif
|
||||
|
||||
/** \brief Run Component tree optimisations on \a expr. */
|
||||
static
|
||||
void optimise(ParsedExpression &expr) {
|
||||
if (expr.min_length || expr.som) {
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("optimising\n");
|
||||
expr.component->optimise(true /* root is connected to sds */);
|
||||
}
|
||||
|
||||
/** Parse, validate and add a single expression to \a ng.
 *
 * The expression is either handed over as a literal (shortcutLiteral) or
 * built into a graph and added with ng.addGraph. Any failure is reported by
 * throwing (CompileError here; the parsing and checking steps may throw as
 * well). \a expression must not be null; \a ext may be. */
void addExpression(NG &ng, unsigned index, const char *expression,
                   unsigned flags, const hs_expr_ext *ext, ReportID id) {
    assert(expression);
    const CompileContext &cc = ng.cc;
    DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s'\n", index, id, flags,
                 expression);

    // Reject patterns longer (in characters) than the configured limit.
    if (strlen(expression) > cc.grey.limitPatternLength) {
        throw CompileError("Pattern length exceeds limit.");
    }

    // Per-expression processing; any error surfaces as an exception that
    // propagates to our caller.
    ParsedExpression parsed(index, expression, flags, id, ext);
    dumpExpression(parsed, "orig", cc.grey);

    // Prefiltering transformations, when requested.
    if (parsed.prefilter) {
        prefilterTree(parsed.component, ParseMode(flags));
        dumpExpression(parsed, "prefiltered", cc.grey);
    }

    // Zero-width assertions and other extended pcre constructs are not
    // supported yet; this throws a ParseError if the tree contains one.
    checkUnsupported(*parsed.component);

    parsed.component->checkEmbeddedStartAnchor(true);
    parsed.component->checkEmbeddedEndAnchor(true);

    if (cc.grey.optimiseComponentTree) {
        optimise(parsed);
        dumpExpression(parsed, "opt", cc.grey);
    }

    DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n",
                 parsed.component.get(), parsed.index, parsed.id);

    // SOM flags in streaming mode need an SOM precision mode to go with them.
    const bool wants_som = parsed.som != SOM_NONE;
    if (wants_som && cc.streaming && !ng.ssm.somPrecision()) {
        throw CompileError("To use a SOM expression flag in streaming mode, "
                           "an SOM precision mode (e.g. "
                           "HS_MODE_SOM_HORIZON_LARGE) must be specified.");
    }

    // A literal expression goes straight to Rose; no NFA graph is needed.
    if (shortcutLiteral(ng, parsed)) {
        DEBUG_PRINTF("took literal short cut\n");
        return;
    }

    unique_ptr<NGWrapper> graph = buildWrapper(ng.rm, cc, parsed);
    if (!graph) {
        DEBUG_PRINTF("NFA build failed on ID %u, but no exception was "
                     "thrown.\n", parsed.id);
        throw CompileError("Internal error.");
    }

    if (!parsed.allow_vacuous && matches_everywhere(*graph)) {
        throw CompileError("Pattern matches empty buffer; use "
                           "HS_FLAG_ALLOWEMPTY to enable support.");
    }

    if (!ng.addGraph(*graph)) {
        DEBUG_PRINTF("NFA addGraph failed on ID %u.\n", parsed.id);
        throw CompileError("Error compiling expression.");
    }
}
|
||||
|
||||
static
|
||||
aligned_unique_ptr<RoseEngine> generateRoseEngine(NG &ng) {
|
||||
const u32 minWidth =
|
||||
ng.minWidth.is_finite() ? verify_u32(ng.minWidth) : ROSE_BOUND_INF;
|
||||
auto rose = ng.rose->buildRose(minWidth);
|
||||
|
||||
if (!rose) {
|
||||
DEBUG_PRINTF("error building rose\n");
|
||||
assert(0);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/* avoid building a smwr if just a pure floating case. */
|
||||
if (!roseIsPureLiteral(rose.get())) {
|
||||
u32 qual = roseQuality(rose.get());
|
||||
auto smwr = ng.smwr->build(qual);
|
||||
if (smwr) {
|
||||
rose = roseAddSmallWrite(rose.get(), smwr.get());
|
||||
}
|
||||
}
|
||||
|
||||
dumpRose(*ng.rose, rose.get(), ng.cc.grey);
|
||||
dumpReportManager(ng.rm, ng.cc.grey);
|
||||
dumpSomSlotManager(ng.ssm, ng.cc.grey);
|
||||
dumpSmallWrite(rose.get(), ng.cc.grey);
|
||||
|
||||
return rose;
|
||||
}
|
||||
|
||||
/**
 * \brief Build a platform_t out of a target_t.
 *
 * The only bit set here is HS_PLATFORM_NOAVX2, which is raised when the
 * target reports no AVX2 support.
 */
platform_t target_to_platform(const target_t &target_info) {
    platform_t platform = 0;

    const bool lacksAvx2 = !target_info.has_avx2();
    if (lacksAvx2) {
        platform |= HS_PLATFORM_NOAVX2;
    }

    return platform;
}
|
||||
|
||||
/**
 * \brief Build a database from the expressions already added to \p ng.
 *
 * \param ng The global NG object.
 * \param[out] length Receives the size in bytes of the Rose bytecode; must
 *        not be null. It is written before the zero-length check.
 * \return The newly created database.
 * \throws CompileError if the engine cannot be generated, has zero length,
 *         or the database cannot be allocated.
 */
struct hs_database *build(NG &ng, unsigned int *length) {
    assert(length);

    auto engine = generateRoseEngine(ng);
    if (!engine) {
        throw CompileError("Unable to generate bytecode.");
    }

    *length = roseSize(engine.get());
    if (*length == 0) {
        DEBUG_PRINTF("RoseEngine has zero length\n");
        assert(0);
        throw CompileError("Internal error.");
    }

    // The engine is handed to dbCreate as a raw byte buffer.
    const platform_t platform = target_to_platform(ng.cc.target_info);
    const char *engineBytes = reinterpret_cast<const char *>(engine.get());

    struct hs_database *database = dbCreate(engineBytes, *length, platform);
    if (!database) {
        throw CompileError("Could not allocate memory for bytecode.");
    }

    return database;
}
|
||||
|
||||
static
|
||||
void stripFromPositions(vector<PositionInfo> &v, Position pos) {
|
||||
auto removed = remove(v.begin(), v.end(), PositionInfo(pos));
|
||||
v.erase(removed, v.end());
|
||||
}
|
||||
|
||||
static
|
||||
void connectInitialStates(GlushkovBuildState &bs,
|
||||
const ParsedExpression &expr) {
|
||||
vector<PositionInfo> initials = expr.component->first();
|
||||
const NFABuilder &builder = bs.getBuilder();
|
||||
const Position startState = builder.getStart();
|
||||
const Position startDotStarState = builder.getStartDotStar();
|
||||
|
||||
DEBUG_PRINTF("wiring initials = %s\n",
|
||||
dumpPositions(initials.begin(), initials.end()).c_str());
|
||||
|
||||
vector<PositionInfo> starts = {startState, startDotStarState};
|
||||
|
||||
// strip start and startDs, which can be present due to boundaries
|
||||
stripFromPositions(initials, startState);
|
||||
stripFromPositions(initials, startDotStarState);
|
||||
|
||||
// replace epsilons with accepts
|
||||
for (const auto &s : initials) {
|
||||
if (s.pos != GlushkovBuildState::POS_EPSILON) {
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(starts.size() == 2); /* start, startds */
|
||||
vector<PositionInfo> starts_temp = starts;
|
||||
starts_temp[0].flags = s.flags;
|
||||
starts_temp[1].flags = s.flags;
|
||||
bs.connectAccepts(starts_temp);
|
||||
}
|
||||
|
||||
if (!initials.empty()) {
|
||||
bs.connectRegions(starts, initials);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void connectFinalStates(GlushkovBuildState &bs, const ParsedExpression &expr) {
|
||||
vector<PositionInfo> finals = expr.component->last();
|
||||
|
||||
DEBUG_PRINTF("wiring finals = %s\n",
|
||||
dumpPositions(finals.begin(), finals.end()).c_str());
|
||||
|
||||
bs.connectAccepts(finals);
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
static
|
||||
bool isSupported(const Component &c) {
|
||||
try {
|
||||
checkUnsupported(c);
|
||||
return true;
|
||||
}
|
||||
catch (ParseError &) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
 * \brief Construct an NFA graph from the component tree of \p expr.
 *
 * The steps run in a fixed order: note positions, wire the start states to
 * the first set, build the follow set, wire the last set to accept, build the
 * edges, then fetch the graph and remove its assert vertices.
 *
 * \param rm Global ReportManager for this compile.
 * \param cc Global compile context for this compile.
 * \param expr Parsed expression; its component tree is asserted to be
 *        supported.
 * \return The graph obtained from the builder.
 */
unique_ptr<NGWrapper> buildWrapper(ReportManager &rm, const CompileContext &cc,
                                   const ParsedExpression &expr) {
    assert(isSupported(*expr.component));

    const unique_ptr<NFABuilder> nfaBuilder = makeNFABuilder(rm, cc, expr);
    assert(nfaBuilder);

    // Set up START and ACCEPT states; retrieve the special states
    const auto buildState = makeGlushkovBuildState(*nfaBuilder, expr.prefilter);

    // Map position IDs to characters/components
    expr.component->notePositions(*buildState);

    // Wire the start dotstar state to the firsts
    connectInitialStates(*buildState, expr);

    DEBUG_PRINTF("wire up body of expr\n");

    // Build the rest of the FOLLOW set, seeded with both start states
    vector<PositionInfo> followSeeds = {nfaBuilder->getStartDotStar(),
                                        nfaBuilder->getStart()};
    expr.component->buildFollowSet(*buildState, followSeeds);

    // Wire the lasts to the accept state
    connectFinalStates(*buildState, expr);

    // Create our edges
    buildState->buildEdges();

    auto graph = nfaBuilder->getGraph();
    assert(graph);

    dumpDotWrapper(*graph, "00_before_asserts", cc.grey);
    removeAssertVertices(rm, *graph);

    return graph;
}
|
||||
|
||||
} // namespace ue2
|
||||
152
src/compiler/compiler.h
Normal file
152
src/compiler/compiler.h
Normal file
@@ -0,0 +1,152 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Compiler front-end interface
|
||||
*/
|
||||
|
||||
#ifndef COMPILER_H
|
||||
#define COMPILER_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "database.h"
|
||||
#include "parser/Component.h"
|
||||
#include "som/som.h"
|
||||
|
||||
#include <memory>
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
struct hs_database;
|
||||
struct hs_expr_ext;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct CompileContext;
|
||||
struct Grey;
|
||||
struct target_t;
|
||||
class NG;
|
||||
class ReportManager;
|
||||
class NGWrapper;
|
||||
|
||||
/** Class gathering together the pieces of a parsed expression.
|
||||
* Note: Owns the provided component.
|
||||
*/
|
||||
class ParsedExpression : boost::noncopyable {
|
||||
public:
|
||||
ParsedExpression(unsigned index, const char *expression, unsigned flags,
|
||||
ReportID actionId, const hs_expr_ext *ext = nullptr);
|
||||
|
||||
bool utf8; //!< UTF-8 mode flag specified
|
||||
|
||||
/** \brief root node of parsed component tree. */
|
||||
std::unique_ptr<ue2::Component> component;
|
||||
|
||||
const bool allow_vacuous; //!< HS_FLAG_ALLOWEMPTY specified
|
||||
const bool highlander; //!< HS_FLAG_SINGLEMATCH specified
|
||||
const bool prefilter; //!< HS_FLAG_PREFILTER specified
|
||||
som_type som; //!< chosen SOM mode, or SOM_NONE
|
||||
|
||||
/** \brief index in expressions array passed to \ref hs_compile_multi */
|
||||
const unsigned index;
|
||||
|
||||
const ReportID id; //!< user-specified pattern ID
|
||||
u64a min_offset; //!< 0 if not used
|
||||
u64a max_offset; //!< MAX_OFFSET if not used
|
||||
u64a min_length; //!< 0 if not used
|
||||
};
|
||||
|
||||
/**
|
||||
* Add an expression to the compiler.
|
||||
*
|
||||
* @param ng
|
||||
* The global NG object.
|
||||
* @param index
|
||||
* The index of the expression (used for errors)
|
||||
* @param expression
|
||||
* NULL-terminated PCRE expression
|
||||
* @param flags
|
||||
* The full set of Hyperscan flags associated with this rule.
|
||||
* @param ext
|
||||
* Struct containing extra parameters for this expression, or NULL if
|
||||
* none.
|
||||
* @param actionId
|
||||
* The identifier to associate with the expression; returned by engine on
|
||||
* match.
|
||||
*/
|
||||
void addExpression(NG &ng, unsigned index, const char *expression,
|
||||
unsigned flags, const hs_expr_ext *ext, ReportID actionId);
|
||||
|
||||
/**
|
||||
* Build a Hyperscan database out of the expressions we've been given. A
|
||||
* fatal error will result in an exception being thrown.
|
||||
*
|
||||
* @param ng
|
||||
* The global NG object.
|
||||
* @param[out] length
|
||||
* The number of bytes occupied by the compiled structure.
|
||||
* @return
|
||||
* The compiled structure. Should be deallocated with the
|
||||
* hs_database_free() function.
|
||||
*/
|
||||
struct hs_database *build(NG &ng, unsigned int *length);
|
||||
|
||||
/**
|
||||
* Constructs an NFA graph from the given expression tree.
|
||||
*
|
||||
* @param rm
|
||||
* Global ReportManager for this compile.
|
||||
* @param cc
|
||||
* Global compile context for this compile.
|
||||
* @param expr
|
||||
* ParsedExpression object.
|
||||
* @return
|
||||
* nullptr on error.
|
||||
*/
|
||||
std::unique_ptr<NGWrapper> buildWrapper(ReportManager &rm,
|
||||
const CompileContext &cc,
|
||||
const ParsedExpression &expr);
|
||||
|
||||
/**
|
||||
* Build a platform_t out of a target_t.
|
||||
*/
|
||||
platform_t target_to_platform(const target_t &target_info);
|
||||
|
||||
#if defined(DUMP_SUPPORT) || defined(DEBUG)
|
||||
void dumpExpression(const ParsedExpression &expr, const char *stage,
|
||||
const Grey &grey);
|
||||
#else
|
||||
static really_inline
|
||||
void dumpExpression(UNUSED const ParsedExpression &expr,
|
||||
UNUSED const char *stage, UNUSED const Grey &grey) {
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // COMPILER_H
|
||||
95
src/compiler/error.cpp
Normal file
95
src/compiler/error.cpp
Normal file
@@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Compile-time error utils.
|
||||
*/
|
||||
#include "allocator.h"
|
||||
#include "error.h"
|
||||
#include "ue2common.h"
|
||||
#include "hs_compile.h"
|
||||
#include "util/compile_error.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
|
||||
using std::string;
|
||||
|
||||
static const char failureNoMemory[] = "Unable to allocate memory.";
|
||||
static const char failureInternal[] = "Internal error.";
|
||||
|
||||
extern const hs_compile_error_t hs_enomem = {
|
||||
const_cast<char *>(failureNoMemory), 0
|
||||
};
|
||||
extern const hs_compile_error_t hs_einternal = {
|
||||
const_cast<char *>(failureInternal), 0
|
||||
};
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/**
 * \brief Allocate a compile error carrying a copy of \p err.
 *
 * Both the error struct and the message buffer come from hs_misc_alloc().
 *
 * \param err Message text; copied including its terminating NUL.
 * \param expression Value stored in the error's expression field.
 * \return The new error, or a pointer to the static hs_enomem object if
 *         either allocation fails.
 */
hs_compile_error_t *generateCompileError(const string &err, int expression) {
    hs_compile_error_t *const outOfMemory =
        const_cast<hs_compile_error_t *>(&hs_enomem);

    hs_compile_error_t *error =
        (struct hs_compile_error *)hs_misc_alloc(sizeof(hs_compile_error_t));
    if (!error) {
        return outOfMemory;
    }

    const size_t messageBytes = err.size() + 1; // include the NUL terminator
    char *text = (char *)hs_misc_alloc(messageBytes);
    if (!text) {
        // Don't leak the half-built error object.
        hs_misc_free(error);
        return outOfMemory;
    }

    memcpy(text, err.c_str(), messageBytes);
    error->message = text;
    error->expression = expression;

    return error;
}
|
||||
|
||||
/**
 * \brief Convert a CompileError into an hs_compile_error_t.
 *
 * The expression field is set to e.index when e.hasIndex is true, and to -1
 * otherwise.
 */
hs_compile_error_t *generateCompileError(const CompileError &e) {
    const int expressionIndex = e.hasIndex ? (int)e.index : -1;
    return generateCompileError(e.reason, expressionIndex);
}
|
||||
|
||||
/**
 * \brief Release an error produced by generateCompileError().
 *
 * Null pointers and the static hs_enomem / hs_einternal objects are ignored;
 * anything else has its message and then the struct itself passed to
 * hs_misc_free().
 */
void freeCompileError(hs_compile_error_t *error) {
    // These are not allocated.
    const bool isStaticError = (error == &hs_enomem) || (error == &hs_einternal);
    if (!error || isStaticError) {
        return;
    }

    hs_misc_free(error->message);
    hs_misc_free(error);
}
|
||||
|
||||
} // namespace ue2
|
||||
55
src/compiler/error.h
Normal file
55
src/compiler/error.h
Normal file
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Compile-time error utils.
|
||||
*/
|
||||
|
||||
#ifndef COMPILE_ERROR_H
|
||||
#define COMPILE_ERROR_H
|
||||
|
||||
#include <string>
|
||||
|
||||
struct hs_compile_error;
|
||||
|
||||
// Special errors that aren't allocated with hs_alloc/hs_free.
|
||||
extern const hs_compile_error hs_enomem;
|
||||
extern const hs_compile_error hs_einternal;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class CompileError;
|
||||
|
||||
hs_compile_error *generateCompileError(const std::string &err, int expression);
|
||||
hs_compile_error *generateCompileError(const CompileError &e);
|
||||
|
||||
void freeCompileError(hs_compile_error *error);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
652
src/crc32.c
Normal file
652
src/crc32.c
Normal file
@@ -0,0 +1,652 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "crc32.h"
|
||||
#include "config.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#if defined(HAVE_C_X86INTRIN_H)
|
||||
#include <x86intrin.h>
|
||||
#elif defined(HAVE_C_INTRIN_H)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#ifndef __SSE4_2__
|
||||
|
||||
/***
|
||||
*** What follows is derived from Intel's Slicing-by-8 CRC32 impl, which is BSD
|
||||
*** licensed and available from http://sourceforge.net/projects/slicing-by-8/
|
||||
***/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2004-2006 Intel Corporation - All Rights Reserved
|
||||
*
|
||||
*
|
||||
* This software program is licensed subject to the BSD License,
|
||||
* available at http://www.opensource.org/licenses/bsd-license.html.
|
||||
*
|
||||
* Abstract:
|
||||
*
|
||||
* Tables for software CRC generation
|
||||
*/
|
||||
|
||||
/*
|
||||
* The following CRC lookup table was generated automagically
|
||||
* using the following model parameters:
|
||||
*
|
||||
* Generator Polynomial = ................. 0x1EDC6F41
|
||||
* Generator Polynomial Length = .......... 32 bits
|
||||
* Reflected Bits = ....................... TRUE
|
||||
* Table Generation Offset = .............. 32 bits
|
||||
* Number of Slices = ..................... 8 slices
|
||||
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
|
||||
* Directory Name = ....................... .\
|
||||
* File Name = ............................ 8x256_tables.c
|
||||
*/
|
||||
|
||||
static
|
||||
u32 crc_tableil8_o32[256] =
|
||||
{
|
||||
0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
|
||||
0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
|
||||
0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
|
||||
0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
|
||||
0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
|
||||
0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
|
||||
0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
|
||||
0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
|
||||
0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
|
||||
0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
|
||||
0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
|
||||
0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
|
||||
0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
|
||||
0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
|
||||
0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
|
||||
0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
|
||||
0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
|
||||
0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
|
||||
0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
|
||||
0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
|
||||
0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
|
||||
0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
|
||||
0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
|
||||
0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
|
||||
0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
|
||||
0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
|
||||
0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
|
||||
0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
|
||||
0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
|
||||
0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
|
||||
0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
|
||||
0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
|
||||
};
|
||||
|
||||
/*
|
||||
* end of the CRC lookup table crc_tableil8_o32
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* The following CRC lookup table was generated automagically
|
||||
* using the following model parameters:
|
||||
*
|
||||
* Generator Polynomial = ................. 0x1EDC6F41
|
||||
* Generator Polynomial Length = .......... 32 bits
|
||||
* Reflected Bits = ....................... TRUE
|
||||
* Table Generation Offset = .............. 32 bits
|
||||
* Number of Slices = ..................... 8 slices
|
||||
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
|
||||
* Directory Name = ....................... .\
|
||||
* File Name = ............................ 8x256_tables.c
|
||||
*/
|
||||
|
||||
static
|
||||
u32 crc_tableil8_o40[256] =
|
||||
{
|
||||
0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945,
|
||||
0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD,
|
||||
0x3FC5F181, 0x2C6769F6, 0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4,
|
||||
0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C,
|
||||
0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, 0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47,
|
||||
0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, 0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF,
|
||||
0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28, 0x298143B1, 0x3A23DBC6,
|
||||
0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, 0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E,
|
||||
0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41,
|
||||
0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9,
|
||||
0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C, 0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0,
|
||||
0x5DC6F43D, 0x4E646C4A, 0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78,
|
||||
0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, 0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43,
|
||||
0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27, 0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB,
|
||||
0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, 0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2,
|
||||
0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, 0x6CC776E3, 0x7F65EE94, 0x4B82460D, 0x5820DE7A,
|
||||
0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, 0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC,
|
||||
0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004,
|
||||
0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, 0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D,
|
||||
0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059, 0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185,
|
||||
0x844819FB, 0x97EA818C, 0xA30D2915, 0xB0AFB162, 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE,
|
||||
0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, 0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306,
|
||||
0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3, 0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F,
|
||||
0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, 0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287,
|
||||
0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, 0x4A5E5D21, 0x59FCC556, 0x6D1B6DCF, 0x7EB9F5B8,
|
||||
0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, 0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 0xE3AD3600,
|
||||
0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439,
|
||||
0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, 0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781,
|
||||
0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766, 0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA,
|
||||
0xE64B1C47, 0xF5E98430, 0xC10E2CA9, 0xD2ACB4DE, 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502,
|
||||
0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B,
|
||||
0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, 0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483
|
||||
};
|
||||
|
||||
/*
|
||||
* end of the CRC lookup table crc_tableil8_o40
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* The following CRC lookup table was generated automagically
|
||||
* using the following model parameters:
|
||||
*
|
||||
* Generator Polynomial = ................. 0x1EDC6F41
|
||||
* Generator Polynomial Length = .......... 32 bits
|
||||
* Reflected Bits = ....................... TRUE
|
||||
* Table Generation Offset = .............. 32 bits
|
||||
* Number of Slices = ..................... 8 slices
|
||||
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
|
||||
* Directory Name = ....................... .\
|
||||
* File Name = ............................ 8x256_tables.c
|
||||
*/
|
||||
|
||||
static
|
||||
u32 crc_tableil8_o48[256] =
|
||||
{
|
||||
0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469,
|
||||
0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC,
|
||||
0x70A27D8A, 0xD5E3EFF4, 0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3,
|
||||
0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, 0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726,
|
||||
0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67, 0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D,
|
||||
0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, 0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8,
|
||||
0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, 0x0F382284, 0xAA79B0FA, 0x40577089, 0xE516E2F7,
|
||||
0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, 0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32,
|
||||
0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0,
|
||||
0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, 0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75,
|
||||
0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, 0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A,
|
||||
0x8F96C396, 0x2AD751E8, 0xC0F9919B, 0x65B803E5, 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF,
|
||||
0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, 0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4,
|
||||
0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B, 0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161,
|
||||
0x56830647, 0xF3C29439, 0x19EC544A, 0xBCADC634, 0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E,
|
||||
0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, 0xF00C9C98, 0x554D0EE6, 0xBF63CE95, 0x1A225CEB,
|
||||
0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, 0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A,
|
||||
0xB3764986, 0x1637DBF8, 0xFC191B8B, 0x595889F5, 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF,
|
||||
0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, 0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0,
|
||||
0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F, 0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065,
|
||||
0x6A638C57, 0xCF221E29, 0x250CDE5A, 0x804D4C24, 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E,
|
||||
0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, 0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB,
|
||||
0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE, 0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4,
|
||||
0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, 0xBC4E6B02, 0x190FF97C, 0xF321390F, 0x5660AB71,
|
||||
0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, 0xD29C5380, 0x77DDC1FE, 0x9DF3018D, 0x38B293F3,
|
||||
0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, 0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36,
|
||||
0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79,
|
||||
0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, 0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC,
|
||||
0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD, 0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7,
|
||||
0x9557324B, 0x3016A035, 0xDA386046, 0x7F79F238, 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622,
|
||||
0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D,
|
||||
0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, 0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8
|
||||
};
|
||||
|
||||
/*
|
||||
* end of the CRC lookup table crc_tableil8_o48
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* The following CRC lookup table was generated automagically
|
||||
* using the following model parameters:
|
||||
*
|
||||
* Generator Polynomial = ................. 0x1EDC6F41
|
||||
* Generator Polynomial Length = .......... 32 bits
|
||||
* Reflected Bits = ....................... TRUE
|
||||
* Table Generation Offset = .............. 32 bits
|
||||
* Number of Slices = ..................... 8 slices
|
||||
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
|
||||
* Directory Name = ....................... .\
|
||||
* File Name = ............................ 8x256_tables.c
|
||||
*/
|
||||
|
||||
static
|
||||
u32 crc_tableil8_o56[256] =
|
||||
{
|
||||
0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA,
|
||||
0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF, 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C,
|
||||
0xE964B13D, 0x34211B85, 0x560392BC, 0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7,
|
||||
0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, 0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11,
|
||||
0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2, 0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41,
|
||||
0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, 0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7,
|
||||
0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, 0x45639445, 0x98263EFD, 0xFA04B7C4, 0x27411D7C,
|
||||
0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, 0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A,
|
||||
0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D,
|
||||
0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, 0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB,
|
||||
0x42C2EEDA, 0x9F874462, 0xFDA5CD5B, 0x20E067E3, 0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610,
|
||||
0xB4868D3C, 0x69C32784, 0x0BE1AEBD, 0xD6A40405, 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6,
|
||||
0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, 0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6,
|
||||
0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3, 0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040,
|
||||
0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, 0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B,
|
||||
0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, 0x1881A844, 0xC5C402FC, 0xA7E68BC5, 0x7AA3217D,
|
||||
0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, 0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5,
|
||||
0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213,
|
||||
0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, 0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8,
|
||||
0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD, 0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E,
|
||||
0x8585DDB4, 0x58C0770C, 0x3AE2FE35, 0xE7A7548D, 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E,
|
||||
0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, 0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698,
|
||||
0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0, 0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443,
|
||||
0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, 0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5,
|
||||
0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, 0x8224A72B, 0x5F610D93, 0x3D4384AA, 0xE0062E12,
|
||||
0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, 0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4,
|
||||
0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F,
|
||||
0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, 0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9,
|
||||
0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A, 0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99,
|
||||
0xD867E1B5, 0x05224B0D, 0x6700C234, 0xBA45688C, 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F,
|
||||
0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4,
|
||||
0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, 0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842
|
||||
};
|
||||
|
||||
/*
|
||||
* end of the CRC lookup table crc_tableil8_o56
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* The following CRC lookup table was generated automagically
|
||||
* using the following model parameters:
|
||||
*
|
||||
* Generator Polynomial = ................. 0x1EDC6F41
|
||||
* Generator Polynomial Length = .......... 32 bits
|
||||
* Reflected Bits = ....................... TRUE
|
||||
* Table Generation Offset = .............. 32 bits
|
||||
* Number of Slices = ..................... 8 slices
|
||||
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
|
||||
* Directory Name = ....................... .\
|
||||
* File Name = ............................ 8x256_tables.c
|
||||
*/
|
||||
|
||||
static
|
||||
u32 crc_tableil8_o64[256] =
|
||||
{
|
||||
0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44,
|
||||
0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65, 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5,
|
||||
0x8F2261D3, 0xB7330E7F, 0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97,
|
||||
0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, 0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406,
|
||||
0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3, 0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13,
|
||||
0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, 0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082,
|
||||
0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, 0x74CF6A34, 0x4CDE0598, 0x04EDB56C, 0x3CFCDAC0,
|
||||
0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, 0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151,
|
||||
0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA,
|
||||
0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, 0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B,
|
||||
0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89, 0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539,
|
||||
0x7D1400EC, 0x45056F40, 0x0D36DFB4, 0x3527B018, 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8,
|
||||
0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, 0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD,
|
||||
0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C, 0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C,
|
||||
0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, 0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E,
|
||||
0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, 0x86F90B0B, 0xBEE864A7, 0xF6DBD453, 0xCECABBFF,
|
||||
0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, 0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18,
|
||||
0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089,
|
||||
0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, 0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB,
|
||||
0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, 0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A,
|
||||
0x750A600B, 0x4D1B0FA7, 0x0528BF53, 0x3D39D0FF, 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F,
|
||||
0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, 0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE,
|
||||
0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C, 0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C,
|
||||
0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, 0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 0x9739040D,
|
||||
0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, 0xB9B60142, 0x81A76EEE, 0xC994DE1A, 0xF185B1B6,
|
||||
0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, 0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27,
|
||||
0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065,
|
||||
0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, 0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4,
|
||||
0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51, 0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1,
|
||||
0x873C0134, 0xBF2D6E98, 0xF71EDE6C, 0xCF0FB1C0, 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70,
|
||||
0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532,
|
||||
0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013, 0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3
|
||||
};
|
||||
|
||||
/*
|
||||
* end of the CRC lookup table crc_tableil8_o64
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* The following CRC lookup table was generated automagically
|
||||
* using the following model parameters:
|
||||
*
|
||||
* Generator Polynomial = ................. 0x1EDC6F41
|
||||
* Generator Polynomial Length = .......... 32 bits
|
||||
* Reflected Bits = ....................... TRUE
|
||||
* Table Generation Offset = .............. 32 bits
|
||||
* Number of Slices = ..................... 8 slices
|
||||
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
|
||||
* Directory Name = ....................... .\
|
||||
* File Name = ............................ 8x256_tables.c
|
||||
*/
|
||||
|
||||
static
|
||||
u32 crc_tableil8_o72[256] =
|
||||
{
|
||||
0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD,
|
||||
0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2,
|
||||
0xC00C303E, 0x2F3C5B27, 0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93,
|
||||
0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, 0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C,
|
||||
0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57, 0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20,
|
||||
0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, 0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F,
|
||||
0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, 0xF70D11C4, 0x183D7ADD, 0x2C81B107, 0xC3B1DA1E,
|
||||
0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, 0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201,
|
||||
0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746,
|
||||
0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, 0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59,
|
||||
0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F, 0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778,
|
||||
0xAE0E73CA, 0x413E18D3, 0x7582D309, 0x9AB2B810, 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67,
|
||||
0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, 0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB,
|
||||
0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3, 0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4,
|
||||
0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, 0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5,
|
||||
0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, 0x990F5230, 0x763F3929, 0x4283F2F3, 0xADB399EA,
|
||||
0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, 0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B,
|
||||
0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364,
|
||||
0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, 0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45,
|
||||
0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D, 0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A,
|
||||
0x99FCA15B, 0x76CCCA42, 0x42700198, 0xAD406A81, 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6,
|
||||
0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, 0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9,
|
||||
0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF, 0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8,
|
||||
0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, 0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7,
|
||||
0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, 0xA0F9DB4A, 0x4FC9B053, 0x7B757B89, 0x94451090,
|
||||
0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, 0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F,
|
||||
0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE,
|
||||
0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, 0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1,
|
||||
0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A, 0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D,
|
||||
0xF7FEE2AF, 0x18CE89B6, 0x2C72426C, 0xC3422975, 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02,
|
||||
0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623,
|
||||
0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, 0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C
|
||||
};
|
||||
|
||||
/*
|
||||
* end of the CRC lookup table crc_tableil8_o72
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* The following CRC lookup table was generated automagically
|
||||
* using the following model parameters:
|
||||
*
|
||||
* Generator Polynomial = ................. 0x1EDC6F41
|
||||
* Generator Polynomial Length = .......... 32 bits
|
||||
* Reflected Bits = ....................... TRUE
|
||||
* Table Generation Offset = .............. 32 bits
|
||||
* Number of Slices = ..................... 8 slices
|
||||
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
|
||||
* Directory Name = ....................... .\
|
||||
* File Name = ............................ 8x256_tables.c
|
||||
*/
|
||||
|
||||
static
|
||||
u32 crc_tableil8_o80[256] =
|
||||
{
|
||||
0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089,
|
||||
0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B, 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA,
|
||||
0x9C5BFAA6, 0xF458D66E, 0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F,
|
||||
0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, 0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C,
|
||||
0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5, 0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334,
|
||||
0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, 0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67,
|
||||
0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, 0x04E0BCCA, 0x6CE39002, 0xD4E6E55A, 0xBCE5C992,
|
||||
0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, 0x4ACD4199, 0x22CE6D51, 0x9ACB1809, 0xF2C834C1,
|
||||
0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3,
|
||||
0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, 0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0,
|
||||
0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884, 0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55,
|
||||
0xA8C1008F, 0xC0C22C47, 0x78C7591F, 0x10C475D7, 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006,
|
||||
0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, 0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E,
|
||||
0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC, 0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D,
|
||||
0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, 0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8,
|
||||
0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, 0x307A46E3, 0x58796A2B, 0xE07C1F73, 0x887F33BB,
|
||||
0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, 0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D,
|
||||
0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E,
|
||||
0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, 0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB,
|
||||
0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, 0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988,
|
||||
0xC8358D49, 0xA036A181, 0x1833D4D9, 0x7030F811, 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0,
|
||||
0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, 0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093,
|
||||
0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7, 0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766,
|
||||
0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, 0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35,
|
||||
0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, 0x2A39CC5F, 0x423AE097, 0xFA3F95CF, 0x923CB907,
|
||||
0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, 0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454,
|
||||
0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1,
|
||||
0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, 0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2,
|
||||
0xB2828A33, 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B, 0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA,
|
||||
0xFCAF7760, 0x94AC5BA8, 0x2CA92EF0, 0x44AA0238, 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9,
|
||||
0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, 0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C,
|
||||
0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, 0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F
|
||||
};
|
||||
|
||||
/*
|
||||
* end of the CRC lookup table crc_tableil8_o80
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* The following CRC lookup table was generated automagically
|
||||
* using the following model parameters:
|
||||
*
|
||||
* Generator Polynomial = ................. 0x1EDC6F41
|
||||
* Generator Polynomial Length = .......... 32 bits
|
||||
* Reflected Bits = ....................... TRUE
|
||||
* Table Generation Offset = .............. 32 bits
|
||||
* Number of Slices = ..................... 8 slices
|
||||
* Slice Lengths = ........................ 8 8 8 8 8 8 8 8
|
||||
* Directory Name = ....................... .\
|
||||
* File Name = ............................ 8x256_tables.c
|
||||
*/
|
||||
|
||||
static
|
||||
u32 crc_tableil8_o88[256] =
|
||||
{
|
||||
0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504,
|
||||
0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3, 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE,
|
||||
0x847609B4, 0xCD4A7493, 0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0,
|
||||
0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, 0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A,
|
||||
0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0, 0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D,
|
||||
0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, 0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447,
|
||||
0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, 0xA86BEE40, 0xE1579367, 0x3A13140E, 0x732F6929,
|
||||
0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, 0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3,
|
||||
0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36,
|
||||
0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, 0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC,
|
||||
0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF, 0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782,
|
||||
0xDC4DC65C, 0x9571BB7B, 0x4E353C12, 0x07094135, 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358,
|
||||
0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, 0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF,
|
||||
0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18, 0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75,
|
||||
0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, 0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B,
|
||||
0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, 0xF05021A8, 0xB96C5C8F, 0x6228DBE6, 0x2B14A6C1,
|
||||
0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, 0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360,
|
||||
0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA,
|
||||
0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, 0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4,
|
||||
0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63, 0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E,
|
||||
0x3901F3FD, 0x703D8EDA, 0xAB7909B3, 0xE2457494, 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9,
|
||||
0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, 0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223,
|
||||
0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20, 0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D,
|
||||
0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, 0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97,
|
||||
0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, 0x0F1CDF3B, 0x4620A21C, 0x9D642575, 0xD4585852,
|
||||
0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, 0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88,
|
||||
0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6,
|
||||
0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, 0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C,
|
||||
0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6, 0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB,
|
||||
0x613A3C15, 0x28064132, 0xF342C65B, 0xBA7EBB7C, 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911,
|
||||
0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, 0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F,
|
||||
0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, 0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5
|
||||
};
|
||||
|
||||
/*
|
||||
* end of the CRC lookup table crc_tableil8_o88
|
||||
*/
|
||||
|
||||
//#define VERIFY_ASSERTION
|
||||
|
||||
#ifdef VERIFY_ASSERTION
|
||||
|
||||
// Trivial byte-by-byte version: you can switch on the assertion in the
|
||||
// Crc32_ComputeBuf function (by defining VERIFY_ASSERTION) to check this
|
||||
// against the slicing variant.
|
||||
static really_inline
|
||||
u32 crc32c(u32 running_crc, const unsigned char* p_buf, size_t length) {
|
||||
u32 crc = running_crc;
|
||||
while (length--) {
|
||||
crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
|
||||
}
|
||||
return crc;
|
||||
}
|
||||
|
||||
#endif // VERIFY_ASSERTION
|
||||
|
||||
// Slicing-by-8 approach, which is much faster. Derived from Intel's
|
||||
// BSD-licensed code, with additions to handled aligned case automatically.
|
||||
static really_inline
|
||||
u32 crc32c_sb8_64_bit(u32 running_crc, const unsigned char* p_buf,
|
||||
const size_t length) {
|
||||
u32 crc = running_crc;
|
||||
|
||||
// Process byte-by-byte until p_buf is aligned
|
||||
|
||||
const unsigned char *aligned_buf = ROUNDUP_PTR(p_buf, 4);
|
||||
size_t init_bytes = aligned_buf - p_buf;
|
||||
size_t running_length = ((length - init_bytes)/8)*8;
|
||||
size_t end_bytes = length - init_bytes - running_length;
|
||||
|
||||
while (p_buf < aligned_buf) {
|
||||
crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
|
||||
}
|
||||
|
||||
// Main aligned loop, processes eight bytes at a time.
|
||||
|
||||
u32 term1, term2;
|
||||
for (size_t li = 0; li < running_length/8; li++) {
|
||||
u32 block = *(const u32 *)p_buf;
|
||||
crc ^= block;
|
||||
p_buf += 4;
|
||||
term1 = crc_tableil8_o88[crc & 0x000000FF] ^
|
||||
crc_tableil8_o80[(crc >> 8) & 0x000000FF];
|
||||
term2 = crc >> 16;
|
||||
crc = term1 ^
|
||||
crc_tableil8_o72[term2 & 0x000000FF] ^
|
||||
crc_tableil8_o64[(term2 >> 8) & 0x000000FF];
|
||||
|
||||
|
||||
block = *(const u32 *)p_buf;
|
||||
|
||||
term1 = crc_tableil8_o56[block & 0x000000FF] ^
|
||||
crc_tableil8_o48[(block >> 8) & 0x000000FF];
|
||||
|
||||
term2 = block >> 16;
|
||||
crc = crc ^
|
||||
term1 ^
|
||||
crc_tableil8_o40[term2 & 0x000000FF] ^
|
||||
crc_tableil8_o32[(term2 >> 8) & 0x000000FF];
|
||||
p_buf += 4;
|
||||
}
|
||||
|
||||
// Remaining bytes
|
||||
|
||||
for(size_t li = 0; li < end_bytes; li++) {
|
||||
crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
|
||||
}
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
#else // __SSE4_2__
|
||||
|
||||
#ifdef ARCH_64_BIT
|
||||
#define CRC_WORD 8
|
||||
#define CRC_TYPE u64a
|
||||
#define CRC_FUNC _mm_crc32_u64
|
||||
#else
|
||||
#define CRC_WORD 4
|
||||
#define CRC_TYPE u32
|
||||
#define CRC_FUNC _mm_crc32_u32
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Use the crc32 instruction from SSE4.2 to compute our checksum - same
|
||||
* polynomial as the above function.
|
||||
*/
|
||||
static really_inline
|
||||
u32 crc32c_sse42(u32 running_crc, const unsigned char* p_buf,
|
||||
const size_t length) {
|
||||
u32 crc = running_crc;
|
||||
|
||||
// Process byte-by-byte until p_buf is aligned
|
||||
|
||||
const unsigned char *aligned_buf = ROUNDUP_PTR(p_buf, CRC_WORD);
|
||||
size_t init_bytes = aligned_buf - p_buf;
|
||||
size_t running_length = ((length - init_bytes)/CRC_WORD)*CRC_WORD;
|
||||
size_t end_bytes = length - init_bytes - running_length;
|
||||
|
||||
while (p_buf < aligned_buf) {
|
||||
crc = _mm_crc32_u8(crc, *p_buf++);
|
||||
}
|
||||
|
||||
// Main aligned loop, processes a word at a time.
|
||||
|
||||
for (size_t li = 0; li < running_length/CRC_WORD; li++) {
|
||||
CRC_TYPE block = *(const CRC_TYPE *)p_buf;
|
||||
crc = CRC_FUNC(crc, block);
|
||||
p_buf += CRC_WORD;
|
||||
}
|
||||
|
||||
// Remaining bytes
|
||||
|
||||
for(size_t li = 0; li < end_bytes; li++) {
|
||||
crc = _mm_crc32_u8(crc, *p_buf++);
|
||||
}
|
||||
|
||||
return crc;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef VERIFY_ASSERTION
|
||||
#include <assert.h>
|
||||
#endif
|
||||
|
||||
// Externally visible function
|
||||
u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen) {
|
||||
#ifdef __SSE4_2__
|
||||
u32 crc = crc32c_sse42(inCrc32, (const unsigned char *)buf, bufLen);
|
||||
#else
|
||||
u32 crc = crc32c_sb8_64_bit(inCrc32, (const unsigned char *)buf, bufLen);
|
||||
#endif
|
||||
|
||||
#ifdef VERIFY_ASSERTION
|
||||
assert(crc == crc32c(inCrc32, (const unsigned char *)buf, bufLen));
|
||||
#endif
|
||||
|
||||
return crc;
|
||||
}
|
||||
46
src/crc32.h
Normal file
46
src/crc32.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CRC32_H_36A5015B5840C1
|
||||
#define CRC32_H_36A5015B5840C1
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/**
 * \brief Fold a buffer into a running CRC-32C value.
 *
 * \param inCrc32 CRC value to continue from.
 * \param buf Start of the data to checksum.
 * \param bufLen Number of bytes to process from \p buf.
 * \return The updated CRC value.
 */
u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* CRC32_H_36A5015B5840C1 */
|
||||
|
||||
507
src/database.c
Normal file
507
src/database.c
Normal file
@@ -0,0 +1,507 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Runtime code for hs_database manipulation.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "allocator.h"
|
||||
#include "hs_common.h"
|
||||
#include "hs_internal.h"
|
||||
#include "hs_version.h"
|
||||
#include "ue2common.h"
|
||||
#include "database.h"
|
||||
#include "crc32.h"
|
||||
#include "rose/rose_internal.h"
|
||||
#include "util/unaligned.h"
|
||||
|
||||
static really_inline
|
||||
int db_correctly_aligned(const void *db) {
|
||||
return ISALIGNED_N(db, alignof(unsigned long long));
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_free_database(hs_database_t *db) {
|
||||
if (db && db->magic != HS_DB_MAGIC) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
hs_database_free(db);
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes,
|
||||
size_t *serialized_length) {
|
||||
if (!db || !bytes || !serialized_length) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
if (!db_correctly_aligned(db)) {
|
||||
return HS_BAD_ALIGN;
|
||||
}
|
||||
|
||||
hs_error_t ret = validDatabase(db);
|
||||
if (ret != HS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
size_t length = sizeof(struct hs_database) + db->length;
|
||||
|
||||
char *out = hs_misc_alloc(length);
|
||||
ret = hs_check_alloc(out);
|
||||
if (ret != HS_SUCCESS) {
|
||||
hs_misc_free(out);
|
||||
return ret;
|
||||
}
|
||||
|
||||
memset(out, 0, length);
|
||||
|
||||
u32 *buf = (u32 *)out;
|
||||
*buf = db->magic;
|
||||
buf++;
|
||||
*buf = db->version;
|
||||
buf++;
|
||||
*buf = db->length;
|
||||
buf++;
|
||||
memcpy(buf, &db->platform, sizeof(u64a));
|
||||
buf += 2;
|
||||
*buf = db->crc32;
|
||||
buf++;
|
||||
*buf = db->reserved0;
|
||||
buf++;
|
||||
*buf = db->reserved1;
|
||||
buf++;
|
||||
|
||||
const char *bytecode = hs_get_bytecode(db);
|
||||
memcpy(buf, bytecode, db->length);
|
||||
|
||||
*bytes = out;
|
||||
*serialized_length = length;
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
// check that the database header's platform is compatible with the current
|
||||
// runtime platform.
|
||||
static
|
||||
hs_error_t db_check_platform(const u64a p) {
|
||||
if (p != hs_current_platform
|
||||
&& p != hs_current_platform_no_avx2) {
|
||||
return HS_DB_PLATFORM_ERROR;
|
||||
}
|
||||
// passed all checks
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
// Decode and check the database header, returning appropriate errors or
|
||||
// HS_SUCCESS if it's OK. The header should be allocated on the stack
|
||||
// and later copied into the deserialized database.
|
||||
static
|
||||
hs_error_t db_decode_header(const char **bytes, const size_t length,
|
||||
struct hs_database *header) {
|
||||
if (!*bytes) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
if (length < sizeof(struct hs_database)) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
// There's no requirement, really, that the serialized stream of bytes
|
||||
// we've been given is 4-byte aligned, so we use unaligned loads here.
|
||||
|
||||
const u32 *buf = (const u32 *)*bytes;
|
||||
|
||||
// Zero header so that none of it (e.g. its padding) is uninitialized.
|
||||
memset(header, 0, sizeof(struct hs_database));
|
||||
|
||||
header->magic = unaligned_load_u32(buf++);
|
||||
if (header->magic != HS_DB_MAGIC) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
header->version = unaligned_load_u32(buf++);
|
||||
if (header->version != HS_DB_VERSION) {
|
||||
return HS_DB_VERSION_ERROR;
|
||||
}
|
||||
|
||||
header->length = unaligned_load_u32(buf++);
|
||||
if (length != sizeof(struct hs_database) + header->length) {
|
||||
DEBUG_PRINTF("bad length %zu, expecting %zu\n", length,
|
||||
sizeof(struct hs_database) + header->length);
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
header->platform = unaligned_load_u64a(buf);
|
||||
buf += 2;
|
||||
header->crc32 = unaligned_load_u32(buf++);
|
||||
header->reserved0 = unaligned_load_u32(buf++);
|
||||
header->reserved1 = unaligned_load_u32(buf++);
|
||||
|
||||
*bytes = (const char *)buf;
|
||||
|
||||
return HS_SUCCESS; // Header checks out
|
||||
}
|
||||
|
||||
// Check the CRC on a database
|
||||
static
|
||||
hs_error_t db_check_crc(const hs_database_t *db) {
|
||||
const char *bytecode = hs_get_bytecode(db);
|
||||
u32 crc = Crc32c_ComputeBuf(0, bytecode, db->length);
|
||||
if (crc != db->crc32) {
|
||||
DEBUG_PRINTF("crc mismatch! 0x%x != 0x%x\n", crc, db->crc32);
|
||||
return HS_INVALID;
|
||||
}
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
static
|
||||
void db_copy_bytecode(const char *serialized, hs_database_t *db) {
|
||||
// we need to align things manually
|
||||
uintptr_t shift = (uintptr_t)db->bytes & 0x3f;
|
||||
db->bytecode = offsetof(struct hs_database, bytes) - shift;
|
||||
char *bytecode = (char *)db + db->bytecode;
|
||||
|
||||
// Copy the bytecode into place
|
||||
memcpy(bytecode, serialized, db->length);
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length,
|
||||
hs_database_t *db) {
|
||||
if (!bytes || !db) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
// We require the user to deserialize into an 8-byte aligned region.
|
||||
if (!ISALIGNED_N(db, 8)) {
|
||||
return HS_BAD_ALIGN;
|
||||
}
|
||||
|
||||
// Decode the header
|
||||
hs_database_t header;
|
||||
hs_error_t ret = db_decode_header(&bytes, length, &header);
|
||||
if (ret != HS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Make sure the serialized database is for our platform
|
||||
ret = db_check_platform(header.platform);
|
||||
if (ret != HS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Zero new space for safety
|
||||
size_t dblength = sizeof(struct hs_database) + header.length;
|
||||
memset(db, 0, dblength);
|
||||
|
||||
// Copy the decoded header into place
|
||||
memcpy(db, &header, sizeof(header));
|
||||
|
||||
// Copy the bytecode into the correctly-aligned location, set offsets
|
||||
db_copy_bytecode(bytes, db);
|
||||
|
||||
if (db_check_crc(db) != HS_SUCCESS) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_deserialize_database(const char *bytes, const size_t length,
|
||||
hs_database_t **db) {
|
||||
if (!bytes || !db) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
*db = NULL;
|
||||
|
||||
// Decode and check the header
|
||||
hs_database_t header;
|
||||
hs_error_t ret = db_decode_header(&bytes, length, &header);
|
||||
if (ret != HS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Make sure the serialized database is for our platform
|
||||
ret = db_check_platform(header.platform);
|
||||
if (ret != HS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Allocate space for new database
|
||||
size_t dblength = sizeof(struct hs_database) + header.length;
|
||||
struct hs_database *tempdb = hs_database_alloc(dblength);
|
||||
ret = hs_check_alloc(tempdb);
|
||||
if (ret != HS_SUCCESS) {
|
||||
hs_database_free(tempdb);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Zero new space for safety
|
||||
memset(tempdb, 0, dblength);
|
||||
|
||||
// Copy the decoded header into place
|
||||
memcpy(tempdb, &header, sizeof(header));
|
||||
|
||||
// Copy the bytecode into the correctly-aligned location, set offsets
|
||||
db_copy_bytecode(bytes, tempdb);
|
||||
|
||||
if (db_check_crc(tempdb) != HS_SUCCESS) {
|
||||
hs_database_free(tempdb);
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
*db = tempdb;
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_database_size(const hs_database_t *db, size_t *size) {
|
||||
if (!size) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
hs_error_t ret = validDatabase(db);
|
||||
if (unlikely(ret != HS_SUCCESS)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
*size = sizeof(struct hs_database) + db->length;
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_serialized_database_size(const char *bytes, const size_t length,
|
||||
size_t *size) {
|
||||
// Decode and check the header
|
||||
hs_database_t header;
|
||||
hs_error_t ret = db_decode_header(&bytes, length, &header);
|
||||
if (ret != HS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!size) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
*size = sizeof(struct hs_database) + header.length;
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
hs_error_t dbIsValid(const hs_database_t *db) {
|
||||
if (db->magic != HS_DB_MAGIC) {
|
||||
DEBUG_PRINTF("bad magic\n");
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
if (db->version != HS_DB_VERSION) {
|
||||
DEBUG_PRINTF("bad version\n");
|
||||
return HS_DB_VERSION_ERROR;
|
||||
}
|
||||
|
||||
if (db_check_platform(db->platform) != HS_SUCCESS) {
|
||||
DEBUG_PRINTF("bad platform\n");
|
||||
return HS_DB_PLATFORM_ERROR;
|
||||
}
|
||||
|
||||
if (!ISALIGNED_16(hs_get_bytecode(db))) {
|
||||
DEBUG_PRINTF("bad alignment\n");
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
hs_error_t rv = db_check_crc(db);
|
||||
if (rv != HS_SUCCESS) {
|
||||
DEBUG_PRINTF("bad crc\n");
|
||||
return rv;
|
||||
}
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
/** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated
|
||||
* \ref hs_database, ensuring that it is padded correctly to give cacheline
|
||||
* alignment. */
|
||||
hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) {
|
||||
size_t db_len = sizeof(struct hs_database) + len;
|
||||
DEBUG_PRINTF("db size %zu\n", db_len);
|
||||
DEBUG_PRINTF("db platform %llx\n", platform);
|
||||
|
||||
struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len);
|
||||
if (hs_check_alloc(db) != HS_SUCCESS) {
|
||||
hs_database_free(db);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// So that none of our database is uninitialized
|
||||
memset(db, 0, db_len);
|
||||
|
||||
// we need to align things manually
|
||||
size_t shift = (uintptr_t)db->bytes & 0x3f;
|
||||
DEBUG_PRINTF("shift is %zu\n", shift);
|
||||
|
||||
db->bytecode = offsetof(struct hs_database, bytes) - shift;
|
||||
char *bytecode = (char *)db + db->bytecode;
|
||||
assert(ISALIGNED_CL(bytecode));
|
||||
|
||||
db->magic = HS_DB_MAGIC;
|
||||
db->version = HS_DB_VERSION;
|
||||
db->length = len;
|
||||
db->platform = platform;
|
||||
|
||||
// Copy bytecode
|
||||
memcpy(bytecode, in_bytecode, len);
|
||||
|
||||
db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length);
|
||||
return db;
|
||||
}
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define SNPRINTF_COMPAT _snprintf
|
||||
#else
|
||||
#define SNPRINTF_COMPAT snprintf
|
||||
#endif
|
||||
|
||||
/** Allocate a buffer and prints the database info into it. Returns an
|
||||
* appropriate error code on failure, or HS_SUCCESS on success. */
|
||||
static
|
||||
hs_error_t print_database_string(char **s, u32 version, const platform_t plat,
|
||||
u32 raw_mode) {
|
||||
assert(s);
|
||||
*s = NULL;
|
||||
|
||||
u8 release = (version >> 8) & 0xff;
|
||||
u8 minor = (version >> 16) & 0xff;
|
||||
u8 major = (version >> 24) & 0xff;
|
||||
|
||||
const char *avx2 = (plat & HS_PLATFORM_NOAVX2) ? "NOAVX2" : " AVX2";
|
||||
|
||||
const char *mode = NULL;
|
||||
|
||||
if (raw_mode == HS_MODE_STREAM) {
|
||||
mode = "STREAM";
|
||||
} else if (raw_mode == HS_MODE_VECTORED) {
|
||||
mode = "VECTORED";
|
||||
} else {
|
||||
assert(raw_mode == HS_MODE_BLOCK);
|
||||
mode = "BLOCK";
|
||||
}
|
||||
|
||||
// Initial allocation size, which should be large enough to print our info.
|
||||
// If it isn't, snprintf will tell us and we can resize appropriately.
|
||||
size_t len = 256;
|
||||
|
||||
while (1) {
|
||||
char *buf = hs_misc_alloc(len);
|
||||
hs_error_t ret = hs_check_alloc(buf);
|
||||
if (ret != HS_SUCCESS) {
|
||||
hs_misc_free(buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Note: SNPRINTF_COMPAT is a macro defined above, to cope with systems
|
||||
// that don't have snprintf but have a workalike.
|
||||
int p_len = SNPRINTF_COMPAT(
|
||||
buf, len, "Version: %u.%u.%u Features: %s Mode: %s",
|
||||
major, minor, release, avx2, mode);
|
||||
if (p_len < 0) {
|
||||
DEBUG_PRINTF("snprintf output error, returned %d\n", p_len);
|
||||
hs_misc_free(buf);
|
||||
break;
|
||||
} else if ((size_t)p_len < len) { // output fit within buffer.
|
||||
assert(buf[p_len] == '\0');
|
||||
*s = buf;
|
||||
return HS_SUCCESS;
|
||||
} else { // output didn't fit: resize and reallocate.
|
||||
len = (size_t)p_len + 1; // must add one for null terminator.
|
||||
hs_misc_free(buf);
|
||||
}
|
||||
}
|
||||
|
||||
return HS_NOMEM;
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_serialized_database_info(const char *bytes, size_t length,
|
||||
char **info) {
|
||||
if (!info) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
*info = NULL;
|
||||
|
||||
if (!bytes || length < sizeof(struct hs_database)) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
const u32 *buf = (const u32 *)bytes;
|
||||
|
||||
u32 magic = unaligned_load_u32(buf++);
|
||||
if (magic != HS_DB_MAGIC) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
u32 version = unaligned_load_u32(buf++);
|
||||
|
||||
buf++; /* length */
|
||||
|
||||
platform_t plat;
|
||||
plat = unaligned_load_u64a(buf);
|
||||
buf += 2;
|
||||
|
||||
buf++; /* crc */
|
||||
buf++; /* reserved 0 */
|
||||
buf++; /* reserved 1 */
|
||||
|
||||
const char *t_raw = (const char *)buf;
|
||||
u32 mode = unaligned_load_u32(t_raw + offsetof(struct RoseEngine, mode));
|
||||
|
||||
return print_database_string(info, version, plat, mode);
|
||||
}
|
||||
|
||||
HS_PUBLIC_API
|
||||
hs_error_t hs_database_info(const hs_database_t *db, char **info) {
|
||||
if (!info) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
*info = NULL;
|
||||
|
||||
if (!db || !db_correctly_aligned(db) || db->magic != HS_DB_MAGIC) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
|
||||
platform_t plat;
|
||||
plat = db->platform;
|
||||
|
||||
const struct RoseEngine *rose = hs_get_bytecode(db);
|
||||
|
||||
return print_database_string(info, db->version, plat, rose->mode);
|
||||
}
|
||||
119
src/database.h
Normal file
119
src/database.h
Normal file
@@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Runtime code for hs_database manipulation.
|
||||
*/
|
||||
|
||||
#ifndef DATABASE_H_D467FD6F343DDE
|
||||
#define DATABASE_H_D467FD6F343DDE
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#include "hs_compile.h" // for HS_MODE_ flags
|
||||
#include "hs_version.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#define HS_DB_VERSION HS_VERSION_32BIT
|
||||
#define HS_DB_MAGIC (0xdbdbdbdbU)
|
||||
|
||||
// Values in here cannot (easily) change - add new ones!
|
||||
|
||||
// CPU type is the low 6 bits (we can't need more than 64, surely!)
|
||||
|
||||
#define HS_PLATFORM_INTEL 1
|
||||
#define HS_PLATFORM_CPU_MASK 0x3F
|
||||
|
||||
#define HS_PLATFORM_NOAVX2 (4<<13)
|
||||
|
||||
/** \brief Platform features bitmask. */
|
||||
typedef u64a platform_t;
|
||||
|
||||
static UNUSED
|
||||
const platform_t hs_current_platform = {
|
||||
#if !defined(__AVX2__)
|
||||
HS_PLATFORM_NOAVX2 |
|
||||
#endif
|
||||
0,
|
||||
};
|
||||
|
||||
static UNUSED
|
||||
const platform_t hs_current_platform_no_avx2 = {
|
||||
HS_PLATFORM_NOAVX2 |
|
||||
0,
|
||||
};
|
||||
|
||||
/*
|
||||
* a header to enclose the actual bytecode - useful for keeping info about the
|
||||
* compiled data.
|
||||
*/
|
||||
struct hs_database {
|
||||
u32 magic;
|
||||
u32 version;
|
||||
u32 length;
|
||||
u64a platform;
|
||||
u32 crc32;
|
||||
u32 reserved0;
|
||||
u32 reserved1;
|
||||
u32 bytecode; // offset relative to db start
|
||||
u32 padding[16];
|
||||
char bytes[];
|
||||
};
|
||||
|
||||
static really_inline
|
||||
const void *hs_get_bytecode(const struct hs_database *db) {
|
||||
return ((const char *)db + db->bytecode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Cheap database sanity checks used in block mode scan calls and streaming
|
||||
* mode open calls.
|
||||
*/
|
||||
static really_inline
|
||||
hs_error_t validDatabase(const hs_database_t *db) {
|
||||
if (!db || db->magic != HS_DB_MAGIC) {
|
||||
return HS_INVALID;
|
||||
}
|
||||
if (db->version != HS_DB_VERSION) {
|
||||
return HS_DB_VERSION_ERROR;
|
||||
}
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
hs_error_t dbIsValid(const struct hs_database *db);
|
||||
struct hs_database *dbCreate(const char *bytecode, size_t len, u64a platform);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* DATABASE_H_D467FD6F343DDE */
|
||||
39
src/fdr/CMakeLists.txt
Normal file
39
src/fdr/CMakeLists.txt
Normal file
@@ -0,0 +1,39 @@
|
||||
# The set of rules and other nastiness for generating FDR/Teddy source
|
||||
|
||||
# we need to add these as explicit dependencies
|
||||
set(AUTOGEN_PY_FILES
|
||||
arch.py
|
||||
autogen.py
|
||||
autogen_utils.py
|
||||
base_autogen.py
|
||||
fdr_autogen.py
|
||||
teddy_autogen.py
|
||||
)
|
||||
|
||||
# fdr_autogen(<type> <out>): run autogen.py with <type> and capture its
# stdout as <out> in the current binary directory; also adds a target
# autogen_<type> that depends on the generated file.
function(fdr_autogen type out)
    set(generated_file ${CMAKE_CURRENT_BINARY_DIR}/${out})
    add_custom_command (
        OUTPUT ${generated_file}
        COMMAND ${PYTHON} ${CMAKE_CURRENT_SOURCE_DIR}/autogen.py ${type} > ${generated_file}
        DEPENDS ${AUTOGEN_PY_FILES}
        COMMENT "AUTOGEN ${out}"
        )
    add_custom_target(autogen_${type} DEPENDS ${generated_file})
endfunction(fdr_autogen)
|
||||
|
||||
#now build the functions
|
||||
fdr_autogen(runtime fdr_autogen.c)
|
||||
fdr_autogen(compiler fdr_autogen_compiler.cpp)
|
||||
fdr_autogen(teddy_runtime teddy_autogen.c)
|
||||
fdr_autogen(teddy_compiler teddy_autogen_compiler.cpp)
|
||||
|
||||
# List of generated sources. set(... PARENT_SCOPE) only defines the variable
# in the enclosing scope, so define it locally first (the
# set_source_files_properties call below would otherwise receive an empty
# list) and then export the same value to the parent.
set(fdr_GENERATED_SRC
    ${CMAKE_BINARY_DIR}/src/fdr/fdr_autogen.c
    ${CMAKE_BINARY_DIR}/src/fdr/fdr_autogen_compiler.cpp
    ${CMAKE_BINARY_DIR}/src/fdr/teddy_autogen.c
    ${CMAKE_BINARY_DIR}/src/fdr/teddy_autogen_compiler.cpp
    )
set(fdr_GENERATED_SRC ${fdr_GENERATED_SRC} PARENT_SCOPE)

set_source_files_properties(${fdr_GENERATED_SRC} PROPERTIES GENERATED TRUE)
|
||||
include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
|
||||
58
src/fdr/arch.py
Executable file
58
src/fdr/arch.py
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import autogen_utils
|
||||
|
||||
# wrapper for architectures
|
||||
|
||||
class Arch:
    """Description of a target architecture for the code generators.

    name       -- architecture name
    extensions -- list of instruction-set extension names (copied, so the
                  caller's list is never shared with the instance)
    target     -- target expression string; None unless a subclass sets it
    guard_list -- preprocessor conditions that must all hold for code
                  generated for this architecture to be compiled
    """
    def __init__(self, name, extensions = None):
        self.name = name
        # Copy instead of sharing a mutable default between instances.
        self.extensions = list(extensions) if extensions is not None else []
        self.target = None
        # Defined here so get_guard() also works on a plain Arch.
        self.guard_list = []

    def get_guard(self):
        """Return the '#if ...' line guarding code for this architecture."""
        # these defines definitely fall into the "belt-and-suspenders"
        # category of paranoia
        if (self.guard_list == []):
            return "#if 1"

        return "#if " + " && ".join(self.guard_list)

class X86Arch(Arch):
    """x86 architecture; the "AVX2" extension adds a feature flag to the
    target expression and a compile-time guard."""
    def __init__(self, name, extensions = None):
        Arch.__init__(self, name, extensions)
        self.guard_list = [ ]
        self.target = "0"

        if "AVX2" in self.extensions:
            self.target += " | HS_CPU_FEATURES_AVX2"
            self.guard_list += [ "defined(__AVX2__)" ]
|
||||
|
||||
|
||||
arch_x86_64 = X86Arch("x86_64", extensions = [ ])
|
||||
arch_x86_64_avx2 = X86Arch("x86_64_avx2", extensions = [ "AVX2" ])
|
||||
159
src/fdr/autogen.py
Executable file
159
src/fdr/autogen.py
Executable file
@@ -0,0 +1,159 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys
|
||||
from autogen_utils import *
|
||||
from fdr_autogen import *
|
||||
from teddy_autogen import *
|
||||
from arch import *
|
||||
|
||||
# FDR setup
|
||||
|
||||
# these are either produced - if the guard succeeds, or #defined to zeroes.
|
||||
# either the function or the zero is fine in our array of function pointers
|
||||
|
||||
def produce_fdr_runtimes(l):
    """Ask every matcher in l, in order, to emit its runtime code."""
    for matcher in l:
        matcher.produce_code()
|
||||
|
||||
def produce_fdr_compiles(l):
    """Print the C++ function getFdrDescriptions(): a static table with one
    entry emitted by each matcher in l, copied into the output vector.

    Uses the single-argument call form of print, which behaves the same
    under Python 2 and Python 3."""
    print("void getFdrDescriptions(vector<FDREngineDescription> *out) {")
    print(" static const FDREngineDef defns[] = {")
    for m in l:
        # each matcher prints its own table entry
        m.produce_compile_call()
    print(" };")
    print(" out->clear();")
    print(" for (size_t i = 0; i < ARRAY_LENGTH(defns); i++) {")
    print(" out->push_back(FDREngineDescription(defns[i]));")
    print(" }")
    print("}")
|
||||
|
||||
def build_fdr_matchers():
    """Build the list of FDR matchers: strides 1, 2 and 4 for each regular
    domain size, followed by stride 1 only for the two largest domains."""
    shared = { "state_width" : 128, "num_buckets" : 8, "extract_frequency" : 8, "arch" : arch_x86_64 }

    matchers = [ ]
    for domain in [8, 10, 11, 12, 13]:
        for stride in [1, 2, 4]:
            matchers.append(M3(stride = stride, domain = domain, **shared))
    for domain in [ 14, 15 ]:
        matchers.append(M3(stride = 1, domain = domain, **shared))

    return matchers
|
||||
|
||||
# teddy setup
|
||||
|
||||
def build_teddy_matchers():
    """Build the list of Teddy matchers: the AVX2 variants first (fast, then
    fat with 1-4 masks), then the SSE variants with 1-4 masks. Each variant
    is added unpacked first, then packed."""
    matchers = [ ]

    # AVX2
    for packed in (False, True):
        matchers.append(MTFast(arch = arch_x86_64_avx2, packed = packed))
    for n_msk in range(1, 5):
        for packed in (False, True):
            matchers.append(MTFat(arch = arch_x86_64_avx2, packed = packed, num_masks = n_msk, num_buckets = 16))

    # SSE/SSE2/SSSE3
    for n_msk in range(1, 5):
        for packed in (False, True):
            matchers.append(MT(arch = arch_x86_64, packed = packed, num_masks = n_msk, num_buckets = 8))

    return matchers
|
||||
|
||||
def produce_teddy_compiles(l):
    """Print the C++ function getTeddyDescriptions(): a static table with
    one entry emitted by each matcher in l, copied into the output vector.

    Uses the single-argument call form of print, which behaves the same
    under Python 2 and Python 3."""
    print("void getTeddyDescriptions(vector<TeddyEngineDescription> *out) {")
    print(" static const TeddyEngineDef defns[] = {")
    for m in l:
        # each matcher prints its own table entry
        m.produce_compile_call()
    print(" };")
    print(" out->clear();")
    print(" for (size_t i = 0; i < ARRAY_LENGTH(defns); i++) {")
    print(" out->push_back(TeddyEngineDescription(defns[i]));")
    print(" }")
    print("}")
|
||||
|
||||
# see below - we don't produce our 'zeros' at the point of the teddy runtimes as they
|
||||
# are linked. So we either generate the function or we don't - then at the point of the
|
||||
# header in fdr_autogen.c we either generate the header or we #define the zero.
|
||||
|
||||
def produce_teddy_runtimes(l):
    """Print the Teddy runtime source: a guarded prototype for every matcher
    in l, followed by the guarded function bodies.

    Uses the single-argument call form of print, which behaves the same
    under Python 2 and Python 3."""
    # Since we're using -Wmissing-prototypes, we need headers first.
    for m in l:
        m.produce_guard()
        print(m.produce_header(visible = True, header_only = True))
        m.close_guard()

    for m in l:
        m.produce_guard()
        m.produce_code()
        m.close_guard()
|
||||
|
||||
# see produce_teddy_runtimes() comment for the rationale
|
||||
|
||||
def produce_teddy_headers(l):
    """For every matcher in l print its guard, its prototype, and then let
    the matcher emit its zero alternative.

    Uses the single-argument call form of print, which behaves the same
    under Python 2 and Python 3."""
    for m in l:
        m.produce_guard()
        print(m.produce_header(visible = True, header_only = True))
        m.produce_zero_alternative()
|
||||
|
||||
# general utilities
|
||||
|
||||
def make_fdr_function_pointers(matcher_list):
    """Print the FDRFUNCTYPE typedef and the static funcs[] table holding
    the name of every matcher in matcher_list, in order.

    Uses the single-argument call form of print, which behaves the same
    under Python 2 and Python 3."""
    print("""
typedef hwlm_error_t (*FDRFUNCTYPE)(const struct FDR *fdr, const struct FDR_Runtime_Args *a);
static FDRFUNCTYPE funcs[] = {
""")
    all_funcs = ",\n".join([ " %s" % m.get_name() for m in matcher_list ])
    print(all_funcs)
    print("""
};
""")
|
||||
|
||||
def assign_ids(matcher_list, next_id):
    """Give each matcher consecutive ids starting at next_id and return the
    first id that was not handed out."""
    current = next_id
    for matcher in matcher_list:
        matcher.id = current
        current = current + 1
    return current
|
||||
|
||||
# Main entry point
#
# argv[1] selects which generated source is written to stdout. FDR matchers
# get ids first, Teddy matchers continue the same numbering.

m = build_fdr_matchers()
next_id = assign_ids(m, 0)
tm = build_teddy_matchers()
next_id = assign_ids(tm, next_id)

if len(sys.argv) < 2:
    # report a usable message instead of a bare IndexError
    fail_out("expected an output type argument")
elif sys.argv[1] == "compiler":
    produce_fdr_compiles(m)
elif sys.argv[1] == "runtime":
    produce_fdr_runtimes(m)
    produce_teddy_headers(tm)
    make_fdr_function_pointers(m+tm)
elif sys.argv[1] == "teddy_runtime":
    produce_teddy_runtimes(tm)
elif sys.argv[1] == "teddy_compiler":
    produce_teddy_compiles(tm)
else:
    # an unknown type would otherwise silently yield an empty output file
    fail_out("unknown output type '%s'" % sys.argv[1])
|
||||
285
src/fdr/autogen_utils.py
Executable file
285
src/fdr/autogen_utils.py
Executable file
@@ -0,0 +1,285 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys
|
||||
|
||||
def fail_out(msg = ""):
    """Write an internal-failure message to stderr and exit with status 1.

    sys.stderr.write is used instead of the Python 2-only
    'print >>sys.stderr' form so this also works under Python 3."""
    sys.stderr.write("Internal failure in autogen.py: " + msg + "\n")
    sys.exit(1)
|
||||
|
||||
class IntegerType:
    """Integer type of a given bit width, with helpers that build C
    expression strings (masks, shifts, bit extraction, loads) for values of
    that type."""

    # suffix appended to the "lv_<type>" load helper for each load code
    _LOAD_SUFFIXES = {
        "normal" : "",
        "aligned" : "_a",
        "cautious_forward" : "_cf",
        "cautious_backward" : "_cb",
        "cautious_everywhere" : "_ce",
    }

    def __init__(self, size):
        # size is the width in bits
        self.size = size

    def get_name(self):
        """Return the C type name for this width (KeyError if unsupported)."""
        return { 256: "m256", 128 : "m128", 64 : "u64a", 32 : "u32" , 16 : "u16", 8 : "u8"}[self.size]

    def size_in_bytes(self):
        # floor division keeps the result an integer under Python 3 as well
        return self.size // 8

    def isSIMDOnIntel(self):
        return False

    def zero_expression(self):
        return "0"

    def constant_to_string(self, n):
        """Render n, truncated to this type's width, as a C hex literal."""
        if self.size == 64:
            suffix = "ULL"
        else:
            suffix = ""
        return "0x%x%s" % (n & ((1 << self.size) - 1), suffix)

    def lowbits(self, n):
        """Integer with the low n bits set."""
        return (1 << n) - 1

    def highbits(self, n):
        """Integer with every bit set except the low (size - n) bits; it is
        truncated to the type's width when rendered as a constant."""
        return ~(self.lowbits(self.size - n))

    def lowbit_mask(self, n):
        return self.constant_to_string(self.lowbits(n))

    def highbit_mask(self, n):
        return self.constant_to_string(self.highbits(n))

    def lowbit_extract_expr(self, expr_string, n):
        return "(%s & %s)" % ( expr_string, self.lowbit_mask(n))

    def highbit_extract_expr(self, expr_string, n):
        return "(%s >> %d)" % (expr_string, self.size - n)

    def flip_lowbits_expr(self, expr_string, n):
        return "(%s ^ %s)" % ( expr_string, self.lowbit_mask(n))

    def bit_extract_expr(self, expr_string, low, high):
        """Expression extracting bits [low, high) of expr_string."""
        lbm = self.lowbit_mask(high - low)
        return "((%s >> %d) & %s)" % (expr_string, low, lbm)

    # shifts are +ve if left and -ve if right
    def shift_expr(self, expr_string, n):
        if n <= -self.size or n >= self.size:
            # shifting by the full width or more leaves nothing behind
            return self.zero_expression()
        elif (n > 0):
            return "(%s << %d)" % (expr_string, n)
        elif (n < 0):
            return "(%s >> %d)" % (expr_string, -n)
        else:
            return "(%s)" % (expr_string)

    # code is:
    # "normal" (always between buf and len) - the default
    # "aligned" (means normal + aligned to a natural boundary)
    # "cautious_forward" (means may go off the end of buf+len)
    # "cautious_backward" (means may go off the start of buf)
    # "cautious_everywhere" (means may go off both)

    def load_expr_data(self, offset = 0, code = "normal",
                       base_string = "ptr", bounds_lo = "buf", bounds_hi = "buf + len"):
        """Return a call to the lv_<type>[_suffix] load helper matching code.

        Codes are compared by value (==), not identity, so strings built at
        runtime are recognised too; an unknown code is reported through
        fail_out() instead of silently yielding None."""
        if code not in self._LOAD_SUFFIXES:
            fail_out("unknown load code '%s'" % code)
        if code == "aligned" and self.size == 8:
            fail_out("no aligned byte loads")
        return "lv_%s%s(%s + %d, %s, %s)" % (self.get_name(), self._LOAD_SUFFIXES[code], base_string, offset, bounds_lo, bounds_hi)
|
||||
|
||||
|
||||
class SIMDIntegerType(IntegerType):
    """128-bit SIMD variant of IntegerType: expressions are built from SIMD
    helper calls and shifts are limited to whole bytes."""
    def __init__(self, size):
        IntegerType.__init__(self, size)

    def isSIMDOnIntel(self):
        return True

    def zero_expression(self):
        return "zeroes128()"

    def lowbit_extract_expr(self, expr_string, n):
        """Extract the low n bits by first moving the low 32 or 64 bits out
        into a scalar; more than 64 bits is not supported."""
        if (n <= 32):
            tmpType = IntegerType(32)
            tmpExpr = "movd(%s)" % expr_string
        elif (32 < n <= 64):
            tmpType = IntegerType(64)
            tmpExpr = "movq(%s)" % expr_string
        else:
            # previously fell through to an UnboundLocalError
            fail_out("Unimplemented low bit extract of more than 64 bits on m128")
        return tmpType.lowbit_extract_expr(tmpExpr, n)

    def highbit_extract_expr(self, expr_string, n):
        fail_out("Unimplemented high bit extract on m128")

    # flip is optional so the method can also be called with the
    # three-argument form used by IntegerType.bit_extract_expr
    def bit_extract_expr(self, expr_string, low, high, flip = None):
        fail_out("Unimplemented bit extract on m128")

    def shift_expr(self, expr_string, n):
        """Byte-granular shift; +ve n shifts left, -ve n shifts right."""
        if n % 8 != 0:
            fail_out("Trying to shift a m128 by a bit granular value")

        if n <= -self.size or n >= self.size:
            return self.zero_expression()
        elif (n > 0):
            # floor division keeps the byte count an integer under Python 3
            return "_mm_slli_si128(%s, %s)" % (expr_string, n // 8)
        elif (n < 0):
            return "_mm_srli_si128(%s, %s)" % (expr_string, -n // 8)
        else:
            return "(%s)" % (expr_string)

    def lowbit_mask(self, n):
        if n % 8 != 0:
            fail_out("Trying to make a lowbit mask in a m128 by a bit granular value")
        # all-ones shifted right so that only the low n bits stay set
        return self.shift_expr("ones128()", -(128 - n))
|
||||
|
||||
def getRequiredType(bits):
    """Pick the type object able to hold a value of the given bit width.

    Exactly 128 bits maps to SIMDIntegerType(128).  Otherwise the narrowest of
    the 8/16/32/64-bit IntegerTypes that fits is returned, or None when the
    width exceeds 64 bits.
    """
    if bits == 128:
        return SIMDIntegerType(bits)
    wide_enough = [width for width in (8, 16, 32, 64) if bits <= width]
    if not wide_enough:
        return None
    return IntegerType(wide_enough[0])
|
||||
|
||||
class IntegerVariable:
    """A named variable of a given type in the generated C code."""

    def __init__(self, name, type):
        self.name = name
        self.type = type

    def gen_initializer_stmt(self, initialization_string = None):
        """Return the C declaration of this variable.

        A truthy initialization_string gives "<type> <name> = <init>;".
        None or an empty string gives a bare "<type> <name>;".
        """
        declaration = "%s %s" % (self.type.get_name(), self.name)
        if not initialization_string:
            return declaration + ";"
        return "%s = %s;" % (declaration, initialization_string)
|
||||
|
||||
|
||||
class Step:
    """One unit of generated code, registered with a CodeGenContext.

    Subclasses set self.val to the text to emit.  Each step records the steps
    it depends on together with the latency charged for that dependency.
    """

    def __init__(self, context, offset = 0):
        self.context = context
        self.matcher = context.matcher
        self.offset = offset
        self.latency = 1
        self.dependency_list = []   # (step, latency) pairs
        self.latest = None          # filled in by CodeGenContext.schedule
        self.context.add_step(self)

    # return a string, complete with indentation
    def emit(self):
        """Return self.val with every line indented, plus a debug comment.

        The trailing "//" comment is only added once the step has a truthy
        "latest" value; the dependency list is part of that comment.
        """
        pad = " " * (self.offset*2 + self.matcher.default_body_indent)
        parts = [ "\n".join(pad + line for line in self.val.split("\n")) ]
        if self.latest:
            parts.append(" // %s L%s LTST:%d" % (self.debug_step, self.latency, self.latest))
            if self.dependency_list:
                parts.append(" Derps: ")
                for (dep, lat) in self.dependency_list:
                    parts.append("%d/%d " % (dep.debug_step, lat))
        return "".join(parts)

    def add_dependency(self, step, anti_dependency = False, output_dependency = False):
        """Record that this step depends on `step`.

        Anti- and output dependencies are charged a latency of 1; any other
        dependency is charged the full latency of `step`.
        """
        ordering_only = anti_dependency or output_dependency
        cost = 1 if ordering_only else step.latency
        self.dependency_list.append((step, cost))

    def nv(self, type, var_name):
        """Create a new variable owned by this step (see CodeGenContext.new_var)."""
        return self.context.new_var(self, type, var_name)

    def gv(self, var_name, reader = True, writer = False):
        """Look up an existing variable, registering this step as reader/writer."""
        return self.context.get_var(self, var_name, reader = reader, writer = writer)
|
||||
|
||||
# utility steps, generic
|
||||
|
||||
class LabelStep(Step):
    """Emits a C label named <label_prefix><offset>, followed by "UNUSED;"."""

    def __init__(self, context, offset = 0, label_prefix = "off"):
        Step.__init__(self, context, offset)
        label = "%s%d" % (label_prefix, offset)
        self.val = label + ": UNUSED;"
|
||||
|
||||
class OpenScopeStep(Step):
    """Emits the opening brace of a C scope."""

    def __init__(self, context, offset = 0):
        Step.__init__(self, context, offset)
        self.val = "{"
|
||||
|
||||
class CloseScopeStep(Step):
    """Emits the closing brace of a C scope."""

    def __init__(self, context, offset = 0):
        Step.__init__(self, context, offset)
        self.val = "}"
|
||||
|
||||
|
||||
class CodeGenContext:
    """Collects the steps of one generated function and tracks variable use.

    Variable reads and writes made through new_var/get_var are turned into
    dependencies between steps, which schedule() uses to order the output.
    """

    def __init__(self, matcher):
        self.vars = {}
        self.steps = []
        self.ctr = 0
        self.matcher = matcher
        self.var_writer = {}  # var to a single writer
        self.var_readers = {} # var to a list of all the readers that read the last value

    def new_var(self, step, type, var_name):
        """Create variable var_name of the given type, written by step."""
        var = IntegerVariable(var_name, type)
        self.vars[var_name] = var
        self.var_writer[var_name] = step
        return var

    def get_var(self, step, var_name, reader = True, writer = False):
        """Return an existing variable and record how step uses it.

        reader -- step depends on the last writer and joins the reader list
        writer -- step gets an anti-dependency on every other current reader
                  and becomes the variable's writer; a write that does not
                  also read gets an output dependency on the previous writer
        """
        if reader:
            writer_step = self.var_writer[var_name]
            if writer_step:
                step.add_dependency(writer_step)
            self.var_readers.setdefault(var_name, []).append(step)
        if writer and not reader:
            if self.var_writer[var_name]:
                step.add_dependency(self.var_writer[var_name], output_dependency = True)
        if writer:
            # "in" replaces dict.has_key, which no longer exists on Python 3.
            if var_name in self.var_readers:
                for other in [ r for r in self.var_readers[var_name] if r is not step ]:
                    step.add_dependency(other, anti_dependency = True)
                self.var_readers[var_name] = []
            self.var_writer[var_name] = step
        return self.vars[var_name]

    def add_step(self, step):
        """Append step and stamp it with its creation index (debug_step)."""
        self.steps += [ step ]
        step.debug_step = self.ctr
        self.ctr += 1

    def dontschedule(self, finals):
        """Emit all steps in creation order; finals is ignored."""
        return "\n".join( [ s.emit() for s in self.steps ] )

    def schedule(self, finals):
        """Order the steps by dependency distance from finals and emit them.

        Each step's "latest" becomes the largest sum of edge latencies on any
        dependency path from it to a final step, plus that final's own
        latency.  Steps are emitted in decreasing order of "latest".
        """
        for f in finals:
            f.latest = f.latency
        worklist = list(finals)
        while worklist:
            current = worklist.pop(0)
            for (dep, lat) in current.dependency_list:
                # Compare against the value that would be assigned.  The old
                # test used dep.latency, so an anti/output edge (lat == 1)
                # could overwrite a larger "latest" with a smaller one.
                candidate = current.latest + lat
                if dep.latest is None or dep.latest < candidate:
                    dep.latest = candidate
                    if dep not in worklist:
                        worklist.append(dep)
        # Steps never reached from finals keep latest == None and sort last,
        # as None did on Python 2; Python 3 cannot compare None with ints.
        self.steps.sort(reverse = True,
                        key = lambda s : (s.latest is not None, s.latest or 0))
        return "\n".join( [ s.emit() for s in self.steps ] )
|
||||
167
src/fdr/base_autogen.py
Normal file
167
src/fdr/base_autogen.py
Normal file
@@ -0,0 +1,167 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys
|
||||
from autogen_utils import *
|
||||
from base_autogen import *
|
||||
from string import Template
|
||||
|
||||
class MatcherBase:
    """Shared fragments of generated C source for the matcher generators.

    The methods read self.id, self.arch, self.num_buckets,
    self.conf_top_level_split and self.conf_pull_back, none of which are set
    here; subclasses are expected to provide them.
    """

    def __init__(self):
        pass

    def get_name(self):
        """Name of the generated C function, "fdr_exec_NNN" from self.id."""
        return "fdr_exec_%03d" % self.id

    def produce_header(self, visible, header_only = False):
        """Return the C function header.

        visible     -- when false the function is declared "static never_inline"
        header_only -- end with ";" (a prototype) instead of an opening "{"
        """
        s = ""
        if not visible:
            s += "static never_inline"
        s += """
hwlm_error_t %s(UNUSED const struct FDR *fdr,
UNUSED const struct FDR_Runtime_Args * a)""" % self.get_name()
        s += ";" if header_only else "{"
        s += "\n"
        return s

    # The print calls below pass a single argument, which behaves the same as
    # the former print statements on Python 2 and also runs on Python 3.
    def produce_guard(self):
        """Print the preprocessor guard supplied by self.arch."""
        print(self.arch.get_guard())

    def produce_zero_alternative(self):
        """Print the #else branch that defines the function name as 0."""
        print("""
#else
#define %s 0
#endif
""" % self.get_name())

    # trivial function for documentation/modularity
    def close_guard(self):
        print("#endif")

    def produce_common_declarations(self):
        """Return the local variable declarations shared by the generated functions."""
        return """
const u8 * buf = a->buf;
const size_t len = a->len;
const u8 * ptr = buf + a->start_offset;
hwlmcb_rv_t controlVal = *a->groups;
hwlmcb_rv_t * control = &controlVal;
u32 floodBackoff = FLOOD_BACKOFF_START;
const u8 * tryFloodDetect = a->firstFloodDetect;
UNUSED u32 bit, bitRem, confSplit, idx;
u32 byte, cf;
const struct FDRConfirm *fdrc;
u32 last_match = (u32)-1;
"""

    def produce_continue_check(self):
        """Return C code that stores controlVal and returns once matching is terminated."""
        return """if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
*a->groups = controlVal;
return HWLM_TERMINATED;
}
"""

    def produce_flood_check(self):
        """Return C code that calls floodDetect once ptr passes tryFloodDetect."""
        return """
if (P0(ptr > tryFloodDetect)) {
tryFloodDetect = floodDetect(fdr, a, &ptr, tryFloodDetect, &floodBackoff, &controlVal, iterBytes);
if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
*a->groups = controlVal;
return HWLM_TERMINATED;
}
}
"""

    def produce_footer(self):
        """Return the function epilogue: store controlVal and return HWLM_SUCCESS."""
        return """
*a->groups = controlVal;
return HWLM_SUCCESS;
}
"""

    def produce_confirm_base(self, conf_var_name, conf_var_size, offset, cautious, enable_confirmless, do_bailout = False):
        """Return the C confirm loop for one confirm variable.

        conf_var_name      -- C variable whose set bits are the candidates
        conf_var_size      -- suffix selecting the findAndClearLSB_<size> helper
        offset             -- constant added to each candidate's byte position
        cautious           -- pass VECTORING rather than NOT_CAUTIOUS to confWithBit
        enable_confirmless -- add the shortcut that invokes the callback
                              directly when fdrc->mult is zero
        do_bailout         -- skip candidates lying outside
                              [buf + start_offset, buf + len)
        """
        caution_string = "VECTORING" if cautious else "NOT_CAUTIOUS"
        conf_split_mask = IntegerType(32).constant_to_string(
            self.conf_top_level_split - 1)
        if enable_confirmless:
            quick_check_string = """
if (!fdrc->mult) {
u32 id = fdrc->nBitsOrSoleID;
if ((last_match == id) && (fdrc->flags & NoRepeat))
continue;
last_match = id;
controlVal = a->cb(ptr+byte-buf, ptr+byte-buf, id, a->ctxt);
continue;
} """
        else:
            quick_check_string = ""
        if do_bailout:
            bailout_string = """
if ((ptr + byte < buf + a->start_offset) || (ptr + byte >= buf + len)) continue;"""
        else:
            bailout_string = ""

        return Template("""
if (P0(!!$CONFVAR)) {
do {
bit = findAndClearLSB_$CONFVAR_SIZE(&$CONFVAR);
byte = bit / $NUM_BUCKETS + $OFFSET;
bitRem = bit % $NUM_BUCKETS;
$BAILOUT_STRING
confSplit = *(ptr+byte) & $SPLIT_MASK;
idx = confSplit * $NUM_BUCKETS + bitRem;
cf = confBase[idx];
if (!cf)
continue;
fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);
if (!(fdrc->groups & *control))
continue;
$QUICK_CHECK_STRING
confWithBit(fdrc, a, ptr - buf + byte, $CAUTION_STRING, $CONF_PULL_BACK, control, &last_match);
} while(P0(!!$CONFVAR));
if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {
*a->groups = controlVal;
return HWLM_TERMINATED;
}
}""").substitute(CONFVAR = conf_var_name,
                 CONFVAR_SIZE = conf_var_size,
                 NUM_BUCKETS = self.num_buckets,
                 OFFSET = offset,
                 SPLIT_MASK = conf_split_mask,
                 QUICK_CHECK_STRING = quick_check_string,
                 BAILOUT_STRING = bailout_string,
                 CAUTION_STRING = caution_string,
                 CONF_PULL_BACK = self.conf_pull_back)
|
||||
|
||||
|
||||
def indent(block, depth):
    """Prefix every line of block with 4*depth spaces.

    Lines are split with str.splitlines and rejoined with "\\n", so a trailing
    newline is dropped and an empty block stays empty.
    """
    pad = " " * (4*depth)
    return "\n".join(pad + line for line in block.splitlines())
|
||||
49
src/fdr/engine_description.cpp
Normal file
49
src/fdr/engine_description.cpp
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "engine_description.h"
|
||||
#include "hs_compile.h" // for hs_platform_info
|
||||
#include "util/target_info.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
EngineDescription::~EngineDescription() {}
|
||||
|
||||
bool EngineDescription::isValidOnTarget(const target_t &target_in) const {
|
||||
return target_in.can_run_on_code_built_for(code_target);
|
||||
}
|
||||
|
||||
target_t targetByArchFeatures(u64a cpu_features) {
|
||||
hs_platform_info p;
|
||||
p.tune = HS_TUNE_FAMILY_GENERIC;
|
||||
p.cpu_features = cpu_features;
|
||||
|
||||
return target_t(p);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
70
src/fdr/engine_description.h
Normal file
70
src/fdr/engine_description.h
Normal file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ENGINE_DESCRIPTION_H
|
||||
#define ENGINE_DESCRIPTION_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/target_info.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class EngineDescription {
|
||||
u32 id;
|
||||
target_t code_target; // the target that we built this code for
|
||||
u32 numBuckets;
|
||||
u32 confirmPullBackDistance;
|
||||
u32 confirmTopLevelSplit;
|
||||
|
||||
public:
|
||||
EngineDescription(u32 id_in, const target_t &code_target_in,
|
||||
u32 numBuckets_in, u32 confirmPullBackDistance_in,
|
||||
u32 confirmTopLevelSplit_in)
|
||||
: id(id_in), code_target(code_target_in), numBuckets(numBuckets_in),
|
||||
confirmPullBackDistance(confirmPullBackDistance_in),
|
||||
confirmTopLevelSplit(confirmTopLevelSplit_in) {}
|
||||
|
||||
virtual ~EngineDescription();
|
||||
|
||||
u32 getID() const { return id; }
|
||||
u32 getNumBuckets() const { return numBuckets; }
|
||||
u32 getConfirmPullBackDistance() const { return confirmPullBackDistance; }
|
||||
u32 getConfirmTopLevelSplit() const { return confirmTopLevelSplit; }
|
||||
|
||||
bool isValidOnTarget(const target_t &target_in) const;
|
||||
virtual u32 getDefaultFloodSuffixLength() const = 0;
|
||||
|
||||
virtual bool typicallyHoldsOneCharLits() const { return true; }
|
||||
};
|
||||
|
||||
/** Returns a target given a CPU feature set value. */
|
||||
target_t targetByArchFeatures(u64a cpu_features);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
126
src/fdr/fdr.c
Normal file
126
src/fdr/fdr.c
Normal file
@@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
#define P0(cnd) unlikely(cnd)
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "teddy_internal.h"
|
||||
|
||||
#include "flood_runtime.h"
|
||||
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_confirm_runtime.h"
|
||||
#include "fdr_streaming_runtime.h"
|
||||
#include "fdr_loadval.h"
|
||||
|
||||
static really_inline UNUSED
|
||||
u32 getPreStartVal(const struct FDR_Runtime_Args *a, u32 numBits) {
|
||||
u32 r = 0;
|
||||
if (a->start_offset == 0) {
|
||||
if (numBits <= 8) {
|
||||
r = a->buf_history[a->len_history - 1];
|
||||
} else {
|
||||
r = a->buf_history[a->len_history - 1];
|
||||
r |= (a->buf[0] << 8);
|
||||
}
|
||||
} else {
|
||||
if (numBits <= 8) {
|
||||
r = a->buf[a->start_offset - 1];
|
||||
} else {
|
||||
r = lv_u16(a->buf + a->start_offset - 1, a->buf, a->buf + a->len);
|
||||
}
|
||||
}
|
||||
return r & ((1 << numBits) - 1);
|
||||
}
|
||||
|
||||
#include "fdr_autogen.c"
|
||||
|
||||
#define FAKE_HISTORY_SIZE 16
|
||||
static const u8 fake_history[FAKE_HISTORY_SIZE];
|
||||
|
||||
hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len, size_t start,
|
||||
HWLMCallback cb, void *ctxt, hwlm_group_t groups) {
|
||||
|
||||
const struct FDR_Runtime_Args a = {
|
||||
buf,
|
||||
len,
|
||||
fake_history,
|
||||
0,
|
||||
fake_history, // nocase
|
||||
0,
|
||||
start,
|
||||
cb,
|
||||
ctxt,
|
||||
&groups,
|
||||
nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
|
||||
0
|
||||
};
|
||||
if (unlikely(a.start_offset >= a.len)) {
|
||||
return HWLM_SUCCESS;
|
||||
} else {
|
||||
assert(funcs[fdr->engineID]);
|
||||
return funcs[fdr->engineID](fdr, &a);
|
||||
}
|
||||
}
|
||||
|
||||
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
|
||||
size_t hlen, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback cb, void *ctxt,
|
||||
hwlm_group_t groups, u8 * stream_state) {
|
||||
struct FDR_Runtime_Args a = {
|
||||
buf,
|
||||
len,
|
||||
hbuf,
|
||||
hlen,
|
||||
hbuf, // nocase - start same as caseful, override later if needed
|
||||
hlen, // nocase
|
||||
start,
|
||||
cb,
|
||||
ctxt,
|
||||
&groups,
|
||||
nextFloodDetect(buf, len, FLOOD_BACKOFF_START),
|
||||
hbuf ? CONF_LOADVAL_CALL_CAUTIOUS(hbuf + hlen - 8, hbuf, hbuf + hlen)
|
||||
: (u64a)0
|
||||
|
||||
};
|
||||
fdrUnpackState(fdr, &a, stream_state);
|
||||
|
||||
hwlm_error_t ret;
|
||||
if (unlikely(a.start_offset >= a.len)) {
|
||||
ret = HWLM_SUCCESS;
|
||||
} else {
|
||||
assert(funcs[fdr->engineID]);
|
||||
ret = funcs[fdr->engineID](fdr, &a);
|
||||
}
|
||||
|
||||
fdrPackState(fdr, &a, stream_state);
|
||||
return ret;
|
||||
}
|
||||
91
src/fdr/fdr.h
Normal file
91
src/fdr/fdr.h
Normal file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: runtime API.
|
||||
*/
|
||||
|
||||
#ifndef FDR_H
|
||||
#define FDR_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm.h"
|
||||
|
||||
// C linkage in the API
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct FDR;
|
||||
|
||||
/** \brief Returns size in bytes of the given FDR engine. */
|
||||
size_t fdrSize(const struct FDR *fdr);
|
||||
|
||||
/** \brief Returns non-zero if the contents of the stream state indicate that
|
||||
* there is active FDR history beyond the regularly used history. */
|
||||
u32 fdrStreamStateActive(const struct FDR *fdr, const u8 *stream_state);
|
||||
|
||||
/**
|
||||
* \brief Block-mode scan.
|
||||
*
|
||||
* \param fdr FDR matcher engine.
|
||||
* \param buf Buffer to scan.
|
||||
* \param len Length of buffer to scan.
|
||||
* \param start First offset in buf at which a match may end.
|
||||
* \param cb Callback to call when a match is found.
|
||||
* \param ctxt Caller-provided context pointer supplied to callback on match.
|
||||
* \param groups Initial groups mask.
|
||||
*/
|
||||
hwlm_error_t fdrExec(const struct FDR *fdr, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback cb, void *ctxt,
|
||||
hwlm_group_t groups);
|
||||
|
||||
/**
|
||||
* \brief Streaming-mode scan.
|
||||
*
|
||||
* \param fdr FDR matcher engine.
|
||||
* \param hbuf History buffer.
|
||||
* \param hlen Length of history buffer (hbuf).
|
||||
* \param buf Buffer to scan.
|
||||
* \param len Length of buffer to scan (buf).
|
||||
* \param start First offset in buf at which a match may end.
|
||||
* \param cb Callback to call when a match is found.
|
||||
* \param ctxt Caller-provided context pointer supplied to callback on match.
|
||||
* \param groups Initial groups mask.
|
||||
* \param stream_state Persistent stream state for use by FDR.
|
||||
*/
|
||||
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
|
||||
size_t hlen, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback cb, void *ctxt,
|
||||
hwlm_group_t groups, u8 *stream_state);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // FDR_H
|
||||
574
src/fdr/fdr_autogen.py
Executable file
574
src/fdr/fdr_autogen.py
Executable file
@@ -0,0 +1,574 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys
|
||||
from autogen_utils import *
|
||||
from base_autogen import *
|
||||
from string import Template
|
||||
|
||||
class OrStep(Step):
    """ORs the per-offset variable "st<offset>" into the C variable "s".

    Widths below 128 use the C "|=" operator; wider states call the
    or<width>() helper instead.
    """

    def __init__(self, context, offset, width):
        Step.__init__(self, context, offset)
        st_name = self.gv("st%d" % offset).name
        if width >= 128:
            self.val = "s = or%d(s, %s);" % (width, st_name)
        else:
            self.val = "s |= %s;" % st_name
|
||||
|
||||
class ShiftStateStep(Step):
    """Shifts the matcher's state variable by -(stride_used * num_buckets) bits."""

    def __init__(self, context, offset = 0, stride_used = 1):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        state_var = matcher.state_variable
        distance = -(stride_used * matcher.num_buckets)
        shifted = state_var.type.shift_expr(state_var.name, distance)
        self.val = "%s = %s;" % (state_var.name, shifted)
|
||||
|
||||
class BulkLoadStep(Step):
    """Loads a bulk_load_type value at this offset into "current_data_<offset>".

    define_var -- declare the variable (True) or assign to an existing one
    aligned    -- use the "aligned" load flavour instead of the default

    The size argument is accepted but not used.
    """

    def __init__(self, context, offset, size, define_var = True, aligned = True):
        Step.__init__(self, context, offset)
        self.latency = 4
        load_type = self.matcher.bulk_load_type
        if aligned:
            load_expr = load_type.load_expr_data(self.offset, code = "aligned")
        else:
            load_expr = load_type.load_expr_data(self.offset)

        name = "current_data_%d" % offset
        if not define_var:
            # Write-only access to a variable declared by an earlier step.
            self.gv(name, reader = False, writer = True)
            self.val = "%s = %s;" % (name, load_expr)
        else:
            self.val = self.nv(load_type, name).gen_initializer_stmt(load_expr)
|
||||
|
||||
class ValueExtractStep(Step):
    """Declares "v<offset>": the masked value for this offset.

    The value comes either from a fresh single load or from the bulk-loaded
    register covering this offset, is shifted into place and is then masked
    with the matcher's reach_mask.
    """

    def __init__(self, context, offset, sub_load_cautious = False):
        Step.__init__(self, context, offset)
        m = self.matcher
        self.latency = 2
        word_bytes = m.datasize_bytes
        byte_in_word = offset % word_bytes

        if m.domain > 8 and byte_in_word == word_bytes - 1:
            # Case 1: reading more than one byte over the end of the bulk load
            self.latency = 4
            code = "cautious_forward" if sub_load_cautious else "normal"
            loaded = m.single_load_type.load_expr_data(self.offset, code)
            shifted = "(%s << %d)" % (loaded, m.reach_shift_adjust)
        elif m.fdr2_force_naive_load:
            # Case 2, naive: reload even though the bulk register holds the value
            loaded = m.single_load_type.load_expr_data(self.offset, "normal")
            shifted = "(%s << %d)" % (loaded, m.reach_shift_adjust)
        else:
            # Case 2: the value can be found entirely in the current register
            bulk = self.gv("current_data_%d" % (offset - byte_in_word))
            if byte_in_word == 0:
                # Case 2a: value sits at the LSB end and is left-shifted by
                # any required "reach_shift_adjust"
                shifted = "(%s << %d)" % (bulk.name, m.reach_shift_adjust)
            else:
                # Case 2b: value is mid-register and is right-shifted into
                # place (adjusted by "reach_shift_adjust")
                shifted = "(%s >> %d)" % (bulk.name, byte_in_word*8 - m.reach_shift_adjust)

        masked = "(%s) & 0x%x" % (shifted, m.reach_mask)
        value_var = self.nv(m.value_extract_type, "v%d" % offset)
        self.val = value_var.gen_initializer_stmt(masked)
|
||||
|
||||
class TableLookupStep(Step):
    """Declares "st<offset>" as the table entry at ft + v<offset>*reach_multiplier."""

    def __init__(self, context, reach_multiplier, offset = 0):
        Step.__init__(self, context, offset)
        m = self.matcher
        self.latency = 4
        value_var = self.gv("v%d" % offset)
        state_var = self.nv(m.state_type, "st%d" % offset)
        lookup = "*(const %s *)(ft + %s*%dU)" % (m.state_type.get_name(),
                                                 value_var.name, reach_multiplier)
        self.val = state_var.gen_initializer_stmt(lookup)
|
||||
|
||||
class ShiftReachMaskStep(Step):
    """Shifts "st<offset>" by (offset % extract_frequency) * num_buckets bits."""

    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        m = self.matcher
        slot = offset % m.extract_frequency
        state_var = self.gv("st%d" % offset, writer = True)
        shifted = state_var.type.shift_expr(state_var.name, slot * m.num_buckets)
        self.val = "%s = %s;" % (state_var.name, shifted)
|
||||
|
||||
class ConfExtractStep(Step):
    """Declares "extr<offset>" as the low extract_size bits of the state "s"."""

    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        m = self.matcher
        if m.state_type.isSIMDOnIntel():
            # SIMD state types are charged a latency of 2 for the extraction.
            self.latency = 2
        low_bits = m.state_type.lowbit_extract_expr("s", m.extract_size)
        self.val = self.nv(m.extr_type, "extr%d" % offset).gen_initializer_stmt(low_bits)
|
||||
|
||||
class ConfAccumulateStep(Step):
    """Folds "extr<extract_offset>" into "conf<conf_offset>".

    When the two offsets are equal the confirm variable becomes a copy of the
    extracted value (declared if define_var, otherwise assigned).  Otherwise
    the extracted value is shifted by the offset difference times num_buckets
    and ORed into the existing confirm variable.
    """

    def __init__(self, context, extract_offset, conf_offset, define_var = True):
        Step.__init__(self, context, extract_offset)
        m = self.matcher
        extracted = self.gv("extr%d" % extract_offset)
        extracted_cast = "((%s)%s)" % (m.conf_type.get_name(), extracted.name)
        conf_name = "conf%d" % conf_offset

        if extract_offset != conf_offset:
            # shift extr_var and insert/OR it in conf_var
            conf_var = self.gv(conf_name, writer = True, reader = True)
            distance = (extract_offset - conf_offset) * m.num_buckets
            self.val = "%s |= %s;" % (conf_var.name, m.conf_type.shift_expr(extracted_cast, distance))
            # NOTE(review): indentation is lost in this copy of the file, so
            # it is assumed this latency applies to the shift/OR form only -
            # confirm against the original source.
            self.latency = 2
        elif define_var:
            # create conf_var as a straight copy of extr
            conf_var = self.nv(m.conf_type, conf_name)
            self.val = conf_var.gen_initializer_stmt(extracted_cast)
        else:
            conf_var = self.gv(conf_name, writer = True, reader = True)
            self.val = "%s = %s;" % (conf_var.name, extracted_cast)
|
||||
|
||||
class ConfirmFlipStep(Step):
    """Code-generation step that rewrites `conf<offset>` with its low bits flipped.

    The number of flipped bits is `confirm_frequency * num_buckets`; the
    expression comes from the variable type's `flip_lowbits_expr`.
    """

    def __init__(self, context, offset):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        # The variable name is built from self.offset (as left by
        # Step.__init__), not from the raw argument.
        conf_var = self.gv("conf%d" % self.offset, writer = True)
        bits_to_flip = self.matcher.confirm_frequency * matcher.num_buckets
        flipped = conf_var.type.flip_lowbits_expr(conf_var.name, bits_to_flip)
        self.val = "%s = %s;" % (conf_var.name, flipped)
|
||||
|
||||
class ConfirmStep(Step):
    """Code-generation step whose text is the matcher's confirm code for `conf<offset>`.

    The text is produced by `produce_confirm_base`; confirmless handling is
    enabled only for stride-1 matchers and bailout is always disabled.
    """

    def __init__(self, context, offset, cautious = False):
        Step.__init__(self, context, offset)
        matcher = self.matcher
        conf_var = self.gv("conf%d" % offset, writer = True)
        stride_is_one = matcher.stride == 1
        self.val = matcher.produce_confirm_base(
            conf_var.name, conf_var.type.size, offset, cautious,
            enable_confirmless = stride_is_one, do_bailout = False)
|
||||
|
||||
class M3(MatcherBase):
|
||||
def get_hash_safety_parameters(self):
    """Return a `(behind, ahead)` pair derived from the single-load type.

    `behind` is always 0 and `ahead` is the byte size of
    `self.single_load_type` minus one.
    """
    load_bytes = self.single_load_type.size_in_bytes()
    read_behind = 0
    read_ahead = load_bytes - 1
    return (read_behind, read_ahead)
|
||||
|
||||
def produce_compile_call(self):
|
||||
print " { %d, %d, %d, %d, %d, %s, %d, %d }," % (
|
||||
self.id, self.state_width, self.num_buckets,
|
||||
self.stride, self.domain,
|
||||
self.arch.target, self.conf_pull_back, self.conf_top_level_split)
|
||||
|
||||
def produce_main_loop(self, switch_variant = False):
|
||||
stride_offsets = xrange(0, self.loop_bytes, self.stride)
|
||||
stride_offsetSet = set(stride_offsets)
|
||||
so_steps_last_block = []
|
||||
sh = None
|
||||
last_confirm = None
|
||||
ctxt = CodeGenContext(self)
|
||||
|
||||
if switch_variant:
|
||||
print " ptr -= (iterBytes - dist);"
|
||||
print " { " # need an extra scope around switch variant to stop its globals escaping
|
||||
else:
|
||||
print " if (doMainLoop) {"
|
||||
print " for (; ptr + LOOP_READ_AHEAD < buf + len; ptr += iterBytes) {"
|
||||
print self.produce_flood_check()
|
||||
print " __builtin_prefetch(ptr + (iterBytes*4));"
|
||||
print " assert(((size_t)ptr % START_MOD) == 0);"
|
||||
|
||||
|
||||
# just do globally for now
|
||||
if switch_variant:
|
||||
subsidiary_load_cautious = True
|
||||
confirm_cautious = True
|
||||
else:
|
||||
subsidiary_load_cautious = False
|
||||
confirm_cautious = False
|
||||
|
||||
if not self.fdr2_force_naive_load:
|
||||
bulk_load_steps = [ off for off in range(self.loop_bytes)
|
||||
if off % self.datasize_bytes == 0 and
|
||||
(set(range(off, off + self.datasize_bytes - 1)) & stride_offsetSet)]
|
||||
else:
|
||||
bulk_load_steps = []
|
||||
|
||||
confirm_steps = [ off for off in range(self.loop_bytes) if off % self.confirm_frequency == 0 ]
|
||||
|
||||
for off in bulk_load_steps:
|
||||
lb_var = ctxt.new_var(None, self.bulk_load_type, "current_data_%d" % off)
|
||||
print " " + lb_var.gen_initializer_stmt()
|
||||
|
||||
|
||||
for off in confirm_steps:
|
||||
var_name = "conf%d" % off
|
||||
conf_def_var = ctxt.new_var(None, self.conf_type, var_name)
|
||||
if switch_variant:
|
||||
init_string = "(%s)-1" % self.conf_type.get_name()
|
||||
else:
|
||||
init_string = ""
|
||||
print " " + conf_def_var.gen_initializer_stmt(init_string)
|
||||
|
||||
if switch_variant:
|
||||
print " switch(iterBytes - dist) {"
|
||||
for i in range(0, self.loop_bytes):
|
||||
print " case %d:" % i
|
||||
|
||||
# init and poison conf; over-precise but harmless
|
||||
conf_id = (i / self.confirm_frequency) * self.confirm_frequency
|
||||
if i % self.confirm_frequency:
|
||||
conf_fixup_bits = self.conf_type.size - (self.num_buckets * (i % self.confirm_frequency))
|
||||
print " conf%d >>= %d;" % (conf_id, conf_fixup_bits)
|
||||
else:
|
||||
print " conf%d = 0;" % conf_id
|
||||
|
||||
# init state
|
||||
state_fixup = i % self.extract_frequency
|
||||
state = self.state_variable
|
||||
shift_distance = self.num_buckets * state_fixup
|
||||
if state_fixup:
|
||||
print " %s = %s;" % (state.name, state.type.shift_expr(state.name, shift_distance))
|
||||
if self.state_width < 128:
|
||||
print " %s |= %s;" % (state.name, state.type.lowbit_mask(shift_distance))
|
||||
else:
|
||||
print " %s = or%d(%s, %s);" % (state.name, self.state_width, state.name, state.type.lowbit_mask(shift_distance))
|
||||
|
||||
if not self.fdr2_force_naive_load:
|
||||
# init current_data (could poison it in some cases)
|
||||
load_mod = i % self.datasize_bytes
|
||||
load_offset = i - load_mod
|
||||
if load_mod:
|
||||
# not coming in on an even boundary means having to do a load var
|
||||
# actually, there are a bunch of things we can do on this bulk load
|
||||
# to avoid having to be 'cautious_backwards' but I'm not completely
|
||||
# sure they are good ideas
|
||||
init_string = self.bulk_load_type.load_expr_data(load_offset,
|
||||
code = "cautious_backward")
|
||||
var_name = "current_data_%d" % load_offset
|
||||
lb_var = ctxt.get_var(None, var_name, reader = False, writer = True)
|
||||
print " %s = %s;" % (lb_var.name, init_string)
|
||||
|
||||
print " goto off%d;" % i
|
||||
print " case %d: goto skipSwitch;" % self.loop_bytes
|
||||
print " }"
|
||||
print " {"
|
||||
|
||||
|
||||
for off in range(self.loop_bytes):
|
||||
# X_mod is the offset we're up to relative to the last X operation
|
||||
# X_offset is which of the last X operations matches this iteration
|
||||
|
||||
if (switch_variant):
|
||||
LabelStep(ctxt, off)
|
||||
|
||||
if off in bulk_load_steps:
|
||||
if not self.fdr2_force_naive_load:
|
||||
BulkLoadStep(ctxt, off, self.datasize, define_var = False, aligned = not switch_variant)
|
||||
|
||||
if off in stride_offsets:
|
||||
if switch_variant:
|
||||
OpenScopeStep(ctxt, off)
|
||||
ValueExtractStep(ctxt, off, sub_load_cautious = subsidiary_load_cautious)
|
||||
TableLookupStep(ctxt, self.reach_mult, off)
|
||||
if off % self.extract_frequency:
|
||||
ShiftReachMaskStep(ctxt, off)
|
||||
so = OrStep(ctxt, off, self.state_width)
|
||||
if switch_variant:
|
||||
CloseScopeStep(ctxt, off)
|
||||
if sh != None:
|
||||
so.add_dependency(sh)
|
||||
so_steps_last_block += [ so ]
|
||||
|
||||
extract_mod = off % self.extract_frequency
|
||||
extract_offset = off - extract_mod
|
||||
extract_ready = extract_mod == self.extract_frequency - 1
|
||||
if extract_ready:
|
||||
if switch_variant:
|
||||
OpenScopeStep(ctxt, off)
|
||||
ex = ConfExtractStep(ctxt, extract_offset)
|
||||
ConfAccumulateStep(ctxt, extract_offset, confirm_offset, define_var = False)
|
||||
for so_step in so_steps_last_block:
|
||||
ex.add_dependency(so_step)
|
||||
if switch_variant:
|
||||
CloseScopeStep(ctxt, off)
|
||||
so_steps_last_block = []
|
||||
sh = ShiftStateStep(ctxt, extract_offset, stride_used = self.extract_frequency)
|
||||
sh.add_dependency(ex)
|
||||
|
||||
confirm_mod = off % self.confirm_frequency
|
||||
confirm_offset = off - confirm_mod
|
||||
confirm_ready = confirm_mod == self.confirm_frequency - 1
|
||||
if confirm_ready:
|
||||
cflip = ConfirmFlipStep(ctxt, confirm_offset)
|
||||
cf = ConfirmStep(ctxt, confirm_offset, cautious = confirm_cautious )
|
||||
if last_confirm:
|
||||
cf.add_dependency(last_confirm)
|
||||
last_confirm = cf
|
||||
|
||||
|
||||
if not switch_variant:
|
||||
print ctxt.schedule([ last_confirm, sh ])
|
||||
else:
|
||||
print ctxt.dontschedule([ last_confirm, sh ])
|
||||
|
||||
if switch_variant:
|
||||
print "skipSwitch:;"
|
||||
print " ptr += iterBytes;"
|
||||
print " }" # close extra scope around switch variant
|
||||
print " }"
|
||||
|
||||
|
||||
def produce_init_state(self):
    """Return the C text that declares and initializes the state variable `s`.

    The generated code loads `s` from the `ft` table (indexed by
    `getPreStartVal`) and shifts it by `-num_buckets` when history is
    present, and otherwise copies it from `fdr->start`.
    """
    state_var = self.state_variable
    state_type = self.state_type
    shifted = state_var.type.shift_expr(state_var.name, -1 * self.num_buckets)
    shift_stmt = "%s = %s" % (state_var.name, shifted)

    template = Template("""
$TYPENAME s;
if (a->len_history) {
u32 tmp = getPreStartVal(a, $DOMAIN);
s = *((const $TYPENAME *)ft + tmp);
$SHIFT_EXPR;
} else {
s = *(const $TYPENAME *)&fdr->start;
}
""")
    # ZERO_EXPR has no placeholder in the template; it is still evaluated
    # and passed, exactly as before.
    return template.substitute(TYPENAME = state_type.get_name(),
                               ZERO_EXPR = state_type.zero_expression(),
                               DOMAIN = self.domain,
                               SHIFT_EXPR = shift_stmt)
|
||||
|
||||
def produce_code(self):
|
||||
|
||||
(behind, ahead) = self.get_hash_safety_parameters()
|
||||
loop_read_behind = behind
|
||||
loop_read_ahead = self.loop_bytes + ahead
|
||||
|
||||
# we set up mask and shift stuff for extracting our masks from registers
|
||||
#
|
||||
# we have a choice as to whether to mask out the value early or
|
||||
# extract the value (shift first) then mask it
|
||||
#
|
||||
# Intel has a free scaling factor from 1/2/4/8 so we want to combine
|
||||
# the extra needed shift for SSE registers with the mask operation
|
||||
|
||||
ssb = self.state_type.size / 8 # state size in bytes
|
||||
|
||||
# Intel path
|
||||
if ssb == 16 and self.domain == 16:
|
||||
# obscure corner - we don't have the room in the register to
|
||||
# do this for all values so we don't. domain==16 is pretty
|
||||
# bad anyhow, of course
|
||||
self.reach_mult = 8
|
||||
else:
|
||||
self.reach_mult = ssb
|
||||
|
||||
shift_amts = { 1 : 0, 2 : 1, 4 : 2, 8 : 3, 16: 4 }
|
||||
self.reach_shift_adjust = shift_amts[ ssb/self.reach_mult ]
|
||||
self.reach_mask = ((1 << self.domain) - 1) << self.reach_shift_adjust
|
||||
|
||||
print self.produce_header(visible = False)
|
||||
|
||||
print "// ",
|
||||
print " Arch: " + self.arch.name,
|
||||
print " State type: " + self.state_type.get_name(),
|
||||
print " Num buckets: %d" % self.num_buckets,
|
||||
print " Domain: %d" % self.domain,
|
||||
print " Stride: %d" % self.stride
|
||||
|
||||
print self.produce_common_declarations()
|
||||
print
|
||||
|
||||
print "\tconst size_t tabSize = %d;" % self.table_size
|
||||
print """
|
||||
const u8 * ft = (const u8 *)fdr + ROUNDUP_16(sizeof(struct FDR));
|
||||
const u32 * confBase = (const u32 *)(ft + tabSize);
|
||||
"""
|
||||
print self.produce_init_state()
|
||||
print "\tconst size_t iterBytes = %d;" % self.loop_bytes
|
||||
print "\tconst size_t START_MOD = %d;" % self.datasize_bytes
|
||||
print "\tconst size_t LOOP_READ_AHEAD = %d;" % loop_read_ahead
|
||||
|
||||
print """
|
||||
while (ptr < buf + len) {
|
||||
|
||||
u8 doMainLoop = 1;
|
||||
size_t remaining = len - (ptr - buf);
|
||||
size_t dist;
|
||||
if (remaining <= iterBytes) {
|
||||
dist = remaining; // once through the switch and we're done
|
||||
} else if (remaining < 2 * iterBytes) {
|
||||
// nibble some stuff off the front, skip the main loop,
|
||||
// then come back here
|
||||
dist = iterBytes; // maybe could be cleverer
|
||||
} else {
|
||||
// now, we need to see if we can make it to a main loop iteration
|
||||
// if so, we need to ensure that the main loop iteration is aligned
|
||||
// to a START_MOD boundary and i >= 8 so we can read ptr + i - 8
|
||||
|
||||
// see if we can do it - if not, just switch the main loop off,
|
||||
// eat iterBytes in cautious mode, and come back to this loop
|
||||
|
||||
const u8 * target = MAX(buf + 8, ptr);
|
||||
target = ROUNDUP_PTR(target, START_MOD);
|
||||
dist = target - ptr;
|
||||
if (dist > iterBytes) {
|
||||
doMainLoop = 0;
|
||||
dist = iterBytes;
|
||||
}
|
||||
}
|
||||
"""
|
||||
self.produce_main_loop(switch_variant = True)
|
||||
self.produce_main_loop(switch_variant = False)
|
||||
print """
|
||||
}
|
||||
"""
|
||||
print self.produce_footer()
|
||||
|
||||
def get_name(self):
    """Return the generated function's name, built from arch name, domain, stride and state width."""
    parts = (self.arch.name, self.domain, self.stride, self.state_width)
    return "fdr_exec_%s_d%d_s%d_w%d" % parts
|
||||
|
||||
def __init__(self, state_width, domain, stride,
             arch,
             table_state_width = None,
             num_buckets = 8,
             extract_frequency = None,
             confirm_frequency = None):
    """Validate the matcher parameters and derive every dependent setting.

    Parameters:
        state_width         -- internal state width in bits; only 128 is accepted.
        domain              -- number of input bits used to index the table; 8..16.
        stride              -- table access frequency in bytes; one of 1, 2, 4, 8.
        arch                -- architecture description object, stored as-is.
        table_state_width   -- accepted for interface compatibility; the value
                               passed in is ignored and state_width is used.
        num_buckets         -- only 8 is accepted.
        extract_frequency   -- bytes between state extractions; None selects
                               datasize_bytes.
        confirm_frequency   -- bytes between confirms; None selects the
                               extract frequency.  Must be a multiple of it.

    Invalid combinations are reported through fail_out().
    """
    # First - set up the values that are fundamental to how this matcher will operate
    self.arch = arch

    # get the width of the state width on which we operate internally
    if state_width not in [ 128 ]:
        fail_out("Unknown state width: %d" % state_width)
    self.state_width = state_width
    self.state_type = getRequiredType(self.state_width)
    self.state_variable = IntegerVariable("s", self.state_type)

    # the table always uses the same width as the state
    table_state_width = state_width
    self.table_state_width = state_width
    self.table_state_type = getRequiredType(self.table_state_width)

    # domain is the number of bits that we draw from our input to
    # index our 'reach' table
    if not 8 <= domain <= 16:
        fail_out("Unsupported domain: %d" % domain)
    self.domain = domain
    # this is the load type required for this domain if we want to
    # load it one at a time
    self.single_load_type = getRequiredType(self.domain)

    # table size
    self.table_size = 2**domain * table_state_width // 8

    # stride is the frequency with which we make data-driven
    # accesses to our reach table
    if stride not in [ 1, 2, 4, 8]:
        fail_out("Unsupported stride: %d" % stride)
    if stride * num_buckets > state_width:
        fail_out("Stride %d is too big for the number of buckets %d given state width %d\n" % (stride, num_buckets, state_width))
    self.stride = stride

    if num_buckets != 8:
        fail_out("Unsupported number of buckets: %d" % num_buckets)
    if state_width % num_buckets and state_width == 128:
        fail_out("Bucket scheme requires bit-shifts on m128 (failing)")
    self.num_buckets = num_buckets

    # Second - set up derived or optimization values - these can be
    # overridden by arguments that are passed in

    self.datasize = 64
    self.bulk_load_type = IntegerType(self.datasize)
    self.datasize_bytes = self.datasize/8

    self.value_extract_type = IntegerType(self.datasize)

    self.fdr2_force_naive_load = False # disable everywhere for trunk

    # extract frequency is how frequently (in bytes) we destructively shift
    # our state value after having pulled out that many bytes into a
    # confirm register (of one sort or another).
    # none means a default value - datasize, our biggest easily available GPR
    if extract_frequency is None:
        extract_frequency = self.datasize_bytes
    self.extract_frequency = extract_frequency
    self.extract_size = self.extract_frequency*self.num_buckets
    if extract_frequency < stride:
        fail_out("Can't extract at extract frequency %d with stride %d" % (extract_frequency, stride))
    if extract_frequency not in [ None, 1, 2, 4, 8, 16]:
        fail_out("Weird extract frequency: %d" % extract_frequency)

    if self.extract_size <= 32:
        self.extr_type = IntegerType(32)
    elif self.extract_size <= 64:
        self.extr_type = IntegerType(64)
    else:
        # report the size that was actually computed (the message used to
        # reference an undefined name and raised NameError instead)
        fail_out("Implausible size %d required for confirm extract step" % self.extract_size)

    # extract_frequency is how often we pull out our state and place
    # it somewhere in a lossless fashion
    # confirm_frequency, on the other hand, is how frequently we
    # take the state extracted by extract_frequency and cobble it
    # together into a matching loop
    # confirm_frequency must be a multiple of extract_frequency
    # and must fit into a fast register; for now; we're going to
    # stay in the GPR domain
    if confirm_frequency is None:
        confirm_frequency = self.extract_frequency
    self.confirm_frequency = confirm_frequency
    if confirm_frequency % self.extract_frequency:
        fail_out("Confirm frequency %d must be evenly divisible by extract_frequency %d" % (confirm_frequency, self.extract_frequency))

    self.conf_size = self.confirm_frequency * self.num_buckets
    if self.conf_size <= 32:
        self.conf_type = IntegerType(32)
    elif self.conf_size <= 64:
        self.conf_type = IntegerType(64)
    else:
        fail_out("Implausible size %d required for confirm accumulate step" % self.conf_size)

    # how many bytes in flight at once
    self.loop_bytes = 16

    # confirm configuration

    # how many entries in the top-level confirm table - 256 means
    # complete split on the last character
    self.conf_top_level_split = 256

    # how much we 'pull back' in confirm - this is obviously related
    # to the first level conf but we will keep two separate parameters
    # for this to avoid the risk of conflating these
    self.conf_pull_back = 1

    if self.conf_pull_back > 0 and self.conf_top_level_split < 256:
        fail_out("Pull back distance %d not supported by top level split %d" % (self.conf_pull_back, self.conf_top_level_split))

    # minor stuff
    self.default_body_indent = 8
|
||||
562
src/fdr/fdr_compile.cpp
Normal file
562
src/fdr/fdr_compile.cpp
Normal file
@@ -0,0 +1,562 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: build API.
|
||||
*/
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_compile.h"
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "teddy_compile.h"
|
||||
#include "teddy_engine_description.h"
|
||||
#include "grey.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/dump_mask.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/ue2string.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
|
||||
class FDRCompiler : boost::noncopyable {
|
||||
private:
|
||||
const FDREngineDescription ŋ
|
||||
vector<u8> tab;
|
||||
const vector<hwlmLiteral> &lits;
|
||||
map<BucketIndex, std::vector<LiteralIndex> > bucketToLits;
|
||||
bool make_small;
|
||||
|
||||
u8 *tabIndexToMask(u32 indexInTable);
|
||||
void assignStringToBucket(LiteralIndex l, BucketIndex b);
|
||||
void assignStringsToBuckets();
|
||||
#ifdef DEBUG
|
||||
void dumpMasks(const u8 *defaultMask);
|
||||
#endif
|
||||
void setupTab();
|
||||
aligned_unique_ptr<FDR> setupFDR(pair<u8 *, size_t> link);
|
||||
void createInitialState(FDR *fdr);
|
||||
|
||||
public:
|
||||
FDRCompiler(const vector<hwlmLiteral> &lits_in,
|
||||
const FDREngineDescription &eng_in, bool make_small_in)
|
||||
: eng(eng_in), tab(eng_in.getTabSizeBytes()), lits(lits_in),
|
||||
make_small(make_small_in) {}
|
||||
|
||||
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link);
|
||||
};
|
||||
|
||||
/**
 * \brief Return a pointer to the mask stored for table entry \p indexInTable.
 *
 * Each entry occupies eng.getSchemeWidth() / 8 bytes of \c tab.
 */
u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) {
    const size_t mask_bytes = eng.getSchemeWidth() / 8;
    const size_t byte_offset = (size_t)indexInTable * mask_bytes;
    // The whole mask must lie inside the table; comparing the entry index
    // with the table's byte count (as was done before) does not ensure that.
    assert(byte_offset + mask_bytes <= tab.size());
    return &tab[0] + byte_offset;
}
|
||||
|
||||
static
|
||||
void setbit(u8 *msk, u32 bit) {
|
||||
msk[bit / 8] |= 1U << (bit % 8);
|
||||
}
|
||||
|
||||
static
|
||||
void clearbit(u8 *msk, u32 bit) {
|
||||
msk[bit / 8] &= ~(1U << (bit % 8));
|
||||
}
|
||||
|
||||
static
|
||||
void andMask(u8 *dest, const u8 *a, const u8 *b, u32 num_bytes) {
|
||||
for (u32 i = 0; i < num_bytes; i++) {
|
||||
dest[i] = a[i] & b[i];
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Fill in the start state stored at fdr->start.
 *
 * For every bucket, the bits for positions 0 .. (min_len - 2) are set, where
 * min_len is the length of the shortest literal assigned to the bucket; all
 * other bits are left as they were.
 */
void FDRCompiler::createInitialState(FDR *fdr) {
    u8 *start = (u8 *)&fdr->start;

    /* initial state should be 1 in each slot in the bucket up to bucket
     * minlen - 1, and 0 thereafter */
    for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
        // Find the minimum length for the literals in this bucket. A bucket
        // with no literals keeps the ~0U sentinel.
        const vector<LiteralIndex> &bucket_lits = bucketToLits[b];
        u32 min_len = ~0U;
        for (size_t n = 0; n < bucket_lits.size(); ++n) {
            const u32 len = verify_u32(lits[bucket_lits[n]].s.length());
            if (len < min_len) {
                min_len = len;
            }
        }

        DEBUG_PRINTF("bucket %u has min_len=%u\n", b, min_len);
        assert(min_len);

        for (PositionInBucket i = 0; i < eng.getBucketWidth(b); i++) {
            if (i >= min_len - 1) {
                continue;
            }
            setbit(start, eng.getSchemeBit(b, i));
        }
    }
}
|
||||
|
||||
/**
 * \brief Allocate the FDR structure and lay out its sections.
 *
 * Layout, in order: header (rounded up to 16 bytes), table, confirm
 * structures, flood control, and the optional linked blob \p link. The
 * confirm and flood buffers, and \p link.first when non-null, are released
 * with aligned_free() after being copied.
 */
aligned_unique_ptr<FDR> FDRCompiler::setupFDR(pair<u8 *, size_t> link) {
    const size_t tabSize = eng.getTabSizeBytes();

    pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng);

    pair<u8 *, size_t> confirmTmp =
        setupFullMultiConfs(lits, eng, bucketToLits, make_small);

    const size_t confirmSize = confirmTmp.second;
    const size_t floodSize = floodControlTmp.second;
    const size_t linkSize = link.second;

    assert(ISALIGNED_16(tabSize));
    assert(ISALIGNED_16(confirmSize));
    assert(ISALIGNED_16(floodSize));
    assert(ISALIGNED_16(linkSize));
    const size_t headerSize = ROUNDUP_16(sizeof(FDR));
    const size_t size = ROUNDUP_16(headerSize + tabSize + confirmSize +
                                   floodSize + linkSize);

    DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu "
                 "total=%zu\n",
                 headerSize, tabSize, confirmSize, floodSize, size);

    aligned_unique_ptr<FDR> fdr = aligned_zmalloc_unique<FDR>(size);
    assert(fdr); // otherwise would have thrown std::bad_alloc

    fdr->size = size;
    fdr->engineID = eng.getID();
    fdr->maxStringLen = verify_u32(maxLen(lits));
    createInitialState(fdr.get());

    u8 *const fdr_base = (u8 *)fdr.get();
    u8 *cursor = fdr_base + headerSize;

    // table
    copy(tab.begin(), tab.end(), cursor);
    cursor += tabSize;

    // confirm structures
    memcpy(cursor, confirmTmp.first, confirmSize);
    cursor += confirmSize;
    aligned_free(confirmTmp.first);

    // flood control; its offset is recorded in the header
    fdr->floodOffset = verify_u32(cursor - fdr_base);
    memcpy(cursor, floodControlTmp.first, floodSize);
    cursor += floodSize;
    aligned_free(floodControlTmp.first);

    // optional linked blob; an offset of 0 means there is none
    if (!link.first) {
        fdr->link = 0;
        return fdr;
    }

    fdr->link = verify_u32(cursor - fdr_base);
    memcpy(cursor, link.first, linkSize);
    aligned_free(link.first);

    return fdr;
}
|
||||
|
||||
/** \brief Append literal index \p l to the list kept for bucket \p b. */
void FDRCompiler::assignStringToBucket(LiteralIndex l, BucketIndex b) {
    vector<LiteralIndex> &members = bucketToLits[b];
    members.push_back(l);
}
|
||||
|
||||
struct LitOrder {
|
||||
explicit LitOrder(const vector<hwlmLiteral> &vl_) : vl(vl_) {}
|
||||
bool operator()(const u32 &i1, const u32 &i2) const {
|
||||
const string &i1s = vl[i1].s;
|
||||
const string &i2s = vl[i2].s;
|
||||
|
||||
size_t len1 = i1s.size(), len2 = i2s.size();
|
||||
|
||||
if (len1 != len2) {
|
||||
return len1 < len2;
|
||||
} else {
|
||||
string::const_reverse_iterator it1, it2;
|
||||
tie(it1, it2) =
|
||||
std::mismatch(i1s.rbegin(), i1s.rend(), i2s.rbegin());
|
||||
if (it1 == i1s.rend()) {
|
||||
return false;
|
||||
}
|
||||
return *it1 < *it2;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const vector<hwlmLiteral> &vl;
|
||||
};
|
||||
|
||||
/**
 * \brief Score for a group of \p count literals whose representative length
 * is \p len.
 *
 * A zero length yields the maximum u64a value. Otherwise the length is
 * capped at 128 and the result is count * (128^3 / len^3), using integer
 * division.
 */
static u64a getScoreUtil(u32 len, u32 count) {
    if (len == 0) {
        return (u64a)-1;
    }
    const u32 LEN_THRESH = 128;
    const u32 elen = (len <= LEN_THRESH) ? len : LEN_THRESH;
    const u32 thresh_cubed = LEN_THRESH * LEN_THRESH * LEN_THRESH;
    const u32 elen_cubed = elen * elen * elen;
    const u64a lenScore = thresh_cubed / elen_cubed;
    // deemphasize count - possibly more than needed
    // this might be overkill in the other direction
    return count * lenScore;
}
|
||||
|
||||
//#define DEBUG_ASSIGNMENT
|
||||
void FDRCompiler::assignStringsToBuckets() {
|
||||
typedef u64a SCORE; // 'Score' type
|
||||
const SCORE MAX_SCORE = (SCORE)-1;
|
||||
const u32 CHUNK_MAX = 512;
|
||||
const u32 BUCKET_MAX = 16;
|
||||
typedef pair<SCORE, u32> SCORE_INDEX_PAIR;
|
||||
|
||||
u32 ls = verify_u32(lits.size());
|
||||
// make a vector that contains our literals as pointers or u32 LiteralIndex values
|
||||
vector<LiteralIndex> vli;
|
||||
vli.resize(ls);
|
||||
map<u32, u32> lenCounts;
|
||||
for (LiteralIndex l = 0; l < ls; l++) {
|
||||
vli[l] = l;
|
||||
lenCounts[lits[l].s.size()]++;
|
||||
}
|
||||
// sort vector by literal length + if tied on length, 'magic' criteria of some kind (tbd)
|
||||
stable_sort(vli.begin(), vli.end(), LitOrder(lits));
|
||||
|
||||
#ifdef DEBUG_ASSIGNMENT
|
||||
for (map<u32, u32>::iterator i = lenCounts.begin(), e = lenCounts.end();
|
||||
i != e; ++i) {
|
||||
printf("l<%d>:%d ", i->first, i->second);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
// TODO: detailed early stage literal analysis for v. small cases (actually look at lits)
|
||||
// yes - after we factor this out and merge in the Teddy style of building we can look
|
||||
// at this, although the teddy merge modelling is quite different. It's still probably
|
||||
// adaptable to some extent for this class of problem
|
||||
|
||||
u32 firstIds[CHUNK_MAX]; // how many are in this chunk (CHUNK_MAX - 1 contains 'last' bound)
|
||||
u32 count[CHUNK_MAX]; // how many are in this chunk
|
||||
u32 length[CHUNK_MAX]; // how long things in the chunk are
|
||||
|
||||
const u32 MAX_CONSIDERED_LENGTH = 16;
|
||||
u32 currentChunk = 0;
|
||||
u32 currentSize = 0;
|
||||
u32 chunkStartID = 0;
|
||||
u32 maxPerChunk = ls/(CHUNK_MAX - MIN(MAX_CONSIDERED_LENGTH, lenCounts.size())) + 1;
|
||||
|
||||
for (u32 i = 0; i < ls && currentChunk < CHUNK_MAX - 1; i++) {
|
||||
LiteralIndex l = vli[i];
|
||||
if ((currentSize < MAX_CONSIDERED_LENGTH && (lits[l].s.size() != currentSize)) ||
|
||||
(currentSize != 1 && ((i - chunkStartID) >= maxPerChunk))) {
|
||||
currentSize = lits[l].s.size();
|
||||
if (currentChunk) {
|
||||
count[currentChunk - 1 ] = i - chunkStartID;
|
||||
}
|
||||
chunkStartID = firstIds[currentChunk] = i;
|
||||
length[currentChunk] = currentSize;
|
||||
currentChunk++;
|
||||
}
|
||||
}
|
||||
count[currentChunk - 1] = ls - chunkStartID;
|
||||
// close off chunks with an empty row
|
||||
firstIds[currentChunk] = ls;
|
||||
length[currentChunk] = 0;
|
||||
count[currentChunk] = 0;
|
||||
u32 nChunks = currentChunk + 1;
|
||||
|
||||
#ifdef DEBUG_ASSIGNMENT
|
||||
for (u32 j = 0; j < nChunks; j++) {
|
||||
printf("%d %d %d %d\n", j, firstIds[j], count[j], length[j]);
|
||||
}
|
||||
#endif
|
||||
|
||||
SCORE_INDEX_PAIR t[CHUNK_MAX][BUCKET_MAX]; // pair of score, index
|
||||
u32 nb = eng.getNumBuckets();
|
||||
|
||||
for (u32 j = 0; j < nChunks; j++) {
|
||||
u32 cnt = 0;
|
||||
for (u32 k = j; k < nChunks; ++k) {
|
||||
cnt += count[k];
|
||||
}
|
||||
t[j][0] = make_pair(getScoreUtil(length[j], cnt), 0);
|
||||
}
|
||||
|
||||
for (u32 i = 1; i < nb; i++) {
|
||||
for (u32 j = 0; j < nChunks - 1; j++) { // don't process last, empty row
|
||||
SCORE_INDEX_PAIR best = make_pair(MAX_SCORE, 0);
|
||||
u32 cnt = count[j];
|
||||
for (u32 k = j + 1; k < nChunks - 1; k++, cnt += count[k]) {
|
||||
SCORE score = getScoreUtil(length[j], cnt);
|
||||
if (score > best.first) {
|
||||
break; // if we're now worse locally than our best score, give up
|
||||
}
|
||||
score += t[k][i-1].first;
|
||||
if (score < best.first) {
|
||||
best = make_pair(score, k);
|
||||
}
|
||||
}
|
||||
t[j][i] = best;
|
||||
}
|
||||
t[nChunks - 1][i] = make_pair(0,0); // fill in empty final row for next iteration
|
||||
}
|
||||
|
||||
#ifdef DEBUG_ASSIGNMENT
|
||||
for (u32 j = 0; j < nChunks; j++) {
|
||||
for (u32 i = 0; i < nb; i++) {
|
||||
SCORE_INDEX_PAIR v = t[j][i];
|
||||
printf("<%7lld,%3d>", v.first, v.second);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
// our best score is in best[0][N_BUCKETS-1] and we can follow the links
|
||||
// to find where our buckets should start and what goes into them
|
||||
for (u32 i = 0, n = nb; n && (i != nChunks - 1); n--) {
|
||||
u32 j = t[i][n - 1].second;
|
||||
if (j == 0) {
|
||||
j = nChunks - 1;
|
||||
}
|
||||
// put chunks between i - j into bucket (NBUCKETS-1) - n
|
||||
#ifdef DEBUG_ASSIGNMENT
|
||||
printf("placing from %d to %d in bucket %d\n", firstIds[i], firstIds[j],
|
||||
nb - n);
|
||||
#endif
|
||||
for (u32 k = firstIds[i]; k < firstIds[j]; k++) {
|
||||
assignStringToBucket((LiteralIndex)vli[k], nb - n);
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
void FDRCompiler::dumpMasks(const u8 *defaultMask) {
|
||||
const size_t width = eng.getSchemeWidth();
|
||||
printf("default mask: %s\n", dumpMask(defaultMask, width).c_str());
|
||||
for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
|
||||
u8 *m = tabIndexToMask(i);
|
||||
if (memcmp(m, defaultMask, width / 8)) {
|
||||
printf("tab %04x: %s\n", i, dumpMask(m, width).c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static
|
||||
bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
|
||||
const vector<LiteralIndex> &vl,
|
||||
const vector<hwlmLiteral> &lits,
|
||||
SuffixPositionInString pos,
|
||||
std::map<u32, ue2::unordered_set<u32> > &m2) {
|
||||
u32 distance = 0;
|
||||
if (eng.bits <= 8) {
|
||||
distance = 1;
|
||||
} else if (eng.bits <= 16) {
|
||||
distance = 2;
|
||||
} else if (eng.bits <= 32) {
|
||||
distance = 4;
|
||||
}
|
||||
|
||||
for (vector<LiteralIndex>::const_iterator i = vl.begin(), e = vl.end();
|
||||
i != e; ++i) {
|
||||
if (e - i > 5) {
|
||||
__builtin_prefetch(&lits[*(i + 5)]);
|
||||
}
|
||||
const hwlmLiteral &lit = lits[*i];
|
||||
const size_t sz = lit.s.size();
|
||||
u32 mask = 0;
|
||||
u32 dontCares = 0;
|
||||
for (u32 cnt = 0; cnt < distance; cnt++) {
|
||||
int newPos = pos - cnt;
|
||||
u8 dontCareByte = 0x0;
|
||||
u8 maskByte = 0x0;
|
||||
if (newPos < 0 || ((u32)newPos >= sz)) {
|
||||
dontCareByte = 0xff;
|
||||
} else {
|
||||
u8 c = lit.s[sz - newPos - 1];
|
||||
maskByte = c;
|
||||
u32 remainder = eng.bits - cnt * 8;
|
||||
assert(remainder != 0);
|
||||
if (remainder < 8) {
|
||||
u8 cmask = (1U << remainder) - 1;
|
||||
maskByte &= cmask;
|
||||
dontCareByte |= ~cmask;
|
||||
}
|
||||
if (lit.nocase && ourisalpha(c)) {
|
||||
maskByte &= 0xdf;
|
||||
dontCareByte |= 0x20;
|
||||
}
|
||||
}
|
||||
u32 loc = cnt * 8;
|
||||
mask |= maskByte << loc;
|
||||
dontCares |= dontCareByte << loc;
|
||||
}
|
||||
|
||||
// truncate m and dc down to nBits
|
||||
mask &= (1U << eng.bits) - 1;
|
||||
dontCares &= (1U << eng.bits) - 1;
|
||||
if (dontCares == ((1U << eng.bits) - 1)) {
|
||||
return true;
|
||||
}
|
||||
m2[dontCares].insert(mask);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void FDRCompiler::setupTab() {
|
||||
const size_t mask_size = eng.getSchemeWidth() / 8;
|
||||
assert(mask_size);
|
||||
|
||||
vector<u8> defaultMask(mask_size, 0xff);
|
||||
for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
|
||||
memcpy(tabIndexToMask(i), &defaultMask[0], mask_size);
|
||||
}
|
||||
|
||||
typedef std::map<u32, ue2::unordered_set<u32> > M2SET;
|
||||
|
||||
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
||||
const vector<LiteralIndex> &vl = bucketToLits[b];
|
||||
SuffixPositionInString pLimit = eng.getBucketWidth(b);
|
||||
for (SuffixPositionInString pos = 0; pos < pLimit; pos++) {
|
||||
u32 bit = eng.getSchemeBit(b, pos);
|
||||
M2SET m2;
|
||||
bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2);
|
||||
if (done) {
|
||||
clearbit(&defaultMask[0], bit);
|
||||
continue;
|
||||
}
|
||||
for (M2SET::const_iterator i = m2.begin(), e = m2.end(); i != e;
|
||||
++i) {
|
||||
u32 dc = i->first;
|
||||
const ue2::unordered_set<u32> &mskSet = i->second;
|
||||
u32 v = ~dc;
|
||||
do {
|
||||
u32 b2 = v & dc;
|
||||
for (ue2::unordered_set<u32>::const_iterator
|
||||
i2 = mskSet.begin(),
|
||||
e2 = mskSet.end();
|
||||
i2 != e2; ++i2) {
|
||||
u32 val = (*i2 & ~dc) | b2;
|
||||
clearbit(tabIndexToMask(val), bit);
|
||||
}
|
||||
v = (v + (dc & -dc)) | ~dc;
|
||||
} while (v != ~dc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
|
||||
u8 *m = tabIndexToMask(i);
|
||||
andMask(m, m, &defaultMask[0], mask_size);
|
||||
}
|
||||
#ifdef DEBUG
|
||||
dumpMasks(&defaultMask[0]);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
 * Run the build stages in order: assign the literals to buckets, fill in the
 * mask table, then hand \a link to setupFDR() and return its result.
 */
aligned_unique_ptr<FDR> FDRCompiler::build(pair<u8 *, size_t> link) {
    assignStringsToBuckets();
    setupTab();

    aligned_unique_ptr<FDR> fdr = setupFDR(link);
    return fdr;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
static
|
||||
aligned_unique_ptr<FDR>
|
||||
fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
|
||||
const target_t &target, const Grey &grey, u32 hint,
|
||||
hwlmStreamingControl *stream_control) {
|
||||
pair<u8 *, size_t> link(nullptr, 0);
|
||||
if (stream_control) {
|
||||
link = fdrBuildTableStreaming(lits, stream_control);
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
|
||||
|
||||
if (grey.fdrAllowTeddy) {
|
||||
aligned_unique_ptr<FDR> fdr
|
||||
= teddyBuildTableHinted(lits, make_small, hint, target, link);
|
||||
if (fdr) {
|
||||
DEBUG_PRINTF("build with teddy succeeded\n");
|
||||
return fdr;
|
||||
} else {
|
||||
DEBUG_PRINTF("build with teddy failed, will try with FDR\n");
|
||||
}
|
||||
}
|
||||
|
||||
const unique_ptr<FDREngineDescription> des =
|
||||
(hint == HINT_INVALID) ? chooseEngine(target, lits, make_small)
|
||||
: getFdrDescription(hint);
|
||||
|
||||
if (!des) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
FDRCompiler fc(lits, *des, make_small);
|
||||
return fc.build(link);
|
||||
}
|
||||
|
||||
/**
 * Build an FDR table for \a lits without an engine hint (HINT_INVALID is
 * passed to the internal builder).
 */
aligned_unique_ptr<FDR> fdrBuildTable(const vector<hwlmLiteral> &lits,
                                      bool make_small, const target_t &target,
                                      const Grey &grey,
                                      hwlmStreamingControl *stream_control) {
    const u32 noHint = HINT_INVALID;
    return fdrBuildTableInternal(lits, make_small, target, grey, noHint,
                                 stream_control);
}
|
||||
|
||||
#if !defined(RELEASE_BUILD)
|
||||
|
||||
/**
 * Build an FDR table for \a lits, forwarding the caller-supplied engine
 * \a hint to the internal builder. Only compiled when RELEASE_BUILD is not
 * defined.
 */
aligned_unique_ptr<FDR>
fdrBuildTableHinted(const vector<hwlmLiteral> &lits, bool make_small, u32 hint,
                    const target_t &target, const Grey &grey,
                    hwlmStreamingControl *stream_control) {
    // The streaming link is created inside fdrBuildTableInternal(), so there
    // is nothing to prepare here.
    return fdrBuildTableInternal(lits, make_small, target, grey, hint,
                                 stream_control);
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
// FIXME: should be compile-time only
|
||||
size_t fdrSize(const FDR *fdr) {
|
||||
assert(fdr);
|
||||
return fdr->size;
|
||||
}
|
||||
66
src/fdr/fdr_compile.h
Normal file
66
src/fdr/fdr_compile.h
Normal file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: build API.
|
||||
*/
|
||||
|
||||
#ifndef FDR_COMPILE_H
|
||||
#define FDR_COMPILE_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
struct FDR;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct hwlmLiteral;
|
||||
struct hwlmStreamingControl;
|
||||
struct Grey;
|
||||
struct target_t;
|
||||
|
||||
ue2::aligned_unique_ptr<FDR>
|
||||
fdrBuildTable(const std::vector<hwlmLiteral> &lits, bool make_small,
|
||||
const target_t &target, const Grey &grey,
|
||||
hwlmStreamingControl *stream_control = nullptr);
|
||||
|
||||
#if !defined(RELEASE_BUILD)
|
||||
|
||||
ue2::aligned_unique_ptr<FDR>
|
||||
fdrBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
|
||||
u32 hint, const target_t &target, const Grey &grey,
|
||||
hwlmStreamingControl *stream_control = nullptr);
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
88
src/fdr/fdr_compile_internal.h
Normal file
88
src/fdr/fdr_compile_internal.h
Normal file
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_COMPILE_INTERNAL_H
|
||||
#define FDR_COMPILE_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm_literal.h"
|
||||
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
struct FDRConfirm;
|
||||
struct LitInfo;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
// a pile of decorative typedefs
|
||||
// good for documentation purposes more than anything else
|
||||
typedef u32 LiteralIndex;
|
||||
typedef u32 ConfirmIndex;
|
||||
typedef u32 SuffixPositionInString; // zero is last byte, counting back
|
||||
// into the string
|
||||
typedef u32 BucketIndex;
|
||||
typedef u32 SchemeBitIndex;
|
||||
typedef u32 PositionInBucket; // zero is 'we are matching right now!",
|
||||
// counting towards future matches
|
||||
|
||||
class EngineDescription;
|
||||
class FDREngineDescription;
|
||||
struct hwlmStreamingControl;
|
||||
|
||||
size_t getFDRConfirm(const std::vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
||||
bool make_small);
|
||||
|
||||
std::pair<u8 *, size_t> setupFullMultiConfs(
|
||||
const std::vector<hwlmLiteral> &lits, const EngineDescription &eng,
|
||||
std::map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits,
|
||||
bool make_small);
|
||||
|
||||
// all suffixes include an implicit max_bucket_width suffix to ensure that
|
||||
// we always read a full-scale flood "behind" us in terms of what's in our
|
||||
// state; if we don't have a flood that's long enough we won't be in the
|
||||
// right state yet to allow blindly advancing
|
||||
std::pair<u8 *, size_t>
|
||||
setupFDRFloodControl(const std::vector<hwlmLiteral> &lits,
|
||||
const EngineDescription &eng);
|
||||
|
||||
std::pair<u8 *, size_t>
|
||||
fdrBuildTableStreaming(const std::vector<hwlmLiteral> &lits,
|
||||
hwlmStreamingControl *stream_control);
|
||||
|
||||
static constexpr u32 HINT_INVALID = 0xffffffff;
|
||||
|
||||
// fdr_compile_util.cpp utilities
|
||||
size_t maxLen(const std::vector<hwlmLiteral> &lits);
|
||||
size_t minLenCount(const std::vector<hwlmLiteral> &lits, size_t *count);
|
||||
u32 absdiff(u32 i, u32 j);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
65
src/fdr/fdr_compile_util.cpp
Normal file
65
src/fdr/fdr_compile_util.cpp
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "hwlm/hwlm_literal.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
size_t maxLen(const vector<hwlmLiteral> &lits) {
|
||||
size_t rv = 0;
|
||||
for (const auto &lit : lits) {
|
||||
rv = max(rv, lit.s.size());
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
size_t minLenCount(const vector<hwlmLiteral> &lits, size_t *count) {
|
||||
size_t rv = (size_t)-1;
|
||||
*count = 0;
|
||||
for (const auto &lit : lits) {
|
||||
if (lit.s.size() < rv) {
|
||||
rv = lit.s.size();
|
||||
*count = 1;
|
||||
} else if (lit.s.size() == rv) {
|
||||
(*count)++;
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
/** Absolute difference of two unsigned values, computed without wrap-around. */
u32 absdiff(u32 i, u32 j) {
    if (i > j) {
        return i - j;
    }
    return j - i;
}
|
||||
|
||||
} // namespace ue2
|
||||
100
src/fdr/fdr_confirm.h
Normal file
100
src/fdr/fdr_confirm.h
Normal file
@@ -0,0 +1,100 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_CONFIRM_H
|
||||
#define FDR_CONFIRM_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm.h"
|
||||
|
||||
static really_inline
|
||||
u32 mul_hash_64(u64a lv, u64a andmsk, u64a mult, u32 nBits) {
|
||||
return ((lv & andmsk) * mult) >> (sizeof(u64a)*8 - nBits);
|
||||
}
|
||||
|
||||
// data structures
|
||||
// TODO: fix this hard-coding
|
||||
#define CONF_TYPE u64a
|
||||
#define CONF_HASH_CALL mul_hash_64
|
||||
|
||||
/* Bit flags stored in LitInfo::flags; values are distinct bits so they can
 * be ORed together. */
typedef enum LitInfoFlags {
|
||||
NoFlags = 0,
|
||||
/* set when the source literal is marked nocase */
Caseless = 1,
|
||||
/* set when the source literal is marked noruns */
NoRepeat = 2,
|
||||
/* set when the literal's mask is longer than its string */
ComplexConfirm = 4
|
||||
} LitInfoFlags;
|
||||
|
||||
/**
|
||||
* \brief Structure describing a literal, linked to by FDRConfirm.
|
||||
*
|
||||
* This structure is followed in memory by a variable-sized string prefix at
|
||||
* LitInfo::s, for strings that are longer than CONF_TYPE.
|
||||
*/
|
||||
struct LitInfo {
|
||||
/* comparison value built from the trailing (up to) sizeof(CONF_TYPE)
 * bytes of the literal, with any literal cmp bytes ORed in */
CONF_TYPE v;
|
||||
/* mask applied alongside v: cleared for bytes beyond the literal and for
 * the case bit of caseless letters, with any literal msk bytes ORed in */
CONF_TYPE msk;
|
||||
/* copied from the source literal's groups */
hwlm_group_t groups;
|
||||
/* length of the literal string */
u32 size;
|
||||
u32 id; // literal ID as passed in
|
||||
u8 flags; /* LitInfoFlags */
|
||||
/* 0 for the last LitInfo laid out for a hash value; otherwise an
 * adjustment used together with the literal size to reach the next one */
u8 next;
|
||||
/* size of the literal's mask; only filled in for ComplexConfirm literals */
u8 extended_size;
|
||||
u8 s[1]; // literal prefix, which continues "beyond" this struct.
|
||||
};
|
||||
|
||||
#define FDRC_FLAG_NO_CONFIRM 1
|
||||
|
||||
/**
|
||||
* \brief FDR confirm header.
|
||||
*
|
||||
* This structure is followed in memory by:
|
||||
*
|
||||
* -# lit index mapping (array of u32)
|
||||
* -# list of LitInfo structures
|
||||
*/
|
||||
struct FDRConfirm {
|
||||
/* AND of the msk values of all literals in this confirm structure */
CONF_TYPE andmsk;
|
||||
/* hash multiplier; written as 0 in the no-confirm case */
CONF_TYPE mult;
|
||||
u32 nBitsOrSoleID; // if flags is NO_CONFIRM then this is soleID
|
||||
/* NOTE(review): the builder may also OR NoRepeat into this field in the
 * no-confirm case, so it can hold more than FDRC_FLAG_NO_CONFIRM */
u32 flags; // sole meaning is 'non-zero means no-confirm' (that is all)
|
||||
/* OR of the groups of all literals in this confirm structure */
hwlm_group_t groups;
|
||||
/* the three soleLit* fields are only filled in for the no-confirm case:
 * literal length minus one, then comparison value and mask packed from the
 * literal's bytes with the last byte in the most significant position */
u32 soleLitSize;
|
||||
u32 soleLitCmp;
|
||||
u32 soleLitMsk;
|
||||
};
|
||||
|
||||
static really_inline
|
||||
const u32 *getConfirmLitIndex(const struct FDRConfirm *fdrc) {
|
||||
const u8 *base = (const u8 *)fdrc;
|
||||
const u32 *litIndex =
|
||||
(const u32 *)(base + ROUNDUP_N(sizeof(*fdrc), alignof(u32)));
|
||||
assert(ISALIGNED(litIndex));
|
||||
return litIndex;
|
||||
}
|
||||
|
||||
#endif // FDR_CONFIRM_H
|
||||
479
src/fdr/fdr_confirm_compile.cpp
Normal file
479
src/fdr/fdr_confirm_compile.cpp
Normal file
@@ -0,0 +1,479 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_confirm.h"
|
||||
#include "engine_description.h"
|
||||
#include "teddy_engine_description.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <set>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
typedef u8 ConfSplitType;
|
||||
typedef pair<BucketIndex, ConfSplitType> BucketSplitPair;
|
||||
typedef map<BucketSplitPair, pair<FDRConfirm *, size_t> > BC2CONF;
|
||||
|
||||
// return the number of bytes beyond a length threshold in all strings in lits
|
||||
static
|
||||
size_t thresholdedSize(const vector<hwlmLiteral> &lits, size_t threshold) {
|
||||
size_t tot = 0;
|
||||
for (const auto &lit : lits) {
|
||||
size_t sz = lit.s.size();
|
||||
if (sz > threshold) {
|
||||
tot += ROUNDUP_N(sz - threshold, 8);
|
||||
}
|
||||
}
|
||||
return tot;
|
||||
}
|
||||
|
||||
static
|
||||
u64a make_u64a_mask(const vector<u8> &v) {
|
||||
assert(v.size() <= sizeof(u64a));
|
||||
if (v.size() > sizeof(u64a)) {
|
||||
throw std::exception();
|
||||
}
|
||||
|
||||
u64a mask = 0;
|
||||
size_t vlen = v.size();
|
||||
size_t len = std::min(vlen, sizeof(mask));
|
||||
unsigned char *m = (unsigned char *)&mask;
|
||||
memcpy(m + sizeof(mask) - len, &v[vlen - len], len);
|
||||
return mask;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a temporary vector of LitInfo structures (without the corresponding
|
||||
* pointers to the actual strings; these cannot be laid out yet). These
|
||||
* stay in 1:1 correspondence with the lits[] vector as that's the only
|
||||
* place we have to obtain our full strings.
|
||||
*/
|
||||
static
|
||||
void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
|
||||
CONF_TYPE &andmsk) {
|
||||
const CONF_TYPE all_ones = ~(u64a)0;
|
||||
andmsk = all_ones; // fill in with 'and' of all literal masks
|
||||
|
||||
for (LiteralIndex i = 0; i < lits.size(); i++) {
|
||||
const hwlmLiteral &lit = lits[i];
|
||||
LitInfo &info = tmpLitInfo[i];
|
||||
memset(&info, 0, sizeof(info));
|
||||
info.id = lit.id;
|
||||
u8 flags = NoFlags;
|
||||
if (lit.nocase) {
|
||||
flags |= Caseless;
|
||||
}
|
||||
if (lit.noruns) {
|
||||
flags |= NoRepeat;
|
||||
}
|
||||
if (lit.msk.size() > lit.s.size()) {
|
||||
flags |= ComplexConfirm;
|
||||
info.extended_size = verify_u8(lit.msk.size());
|
||||
}
|
||||
info.flags = flags;
|
||||
info.size = verify_u32(lit.s.size());
|
||||
info.groups = lit.groups;
|
||||
|
||||
// these are built up assuming a LE machine
|
||||
CONF_TYPE msk = all_ones;
|
||||
CONF_TYPE val = 0;
|
||||
for (u32 j = 0; j < sizeof(CONF_TYPE); j++) {
|
||||
u32 shiftLoc = (sizeof(CONF_TYPE) - j - 1) * 8;
|
||||
if (j >= lit.s.size()) {
|
||||
msk &= ~((CONF_TYPE)0xff << shiftLoc);
|
||||
} else {
|
||||
u8 c = lit.s[lit.s.size() - j - 1];
|
||||
if (lit.nocase && ourisalpha(c)) {
|
||||
msk &= ~((CONF_TYPE)CASE_BIT << shiftLoc);
|
||||
val |= (CONF_TYPE)(c & CASE_CLEAR) << shiftLoc;
|
||||
} else {
|
||||
val |= (CONF_TYPE)c << shiftLoc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info.v = val;
|
||||
info.msk = msk;
|
||||
if (!lit.msk.empty()) {
|
||||
u64a l_msk = make_u64a_mask(lit.msk);
|
||||
u64a l_cmp = make_u64a_mask(lit.cmp);
|
||||
|
||||
// test for consistency - if there's intersection, then v and msk
|
||||
// values must line up
|
||||
UNUSED u64a intersection = l_msk & info.msk;
|
||||
assert((info.v & intersection) == (l_cmp & intersection));
|
||||
|
||||
// incorporate lit.msk, lit.cmp into v and msk
|
||||
info.msk |= l_msk;
|
||||
info.v |= l_cmp;
|
||||
}
|
||||
|
||||
andmsk &= info.msk;
|
||||
}
|
||||
}
|
||||
|
||||
//#define FDR_CONFIRM_DUMP 1
|
||||
|
||||
static
|
||||
size_t getFDRConfirm(const vector<hwlmLiteral> &lits, FDRConfirm **fdrc_p,
|
||||
bool applyOneCharOpt, bool make_small, bool make_confirm) {
|
||||
vector<LitInfo> tmpLitInfo(lits.size());
|
||||
CONF_TYPE andmsk;
|
||||
fillLitInfo(lits, tmpLitInfo, andmsk);
|
||||
|
||||
#ifdef FDR_CONFIRM_DUMP
|
||||
printf("-------------------\n");
|
||||
#endif
|
||||
|
||||
// just magic numbers and crude measures for now
|
||||
u32 nBits;
|
||||
if (make_small) {
|
||||
nBits = min(10U, lg2(lits.size()) + 1);
|
||||
} else {
|
||||
nBits = min(13U, lg2(lits.size()) + 4);
|
||||
}
|
||||
|
||||
CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL;
|
||||
u32 flags = 0;
|
||||
// we use next three variables for 'confirmless' case to speed-up
|
||||
// confirmation process
|
||||
u32 soleLitSize = 0;
|
||||
u32 soleLitCmp = 0;
|
||||
u32 soleLitMsk = 0;
|
||||
|
||||
if ((applyOneCharOpt && lits.size() == 1 && lits[0].s.size() == 0 &&
|
||||
lits[0].msk.empty()) || make_confirm == false) {
|
||||
flags = FDRC_FLAG_NO_CONFIRM;
|
||||
if (lits[0].noruns) {
|
||||
flags |= NoRepeat; // messy - need to clean this up later as flags is sorta kinda obsoleted
|
||||
}
|
||||
mult = 0;
|
||||
soleLitSize = lits[0].s.size() - 1;
|
||||
// we can get to this point only in confirmless case;
|
||||
// it means that we have only one literal per FDRConfirm (no packing),
|
||||
// with no literal mask and size of literal is less or equal
|
||||
// to the number of masks of Teddy engine;
|
||||
// maximum number of masks for Teddy is 4, so the size of
|
||||
// literal is definitely less or equal to size of u32
|
||||
assert(lits[0].s.size() <= sizeof(u32));
|
||||
for (u32 i = 0; i < lits[0].s.size(); i++) {
|
||||
u32 shiftLoc = (sizeof(u32) - i - 1) * 8;
|
||||
u8 c = lits[0].s[lits[0].s.size() - i - 1];
|
||||
if (lits[0].nocase && ourisalpha(c)) {
|
||||
soleLitCmp |= (u32)(c & CASE_CLEAR) << shiftLoc;
|
||||
soleLitMsk |= (u32)CASE_CLEAR << shiftLoc;
|
||||
}
|
||||
else {
|
||||
soleLitCmp |= (u32)c << shiftLoc;
|
||||
soleLitMsk |= (u32)0xff << shiftLoc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// we can walk the vector and assign elements from the vectors to a
|
||||
// map by hash value
|
||||
map<u32, vector<LiteralIndex> > res2lits;
|
||||
hwlm_group_t gm = 0;
|
||||
for (LiteralIndex i = 0; i < lits.size(); i++) {
|
||||
LitInfo & li = tmpLitInfo[i];
|
||||
u32 hash = CONF_HASH_CALL(li.v, andmsk, mult, nBits);
|
||||
DEBUG_PRINTF("%016llx --> %u\n", li.v, hash);
|
||||
res2lits[hash].push_back(i);
|
||||
gm |= li.groups;
|
||||
}
|
||||
|
||||
#ifdef FDR_CONFIRM_DUMP
|
||||
// print out the literals reversed - makes it easier to line up analyses
|
||||
// that are end-offset based
|
||||
for (map<u32, vector<LiteralIndex> >::iterator i = res2lits.begin(),
|
||||
e = res2lits.end(); i != e; ++i) {
|
||||
u32 hash = i->first;
|
||||
vector<LiteralIndex> & vlidx = i->second;
|
||||
if (vlidx.size() > 1) {
|
||||
printf("%x -> %zu literals\n", hash, vlidx.size());
|
||||
u32 min_len = lits[vlidx.front()].s.size();
|
||||
vector<set<u8> > vsl; // contains the set of chars at each location
|
||||
// reversed from the end
|
||||
vsl.resize(1024);
|
||||
u32 total_string_size = 0;
|
||||
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
|
||||
e2 = vlidx.end(); i2 != e2; ++i2) {
|
||||
LiteralIndex litIdx = *i2;
|
||||
total_string_size += lits[litIdx].s.size();
|
||||
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
|
||||
vsl[lits[litIdx].s.size()-j].insert(lits[litIdx].s.c_str()[j - 1]);
|
||||
}
|
||||
min_len = MIN(min_len, lits[litIdx].s.size());
|
||||
}
|
||||
printf("common ");
|
||||
for (u32 j = 0; j < min_len; j++) {
|
||||
if (vsl[j].size() == 1) {
|
||||
printf("%02x", (u32)*vsl[j].begin());
|
||||
} else {
|
||||
printf("__");
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
for (vector<LiteralIndex>::iterator i2 = vlidx.begin(),
|
||||
e2 = vlidx.end(); i2 != e2; ++i2) {
|
||||
LiteralIndex litIdx = *i2;
|
||||
printf("%8x %c", lits[litIdx].id, lits[litIdx].nocase ? '!' : ' ');
|
||||
for (u32 j = lits[litIdx].s.size(); j != 0 ; j--) {
|
||||
u32 dist_from_end = lits[litIdx].s.size() - j;
|
||||
if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) {
|
||||
printf("__");
|
||||
} else {
|
||||
printf("%02x", (u32)lits[litIdx].s.c_str()[j-1]);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
u32 total_compares = 0;
|
||||
for (u32 j = 0; j < 1024; j++) { // naughty
|
||||
total_compares += vsl[j].size();
|
||||
}
|
||||
printf("Total compare load: %d Total string size: %d\n\n", total_compares, total_string_size);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
const size_t bitsToLitIndexSize = (1U << nBits) * sizeof(u32);
|
||||
const size_t totalLitSize = thresholdedSize(lits, sizeof(CONF_TYPE));
|
||||
|
||||
// this size can now be a worst-case as we can always be a bit smaller
|
||||
size_t size = ROUNDUP_N(sizeof(FDRConfirm), alignof(u32)) +
|
||||
ROUNDUP_N(bitsToLitIndexSize, alignof(LitInfo)) +
|
||||
sizeof(LitInfo) * lits.size() + totalLitSize;
|
||||
size = ROUNDUP_N(size, alignof(FDRConfirm));
|
||||
|
||||
FDRConfirm *fdrc = (FDRConfirm *)aligned_zmalloc(size);
|
||||
assert(fdrc); // otherwise would have thrown std::bad_alloc
|
||||
|
||||
fdrc->andmsk = andmsk;
|
||||
fdrc->mult = mult;
|
||||
fdrc->nBitsOrSoleID = (flags & FDRC_FLAG_NO_CONFIRM) ? lits[0].id : nBits;
|
||||
fdrc->flags = flags;
|
||||
fdrc->soleLitSize = soleLitSize;
|
||||
fdrc->soleLitCmp = soleLitCmp;
|
||||
fdrc->soleLitMsk = soleLitMsk;
|
||||
|
||||
fdrc->groups = gm;
|
||||
|
||||
// After the FDRConfirm, we have the lit index array.
|
||||
u8 *fdrc_base = (u8 *)fdrc;
|
||||
u8 *ptr = fdrc_base + sizeof(*fdrc);
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(u32));
|
||||
u32 *bitsToLitIndex = (u32 *)ptr;
|
||||
ptr += bitsToLitIndexSize;
|
||||
|
||||
// After the lit index array, we have the LitInfo structures themselves,
|
||||
// which vary in size (as each may have a variable-length string after it).
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
|
||||
|
||||
// Walk the map by hash value assigning indexes and laying out the
|
||||
// elements (and their associated string confirm material) in memory.
|
||||
for (std::map<u32, vector<LiteralIndex> >::const_iterator
|
||||
i = res2lits.begin(), e = res2lits.end(); i != e; ++i) {
|
||||
const u32 hash = i->first;
|
||||
const vector<LiteralIndex> &vlidx = i->second;
|
||||
bitsToLitIndex[hash] = verify_u32(ptr - (u8 *)fdrc);
|
||||
for (vector<LiteralIndex>::const_iterator i2 = vlidx.begin(),
|
||||
e2 = vlidx.end(); i2 != e2; ++i2) {
|
||||
LiteralIndex litIdx = *i2;
|
||||
|
||||
// Write LitInfo header.
|
||||
u8 *oldPtr = ptr;
|
||||
LitInfo &finalLI = *(LitInfo *)ptr;
|
||||
finalLI = tmpLitInfo[litIdx];
|
||||
|
||||
ptr += sizeof(LitInfo); // String starts directly after LitInfo.
|
||||
|
||||
// Write literal prefix (everything before the last N characters,
|
||||
// as the last N are already confirmed).
|
||||
const string &t = lits[litIdx].s;
|
||||
if (t.size() > sizeof(CONF_TYPE)) {
|
||||
size_t prefix_len = t.size() - sizeof(CONF_TYPE);
|
||||
memcpy(&finalLI.s[0], t.c_str(), prefix_len);
|
||||
ptr = &finalLI.s[0] + prefix_len;
|
||||
}
|
||||
|
||||
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
|
||||
if (i2 + 1 == e2) {
|
||||
finalLI.next = 0x0;
|
||||
} else {
|
||||
// our next field represents an adjustment on top of
|
||||
// current address + the actual size of the literal
|
||||
// so we track any rounding up done for alignment and
|
||||
// add this in - that way we don't have to use bigger
|
||||
// than a u8 (for now)
|
||||
assert((size_t)(ptr - oldPtr) > t.size());
|
||||
finalLI.next = verify_u8(ptr - oldPtr - t.size());
|
||||
}
|
||||
}
|
||||
assert((size_t)(ptr - fdrc_base) <= size);
|
||||
}
|
||||
|
||||
*fdrc_p = fdrc;
|
||||
|
||||
// Return actual used size, not worst-case size. Must be rounded up to
|
||||
// FDRConfirm alignment so that the caller can lay out a sequence of these.
|
||||
size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base),
|
||||
alignof(FDRConfirm));
|
||||
assert(actual_size <= size);
|
||||
return actual_size;
|
||||
}
|
||||
|
||||
static
|
||||
u32 setupMultiConfirms(const vector<hwlmLiteral> &lits,
|
||||
const EngineDescription &eng, BC2CONF &bc2Conf,
|
||||
map<BucketIndex, vector<LiteralIndex> > &bucketToLits,
|
||||
bool make_small) {
|
||||
u32 pullBack = eng.getConfirmPullBackDistance();
|
||||
u32 splitMask = eng.getConfirmTopLevelSplit() - 1;
|
||||
bool splitHasCase = splitMask & 0x20;
|
||||
|
||||
bool makeConfirm = true;
|
||||
unique_ptr<TeddyEngineDescription> teddyDescr =
|
||||
getTeddyDescription(eng.getID());
|
||||
if (teddyDescr) {
|
||||
makeConfirm = teddyDescr->needConfirm(lits);
|
||||
}
|
||||
|
||||
u32 totalConfirmSize = 0;
|
||||
for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
|
||||
if (!bucketToLits[b].empty()) {
|
||||
vector<vector<hwlmLiteral> > vl(eng.getConfirmTopLevelSplit());
|
||||
for (vector<LiteralIndex>::const_iterator
|
||||
i = bucketToLits[b].begin(),
|
||||
e = bucketToLits[b].end();
|
||||
i != e; ++i) {
|
||||
hwlmLiteral lit = lits[*i]; // copy
|
||||
// c is last char of this literal
|
||||
u8 c = *(lit.s.rbegin());
|
||||
|
||||
bool suppressSplit = false;
|
||||
if (pullBack) {
|
||||
// make a shorter string to work over if we're pulling back
|
||||
// getFDRConfirm doesn't know about that stuff
|
||||
assert(lit.s.size() >= pullBack);
|
||||
lit.s.resize(lit.s.size() - pullBack);
|
||||
|
||||
u8 c_sub, c_sub_msk;
|
||||
if (lit.msk.empty()) {
|
||||
c_sub = 0;
|
||||
c_sub_msk = 0;
|
||||
} else {
|
||||
c_sub = *(lit.cmp.rbegin());
|
||||
c_sub_msk = *(lit.msk.rbegin());
|
||||
size_t len = lit.msk.size() -
|
||||
min(lit.msk.size(), (size_t)pullBack);
|
||||
lit.msk.resize(len);
|
||||
lit.cmp.resize(len);
|
||||
}
|
||||
|
||||
// if c_sub_msk is 0xff and lit.nocase
|
||||
// resteer 'c' to an exact value and set suppressSplit
|
||||
if ((c_sub_msk == 0xff) && (lit.nocase)) {
|
||||
suppressSplit = true;
|
||||
c = c_sub;
|
||||
}
|
||||
}
|
||||
|
||||
if (!suppressSplit && splitHasCase && lit.nocase &&
|
||||
ourisalpha(c)) {
|
||||
vl[(u8)(mytoupper(c) & splitMask)].push_back(lit);
|
||||
vl[(u8)(mytolower(c) & splitMask)].push_back(lit);
|
||||
} else {
|
||||
vl[c & splitMask].push_back(lit);
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 c = 0; c < eng.getConfirmTopLevelSplit(); c++) {
|
||||
if (!vl[c].empty()) {
|
||||
DEBUG_PRINTF("b %d c %02x sz %zu\n", b, c, vl[c].size());
|
||||
FDRConfirm *fdrc;
|
||||
size_t size = getFDRConfirm(vl[c], &fdrc,
|
||||
eng.typicallyHoldsOneCharLits(),
|
||||
make_small, makeConfirm);
|
||||
BucketSplitPair p = make_pair(b, c);
|
||||
bc2Conf[p] = make_pair(fdrc, size);
|
||||
totalConfirmSize += size;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return totalConfirmSize;
|
||||
}
|
||||
|
||||
/**
 * \brief Build all confirm structures and pack them into a single buffer.
 *
 * Layout of the returned buffer:
 *   1. a table of (top-level split * number of buckets) u32 offsets, indexed
 *      by split * nBuckets + bucket; entries with no confirm stay zero as the
 *      buffer comes from aligned_zmalloc();
 *   2. the FDRConfirm structures themselves, back to back.
 * Offsets in the table are relative to the start of the buffer.
 *
 * The temporary per-pair structures produced by setupMultiConfirms() are freed
 * here once copied.
 *
 * \return (buffer, total size rounded up to 16 bytes).
 */
pair<u8 *, size_t> setupFullMultiConfs(const vector<hwlmLiteral> &lits,
        const EngineDescription &eng,
        map<BucketIndex, vector<LiteralIndex> > &bucketToLits,
        bool make_small) {
    BC2CONF bc2Conf;
    u32 totalConfirmSize = setupMultiConfirms(lits, eng, bc2Conf, bucketToLits,
                                              make_small);

    const u32 primarySwitch = eng.getConfirmTopLevelSplit();
    const u32 nBuckets = eng.getNumBuckets();
    const u32 totalConfSwitchSize = primarySwitch * nBuckets * sizeof(u32);
    u32 totalSize = ROUNDUP_16(totalConfSwitchSize + totalConfirmSize);

    u8 *buf = (u8 *)aligned_zmalloc(totalSize);
    assert(buf); // otherwise would have thrown std::bad_alloc

    u32 *confBase = (u32 *)buf;
    u8 *ptr = buf + totalConfSwitchSize; // first byte after the offset table

    for (const auto &entry : bc2Conf) {
        const BucketIndex b = entry.first.first;
        const u8 c = entry.first.second;
        const pair<FDRConfirm *, size_t> &conf = entry.second;

        // confirm offset is relative to the base of this structure
        const u32 confirm_offset = verify_u32(ptr - (u8 *)buf);
        memcpy(ptr, conf.first, conf.second);
        ptr += conf.second;
        aligned_free(conf.first);

        u32 idx = c * nBuckets + b;
        confBase[idx] = confirm_offset;
    }
    return make_pair(buf, totalSize);
}
|
||||
|
||||
} // namespace ue2
|
||||
244
src/fdr/fdr_confirm_runtime.h
Normal file
244
src/fdr/fdr_confirm_runtime.h
Normal file
@@ -0,0 +1,244 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_CONFIRM_RUNTIME_H
|
||||
#define FDR_CONFIRM_RUNTIME_H
|
||||
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_loadval.h"
|
||||
#include "hwlm/hwlm.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/compare.h"
|
||||
|
||||
#define CONF_LOADVAL_CALL lv_u64a
|
||||
#define CONF_LOADVAL_CALL_CAUTIOUS lv_u64a_ce
|
||||
|
||||
// this is ordinary confirmation function which runs through
|
||||
// the whole confirmation procedure
|
||||
static really_inline
|
||||
void confWithBit(const struct FDRConfirm * fdrc,
|
||||
const struct FDR_Runtime_Args * a,
|
||||
size_t i,
|
||||
CautionReason r,
|
||||
u32 pullBackAmount,
|
||||
hwlmcb_rv_t *control,
|
||||
u32 * last_match) {
|
||||
assert(i < a->len);
|
||||
assert(ISALIGNED(fdrc));
|
||||
|
||||
const u8 * buf = a->buf;
|
||||
const size_t len = a->len;
|
||||
|
||||
CONF_TYPE v;
|
||||
const u8 * confirm_loc = buf + i - pullBackAmount - 7;
|
||||
if (likely(r == NOT_CAUTIOUS || confirm_loc >= buf)) {
|
||||
v = CONF_LOADVAL_CALL(confirm_loc, buf, buf + len);
|
||||
} else { // r == VECTORING, confirm_loc < buf
|
||||
u64a histBytes = a->histBytes;
|
||||
v = CONF_LOADVAL_CALL_CAUTIOUS(confirm_loc, buf, buf + len);
|
||||
// stitch together v (which doesn't move) and history (which does)
|
||||
u32 overhang = buf - confirm_loc;
|
||||
histBytes >>= 64 - (overhang * 8);
|
||||
v |= histBytes;
|
||||
}
|
||||
|
||||
u32 c = CONF_HASH_CALL(v, fdrc->andmsk, fdrc->mult, fdrc->nBitsOrSoleID);
|
||||
u32 start = getConfirmLitIndex(fdrc)[c];
|
||||
if (P0(start)) {
|
||||
const struct LitInfo *l =
|
||||
(const struct LitInfo *)((const u8 *)fdrc + start);
|
||||
|
||||
u8 oldNext; // initialized in loop
|
||||
do {
|
||||
assert(ISALIGNED(l));
|
||||
|
||||
if (P0( (v & l->msk) != l->v)) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((*last_match == l->id) && (l->flags & NoRepeat)) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
const u8 * loc = buf + i - l->size + 1 - pullBackAmount;
|
||||
|
||||
u8 caseless = l->flags & Caseless;
|
||||
if (loc < buf) {
|
||||
u32 full_overhang = buf - loc;
|
||||
|
||||
const u8 * history = (caseless) ?
|
||||
a->buf_history_nocase : a->buf_history;
|
||||
size_t len_history = (caseless) ?
|
||||
a->len_history_nocase : a->len_history;
|
||||
|
||||
// can't do a vectored confirm either if we don't have
|
||||
// the bytes
|
||||
if (full_overhang > len_history) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
// as for the regular case, no need to do a full confirm if
|
||||
// we're a short literal
|
||||
if (unlikely(l->size > sizeof(CONF_TYPE))) {
|
||||
const u8 * s1 = l->s;
|
||||
const u8 * s2 = s1 + full_overhang;
|
||||
const u8 * loc1 = history + len_history - full_overhang;
|
||||
const u8 * loc2 = buf;
|
||||
size_t size1 = MIN(full_overhang,
|
||||
l->size - sizeof(CONF_TYPE));
|
||||
size_t wind_size2_back = sizeof(CONF_TYPE) +
|
||||
full_overhang;
|
||||
size_t size2 = wind_size2_back > l->size ?
|
||||
0 : l->size - wind_size2_back;
|
||||
|
||||
if (cmpForward(loc1, s1, size1, caseless)) {
|
||||
goto out;
|
||||
}
|
||||
if (cmpForward(loc2, s2, size2, caseless)) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
} else { // NON-VECTORING PATH
|
||||
|
||||
// if string < conf_type we don't need regular string cmp
|
||||
if (unlikely(l->size > sizeof(CONF_TYPE))) {
|
||||
if (cmpForward(loc, l->s, l->size - sizeof(CONF_TYPE), caseless)) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (P0(!(l->groups & *control))) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (unlikely(l->flags & ComplexConfirm)) {
|
||||
const u8 * loc2 = buf + i - l->extended_size + 1 - pullBackAmount;
|
||||
if (loc2 < buf) {
|
||||
u32 full_overhang = buf - loc2;
|
||||
size_t len_history = (caseless) ?
|
||||
a->len_history_nocase : a->len_history;
|
||||
if (full_overhang > len_history) {
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*last_match = l->id;
|
||||
*control = a->cb(loc - buf, i, l->id, a->ctxt);
|
||||
out:
|
||||
oldNext = l->next; // oldNext is either 0 or an 'adjust' value
|
||||
l = (const struct LitInfo*)((const u8 *)l + oldNext + l->size);
|
||||
} while (oldNext);
|
||||
}
|
||||
}
|
||||
|
||||
// 'light-weight' confirmation function which is used by 1-mask Teddy;
|
||||
// in the 'confirmless' case it simply calls callback function,
|
||||
// otherwise it calls 'confWithBit' function for the full confirmation procedure
|
||||
static really_inline
|
||||
void confWithBit1(const struct FDRConfirm * fdrc,
|
||||
const struct FDR_Runtime_Args * a,
|
||||
size_t i,
|
||||
CautionReason r,
|
||||
hwlmcb_rv_t *control,
|
||||
u32 * last_match) {
|
||||
assert(i < a->len);
|
||||
assert(ISALIGNED(fdrc));
|
||||
|
||||
if (unlikely(fdrc->mult)) {
|
||||
confWithBit(fdrc, a, i, r, 0, control, last_match);
|
||||
return;
|
||||
} else {
|
||||
u32 id = fdrc->nBitsOrSoleID;
|
||||
|
||||
if ((*last_match == id) && (fdrc->flags & NoRepeat)) {
|
||||
return;
|
||||
}
|
||||
*last_match = id;
|
||||
*control = a->cb(i, i, id, a->ctxt);
|
||||
}
|
||||
}
|
||||
|
||||
// This is 'light-weight' confirmation function which is used by 2-3-4-mask Teddy
|
||||
// In the 'confirmless' case it makes fast 32-bit comparison,
|
||||
// otherwise it calls 'confWithBit' function for the full confirmation procedure
|
||||
static really_inline
|
||||
void confWithBitMany(const struct FDRConfirm * fdrc,
|
||||
const struct FDR_Runtime_Args * a,
|
||||
size_t i,
|
||||
CautionReason r,
|
||||
hwlmcb_rv_t *control,
|
||||
u32 * last_match) {
|
||||
assert(i < a->len);
|
||||
assert(ISALIGNED(fdrc));
|
||||
|
||||
if (i < a->start_offset) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (unlikely(fdrc->mult)) {
|
||||
confWithBit(fdrc, a, i, r, 0, control, last_match);
|
||||
return;
|
||||
} else {
|
||||
const u32 id = fdrc->nBitsOrSoleID;
|
||||
const u32 len = fdrc->soleLitSize;
|
||||
|
||||
if ((*last_match == id) && (fdrc->flags & NoRepeat)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (r == VECTORING && len > i - a->start_offset) {
|
||||
if (len > (i + a->len_history)) {
|
||||
return;
|
||||
}
|
||||
|
||||
u32 cmp = (u32)a->buf[i] << 24;
|
||||
|
||||
if (len <= i) {
|
||||
for (u32 j = 1; j <= len; j++) {
|
||||
cmp |= (u32)a->buf[i - j] << (24 - (j * 8));
|
||||
}
|
||||
} else {
|
||||
for (u32 j = 1; j <= i; j++) {
|
||||
cmp |= (u32)a->buf[i - j] << (24 - (j * 8));
|
||||
}
|
||||
cmp |= (u32)(a->histBytes >> (40 + i * 8));
|
||||
}
|
||||
|
||||
if ((fdrc->soleLitMsk & cmp) != fdrc->soleLitCmp) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
*last_match = id;
|
||||
*control = a->cb(i - len, i, id, a->ctxt);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
98
src/fdr/fdr_dump.cpp
Normal file
98
src/fdr/fdr_dump.cpp
Normal file
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_dump.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "teddy_engine_description.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <memory>
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error No dump support!
|
||||
#endif
|
||||
|
||||
using std::unique_ptr;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
bool fdrIsTeddy(const FDR *fdr) {
|
||||
assert(fdr);
|
||||
u32 engine = fdr->engineID;
|
||||
|
||||
/* teddys don't have an fdr engine description (which is why the dump code
|
||||
* is so broken). */
|
||||
|
||||
return !getFdrDescription(engine);
|
||||
}
|
||||
|
||||
void fdrPrintStats(const FDR *fdr, FILE *f) {
|
||||
const bool isTeddy = fdrIsTeddy(fdr);
|
||||
|
||||
if (isTeddy) {
|
||||
fprintf(f, "TEDDY: %u\n", fdr->engineID);
|
||||
} else {
|
||||
fprintf(f, "FDR: %u\n", fdr->engineID);
|
||||
}
|
||||
|
||||
if (isTeddy) {
|
||||
unique_ptr<TeddyEngineDescription> des =
|
||||
getTeddyDescription(fdr->engineID);
|
||||
if (des) {
|
||||
fprintf(f, " masks %u\n", des->numMasks);
|
||||
fprintf(f, " buckets %u\n", des->getNumBuckets());
|
||||
fprintf(f, " packed %s\n", des->packed ? "true" : "false");
|
||||
} else {
|
||||
fprintf(f, " <unknown engine>\n");
|
||||
}
|
||||
} else {
|
||||
unique_ptr<FDREngineDescription> des =
|
||||
getFdrDescription(fdr->engineID);
|
||||
if (des) {
|
||||
fprintf(f, " stride %u\n", des->stride);
|
||||
fprintf(f, " buckets %u\n", des->getNumBuckets());
|
||||
fprintf(f, " width %u\n", des->schemeWidth);
|
||||
} else {
|
||||
fprintf(f, " <unknown engine>\n");
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(f, " strings ???\n");
|
||||
fprintf(f, " size %zu bytes\n", fdrSize(fdr));
|
||||
fprintf(f, " max length %u\n", fdr->maxStringLen);
|
||||
fprintf(f, " floodoff %u (%x)\n", fdr->floodOffset, fdr->floodOffset);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
49
src/fdr/fdr_dump.h
Normal file
49
src/fdr/fdr_dump.h
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: dump API.
|
||||
*/
|
||||
|
||||
#ifndef FDR_DUMP_H
|
||||
#define FDR_DUMP_H
|
||||
|
||||
#if defined(DUMP_SUPPORT)
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
struct FDR;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void fdrPrintStats(const struct FDR *fdr, FILE *f);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // DUMP_SUPPORT
|
||||
#endif // FDR_DUMP_H
|
||||
216
src/fdr/fdr_engine_description.cpp
Normal file
216
src/fdr/fdr_engine_description.cpp
Normal file
@@ -0,0 +1,216 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "hs_compile.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/compare.h" // for ourisalpha()
|
||||
#include "util/make_unique.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
#include "fdr_autogen_compiler.cpp"
|
||||
|
||||
/**
 * \brief Construct an engine description from a static engine definition.
 *
 * The id, bucket count, confirm pull-back distance and confirm top-level
 * split go to the EngineDescription base (with the CPU feature mask turned
 * into a target via targetByArchFeatures()); scheme width, stride and bits
 * are kept in this class.
 */
FDREngineDescription::FDREngineDescription(const FDREngineDef &def)
    : EngineDescription(def.id,
                        targetByArchFeatures(def.cpu_features),
                        def.numBuckets,
                        def.confirmPullBackDistance,
                        def.confirmTopLevelSplit),
      schemeWidth(def.schemeWidth),
      stride(def.stride),
      bits(def.bits) {
}
|
||||
|
||||
/**
 * \brief Default flood suffix length: ceil(schemeWidth / numBuckets) + 1.
 */
u32 FDREngineDescription::getDefaultFloodSuffixLength() const {
    const u32 width = getSchemeWidth();
    const u32 buckets = getNumBuckets();

    // rounding up, so that scheme width 32 and 6 buckets is 6 not 5!
    const u32 perBucket = (width + buckets - 1) / buckets;

    // the +1 avoids pain due to various reach choices
    return perBucket + 1;
}
|
||||
|
||||
static
|
||||
u32 findDesiredStride(size_t num_lits, size_t min_len, size_t min_len_count) {
|
||||
u32 desiredStride = 1; // always our safe fallback
|
||||
if (min_len > 1) {
|
||||
if (num_lits < 250) {
|
||||
// small cases we just go for it
|
||||
desiredStride = min_len;
|
||||
} else if (num_lits < 800) {
|
||||
// intermediate cases
|
||||
desiredStride = min_len - 1;
|
||||
} else if (num_lits < 5000) {
|
||||
// for larger but not huge sizes, go to stride 2 only if we have at
|
||||
// least minlen 3
|
||||
desiredStride = MIN(min_len - 1, 2);
|
||||
}
|
||||
}
|
||||
|
||||
// patch if count is quite large - a ton of length 2 literals can
|
||||
// break things
|
||||
#ifdef TRY_THIS_LATER
|
||||
if ((min_len == 2) && (desiredStride == 2) && (min_len_count > 20)) {
|
||||
desiredStride = 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
// patch stuff just for the stride 4 case; don't let min_len=4,
|
||||
// desiredStride=4 through as even a few length 4 literals can break things
|
||||
// (far more fragile)
|
||||
if ((min_len == 4) && (desiredStride == 4) && (min_len_count > 2)) {
|
||||
desiredStride = 2;
|
||||
}
|
||||
|
||||
return desiredStride;
|
||||
}
|
||||
|
||||
/**
 * \brief Pick the FDR engine best suited to a literal set on a given target.
 *
 * Every engine valid on \p target whose stride does not exceed the shortest
 * literal length is scored: it starts at 100, loses the distance between its
 * stride and the desired stride, gains its stride if that does not exceed the
 * desired one, and loses the distance between its table bits and an "ideal"
 * bit count derived from the number of literals. The highest score wins; the
 * first engine wins ties.
 *
 * The score is kept in a signed, wide integer: with long minimum literal
 * lengths the stride penalty can exceed the base score, and an unsigned score
 * would wrap around and rank engines inconsistently.
 *
 * \param target target platform.
 * \param vl literals to be matched.
 * \param make_small prefer smaller tables.
 * \return a copy of the chosen engine description, or nullptr if no engine
 *         is usable.
 */
unique_ptr<FDREngineDescription> chooseEngine(const target_t &target,
                                              const vector<hwlmLiteral> &vl,
                                              bool make_small) {
    vector<FDREngineDescription> allDescs;
    getFdrDescriptions(&allDescs);

    // find desired stride
    size_t count;
    size_t msl = minLenCount(vl, &count);
    u32 desiredStride = findDesiredStride(vl.size(), msl, count);

    DEBUG_PRINTF("%zu lits, msl=%zu, desiredStride=%u\n", vl.size(), msl,
                 desiredStride);

    const FDREngineDescription *best = nullptr;
    long long best_score = 0;

    for (size_t engineID = 0; engineID < allDescs.size(); engineID++) {
        const FDREngineDescription &eng = allDescs[engineID];
        if (!eng.isValidOnTarget(target)) {
            continue;
        }
        if (msl < eng.stride) {
            // the shortest literal must be at least one stride long
            continue;
        }

        long long score = 100;

        score -= (long long)absdiff(desiredStride, eng.stride);

        if (eng.stride <= desiredStride) {
            score += eng.stride;
        }

        u32 effLits = vl.size(); /* * desiredStride;*/
        u32 ideal;
        if (effLits < eng.getNumBuckets()) {
            if (eng.stride == 1) {
                ideal = 8;
            } else {
                ideal = 10;
            }
        } else if (effLits < 20) {
            ideal = 10;
        } else if (effLits < 100) {
            ideal = 11;
        } else if (effLits < 1000) {
            ideal = 12;
        } else if (effLits < 10000) {
            ideal = 13;
        } else {
            ideal = 15;
        }

        if (ideal != 8 && eng.schemeWidth == 32) {
            ideal += 1;
        }

        if (make_small) {
            ideal -= 2;
        }

        if (eng.stride > 1) {
            ideal++;
        }

        DEBUG_PRINTF("effLits %u\n", effLits);

        if (target.is_atom_class() && !make_small && effLits < 4000) {
            /* Unless it is a very heavy case, we want to build smaller tables
             * on lightweight machines due to their small caches. */
            ideal -= 2;
        }

        score -= (long long)absdiff(ideal, eng.bits);

        DEBUG_PRINTF("fdr %u: width=%u, bits=%u, buckets=%u, stride=%u "
                     "-> score=%lld\n",
                     eng.getID(), eng.schemeWidth, eng.bits,
                     eng.getNumBuckets(), eng.stride, score);

        if (!best || score > best_score) {
            best = &eng;
            best_score = score;
        }
    }

    if (!best) {
        DEBUG_PRINTF("failed to find engine\n");
        return nullptr;
    }

    DEBUG_PRINTF("using engine %u\n", best->getID());
    return ue2::make_unique<FDREngineDescription>(*best);
}
|
||||
|
||||
/**
 * \brief Map (bucket, position within bucket) to a bit index in the scheme.
 *
 * Buckets are interleaved: consecutive positions of one bucket are
 * getNumBuckets() bits apart.
 */
SchemeBitIndex FDREngineDescription::getSchemeBit(BucketIndex b,
                                                  PositionInBucket p) const {
    assert(p < getBucketWidth(b));

    const SchemeBitIndex bitIndex = p * getNumBuckets() + b;
    assert(bitIndex < getSchemeWidth());

    return bitIndex;
}
|
||||
|
||||
/**
 * \brief Number of scheme bits per bucket; the same for every bucket, so the
 * bucket index argument is ignored.
 */
u32 FDREngineDescription::getBucketWidth(BucketIndex) const {
    const u32 schemeBits = getSchemeWidth();
    const u32 buckets = getNumBuckets();

    // the scheme must divide evenly between the buckets
    assert(schemeBits % buckets == 0);
    return schemeBits / buckets;
}
|
||||
|
||||
/**
 * \brief Look up an FDR engine description by id.
 *
 * \return a copy of the description at index \p engineID in the list produced
 *         by getFdrDescriptions(), or nullptr if the id is out of range.
 */
unique_ptr<FDREngineDescription> getFdrDescription(u32 engineID) {
    vector<FDREngineDescription> allDescs;
    getFdrDescriptions(&allDescs);

    if (engineID < allDescs.size()) {
        return ue2::make_unique<FDREngineDescription>(allDescs[engineID]);
    }

    return nullptr;
}
|
||||
|
||||
} // namespace ue2
|
||||
80
src/fdr/fdr_engine_description.h
Normal file
80
src/fdr/fdr_engine_description.h
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_ENGINE_DESCRIPTION_H
|
||||
#define FDR_ENGINE_DESCRIPTION_H
|
||||
|
||||
#include "engine_description.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Plain definition record from which an FDREngineDescription is
 * constructed.
 *
 * The FDREngineDescription constructor forwards id, numBuckets,
 * confirmPullBackDistance and confirmTopLevelSplit to the EngineDescription
 * base, converts cpu_features with targetByArchFeatures(), and copies
 * schemeWidth, stride and bits into its own members.
 * NOTE(review): field order presumably matters to whatever initialises these
 * records positionally (the autogenerated engine table) -- confirm before
 * reordering. */
struct FDREngineDef {
|
||||
u32 id;
|
||||
u32 schemeWidth;
|
||||
u32 numBuckets;
|
||||
u32 stride;
|
||||
u32 bits;
|
||||
u64a cpu_features;
|
||||
u32 confirmPullBackDistance;
|
||||
u32 confirmTopLevelSplit;
|
||||
};
|
||||
|
||||
class FDREngineDescription : public EngineDescription {
|
||||
public:
|
||||
u32 schemeWidth;
|
||||
u32 stride;
|
||||
u32 bits;
|
||||
|
||||
u32 getSchemeWidth() const { return schemeWidth; }
|
||||
u32 getBucketWidth(BucketIndex b) const;
|
||||
SchemeBitIndex getSchemeBit(BucketIndex b, PositionInBucket p) const;
|
||||
u32 getNumTableEntries() const { return 1 << bits; }
|
||||
u32 getTabSizeBytes() const {
|
||||
return schemeWidth / 8 * getNumTableEntries();
|
||||
}
|
||||
|
||||
explicit FDREngineDescription(const FDREngineDef &def);
|
||||
|
||||
u32 getDefaultFloodSuffixLength() const override;
|
||||
bool typicallyHoldsOneCharLits() const override { return stride == 1; }
|
||||
};
|
||||
|
||||
std::unique_ptr<FDREngineDescription>
|
||||
chooseEngine(const target_t &target, const std::vector<hwlmLiteral> &vl,
|
||||
bool make_small);
|
||||
std::unique_ptr<FDREngineDescription> getFdrDescription(u32 engineID);
|
||||
void getFdrDescriptions(std::vector<FDREngineDescription> *out);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
111
src/fdr/fdr_internal.h
Normal file
111
src/fdr/fdr_internal.h
Normal file
@@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: data structures.
|
||||
*/
|
||||
|
||||
#ifndef FDR_INTERNAL_H
|
||||
#define FDR_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hwlm/hwlm.h" // for hwlm_group_t, HWLMCallback
|
||||
|
||||
/** \brief Caution reason codes.
 *
 * NOTE(review): how each value is consumed is not visible in this header;
 * confirm against the FDR runtime before relying on the numeric values. */
typedef enum {
    NOT_CAUTIOUS = 0, //!< not near a boundary (quantify?)
    VECTORING = 1     //!< potentially vectoring
} CautionReason;
|
||||
|
||||
/** \brief number of different ids that can be triggered by floods of any given
|
||||
* character. */
|
||||
#define FDR_FLOOD_MAX_IDS 16
|
||||
|
||||
struct FDRFlood {
|
||||
hwlm_group_t allGroups; //!< all the groups or'd together
|
||||
u32 suffix;
|
||||
|
||||
/** \brief 0 to FDR_FLOOD_MAX_IDS-1 ids that are generated once per char on
|
||||
* a flood.
|
||||
* If larger we won't handle this through the flood path at all. */
|
||||
u16 idCount;
|
||||
|
||||
u32 ids[FDR_FLOOD_MAX_IDS]; //!< the ids
|
||||
hwlm_group_t groups[FDR_FLOOD_MAX_IDS]; //!< group ids to go with string ids
|
||||
u32 len[FDR_FLOOD_MAX_IDS]; //!< lengths to go with the string ids
|
||||
};
|
||||
|
||||
/** \brief FDR structure.
|
||||
*
|
||||
* 1. struct as-is
|
||||
* 2. primary matching table
|
||||
* 3. confirm stuff
|
||||
*/
|
||||
struct FDR {
|
||||
u32 engineID;
|
||||
u32 size;
|
||||
u32 maxStringLen;
|
||||
u32 floodOffset;
|
||||
|
||||
/** link is the relative offset of a secondary included FDR table for
|
||||
* stream handling if we're a primary FDR table or the subsidiary tertiary
|
||||
* structures (spillover strings and hash table) if we're a secondary
|
||||
* structure. */
|
||||
u32 link;
|
||||
u32 pad1;
|
||||
u32 pad2;
|
||||
u32 pad3;
|
||||
|
||||
union {
|
||||
u32 s_u32;
|
||||
u64a s_u64a;
|
||||
m128 s_m128;
|
||||
} start;
|
||||
};
|
||||
|
||||
/** \brief FDR runtime arguments.
|
||||
*
|
||||
* This structure handles read-only things that are passed extensively around
|
||||
* the FDR run-time functions. They are set by the API, passed by value into
|
||||
* the main function, then a pointer is passed around to all the various
|
||||
* sub-functions (confirm & flood). */
|
||||
struct FDR_Runtime_Args {
|
||||
const u8 *buf;
|
||||
size_t len;
|
||||
const u8 *buf_history;
|
||||
size_t len_history;
|
||||
const u8 *buf_history_nocase;
|
||||
size_t len_history_nocase;
|
||||
size_t start_offset;
|
||||
HWLMCallback cb;
|
||||
void *ctxt;
|
||||
hwlm_group_t *groups;
|
||||
const u8 *firstFloodDetect;
|
||||
const u64a histBytes;
|
||||
};
|
||||
|
||||
#endif
|
||||
216
src/fdr/fdr_loadval.h
Normal file
216
src/fdr/fdr_loadval.h
Normal file
@@ -0,0 +1,216 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_LOADVAL_H
|
||||
#define FDR_LOADVAL_H
|
||||
|
||||
#include "fdr_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/unaligned.h"
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
/* Emits the signature shared by every lv_* loader: the address to load from
 * (ptr) and the bounds [lo, hi) of the readable buffer. lo and hi are marked
 * UNUSED because some loaders only reference them inside assert(). */
#define MAKE_LOADVAL(type, name) \
    static really_inline type name (const u8 * ptr, UNUSED const u8 * lo, UNUSED const u8 * hi)

/* Whole value must sit inside [lo, hi). */
#define NORMAL_SAFE(type) assert(ptr >= lo && (ptr + sizeof(type) - 1) < hi)

/* As NORMAL_SAFE, and ptr must also be naturally aligned for the type.
 * Wrapped in do/while(0) so the macro behaves as a single statement and the
 * caller supplies the terminating semicolon. */
#define ALIGNED_SAFE(type)                                                     \
    do {                                                                       \
        NORMAL_SAFE(type);                                                     \
        assert(((size_t)ptr % sizeof(type)) == 0);                             \
    } while (0)

// these ones need asserts to test the property that we're not handling dynamically
#define CAUTIOUS_FORWARD_SAFE(type) assert(ptr >= lo)
#define CAUTIOUS_BACKWARD_SAFE(type) assert((ptr + sizeof(type) - 1) < hi)

/* Per-byte bounds tests used inside MAKE_LOOP; `i` is the loop index declared
 * there. Each expands to one fully parenthesized expression. */
#define CF_INDEX_CHECK (ptr + i < hi)
#define CB_INDEX_CHECK (lo <= ptr + i)
#define CE_INDEX_CHECK ((lo <= ptr + i) && (ptr + i < hi))

/* Function body that assembles a TYPE one byte at a time. Byte i contributes
 * only when COND holds (otherwise it counts as zero) and is shifted left by
 * SHIFT_FIDDLE * 8 bits. Ends in a return, so it must be the last thing in
 * the enclosing function. */
#define MAKE_LOOP(TYPE, COND, SHIFT_FIDDLE)                                    \
    TYPE v = 0;                                                                \
    for (TYPE i = 0; i < sizeof(TYPE); i++) {                                  \
        if (COND) {                                                            \
            v += (TYPE)ptr[i] << ((SHIFT_FIDDLE)*8);                           \
        }                                                                      \
    }                                                                          \
    return v;

/* Big-endian assembly: byte 0 lands in the most significant position. */
#define MAKE_LOOP_BE(TYPE, COND) \
    MAKE_LOOP(TYPE, COND, sizeof(TYPE)-i-1)

/* Little-endian assembly: byte 0 lands in the least significant position. */
#define MAKE_LOOP_LE(TYPE, COND) \
    MAKE_LOOP(TYPE, COND, i)


#define MAKE_LOOP_BE_CF(TYPE) CAUTIOUS_FORWARD_SAFE(TYPE); MAKE_LOOP_BE(TYPE, CF_INDEX_CHECK)
#define MAKE_LOOP_BE_CB(TYPE) CAUTIOUS_BACKWARD_SAFE(TYPE); MAKE_LOOP_BE(TYPE, CB_INDEX_CHECK)
#define MAKE_LOOP_BE_CE(TYPE) MAKE_LOOP_BE(TYPE, CE_INDEX_CHECK)
#define MAKE_LOOP_LE_CF(TYPE) CAUTIOUS_FORWARD_SAFE(TYPE); MAKE_LOOP_LE(TYPE, CF_INDEX_CHECK)
#define MAKE_LOOP_LE_CB(TYPE) CAUTIOUS_BACKWARD_SAFE(TYPE); MAKE_LOOP_LE(TYPE, CB_INDEX_CHECK)
#define MAKE_LOOP_LE_CE(TYPE) MAKE_LOOP_LE(TYPE, CE_INDEX_CHECK)
|
||||
|
||||
// no suffix = normal (unaligned)
|
||||
// _a = aligned
|
||||
// _cf = cautious forwards, base is always in bounds, but may read over the end of the buffer (test against hi)
|
||||
// _cb = cautious backwards, final byte is always in bounds, but may read over the start of the buffer (test against lo)
|
||||
// _ce = cautious everywhere (in both directions); test against hi and lo
|
||||
|
||||
// u8 loadvals
|
||||
MAKE_LOADVAL(u8, lv_u8) {
|
||||
NORMAL_SAFE(u8);
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u8, lv_u8_cf) {
|
||||
CAUTIOUS_FORWARD_SAFE(u8);
|
||||
if (ptr < hi) {
|
||||
return *ptr;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u8, lv_u8_cb) {
|
||||
CAUTIOUS_BACKWARD_SAFE(u8);
|
||||
if (lo <= ptr) {
|
||||
return *ptr;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u8, lv_u8_ce) {
|
||||
if ((lo <= ptr) && (ptr < hi)) {
|
||||
return *ptr;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u16, lv_u16) {
|
||||
NORMAL_SAFE(u16);
|
||||
return unaligned_load_u16(ptr);
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u16, lv_u16_a) {
|
||||
ALIGNED_SAFE(u16);
|
||||
return *(const u16 *)ptr;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u32, lv_u32) {
|
||||
NORMAL_SAFE(u32);
|
||||
return unaligned_load_u32(ptr);
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u32, lv_u32_a) {
|
||||
ALIGNED_SAFE(u32);
|
||||
return *(const u32 *)ptr;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u64a, lv_u64a) {
|
||||
NORMAL_SAFE(u32);
|
||||
return unaligned_load_u64a(ptr);
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u64a, lv_u64a_a) {
|
||||
ALIGNED_SAFE(u64a);
|
||||
return *(const u64a *)ptr;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(u16, lv_u16_cf) { MAKE_LOOP_LE_CF(u16); }
|
||||
MAKE_LOADVAL(u16, lv_u16_cb) { MAKE_LOOP_LE_CB(u16); }
|
||||
MAKE_LOADVAL(u16, lv_u16_ce) { MAKE_LOOP_LE_CE(u16); }
|
||||
|
||||
MAKE_LOADVAL(u32, lv_u32_cf) { MAKE_LOOP_LE_CF(u32); }
|
||||
MAKE_LOADVAL(u32, lv_u32_cb) { MAKE_LOOP_LE_CB(u32); }
|
||||
MAKE_LOADVAL(u32, lv_u32_ce) { MAKE_LOOP_LE_CE(u32); }
|
||||
|
||||
MAKE_LOADVAL(u64a, lv_u64a_cf) { MAKE_LOOP_LE_CF(u64a); }
|
||||
MAKE_LOADVAL(u64a, lv_u64a_cb) { MAKE_LOOP_LE_CB(u64a); }
|
||||
MAKE_LOADVAL(u64a, lv_u64a_ce) { MAKE_LOOP_LE_CE(u64a); }
|
||||
|
||||
MAKE_LOADVAL(m128, lv_m128) {
|
||||
NORMAL_SAFE(m128);
|
||||
return loadu128(ptr);
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(m128, lv_m128_a) {
|
||||
ALIGNED_SAFE(m128);
|
||||
assert((size_t)ptr % sizeof(m128) == 0);
|
||||
return *(const m128 *)ptr;
|
||||
}
|
||||
|
||||
// m128 cases need to be manually created
|
||||
|
||||
MAKE_LOADVAL(m128, lv_m128_cf) {
|
||||
CAUTIOUS_FORWARD_SAFE(m128);
|
||||
union {
|
||||
u8 val8[16];
|
||||
m128 val128;
|
||||
} u;
|
||||
|
||||
for (u32 i = 0; i < 16; i++) {
|
||||
if (ptr + i < hi) {
|
||||
u.val8[i] = ptr[i];
|
||||
} else {
|
||||
u.val8[i] = 0;
|
||||
}
|
||||
}
|
||||
return u.val128;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(m128, lv_m128_cb) {
|
||||
CAUTIOUS_BACKWARD_SAFE(m128);
|
||||
union {
|
||||
u8 val8[16];
|
||||
m128 val128;
|
||||
} u;
|
||||
|
||||
for (u32 i = 0; i < 16; i++) {
|
||||
if (lo <= ptr + i) {
|
||||
u.val8[i] = ptr[i];
|
||||
} else {
|
||||
u.val8[i] = 0;
|
||||
}
|
||||
}
|
||||
return u.val128;
|
||||
}
|
||||
|
||||
MAKE_LOADVAL(m128, lv_m128_ce) {
|
||||
union {
|
||||
u8 val8[16];
|
||||
m128 val128;
|
||||
} u;
|
||||
|
||||
for (u32 i = 0; i < 16; i++) {
|
||||
if ((lo <= ptr + i) && (ptr + i < hi)) {
|
||||
u.val8[i] = ptr[i];
|
||||
} else {
|
||||
u.val8[i] = 0;
|
||||
}
|
||||
}
|
||||
return u.val128;
|
||||
}
|
||||
|
||||
#endif
|
||||
445
src/fdr/fdr_streaming_compile.cpp
Normal file
445
src/fdr/fdr_streaming_compile.cpp
Normal file
@@ -0,0 +1,445 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_streaming_internal.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "hwlm/hwlm_build.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <deque>
|
||||
#include <set>
|
||||
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::dynamic_bitset;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
struct LongLitOrder {
|
||||
bool operator()(const hwlmLiteral &i1, const hwlmLiteral &i2) const {
|
||||
if (i1.nocase != i2.nocase) {
|
||||
return i1.nocase < i2.nocase;
|
||||
} else {
|
||||
return i1.s < i2.s;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
static
|
||||
bool hwlmLitEqual(const hwlmLiteral &l1, const hwlmLiteral &l2) {
|
||||
return l1.s == l2.s && l1.nocase == l2.nocase;
|
||||
}
|
||||
|
||||
static
|
||||
u32 roundUpToPowerOfTwo(u32 x) {
|
||||
x -= 1;
|
||||
x |= (x >> 1);
|
||||
x |= (x >> 2);
|
||||
x |= (x >> 4);
|
||||
x |= (x >> 8);
|
||||
x |= (x >> 16);
|
||||
return x + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Creates a long literals vector containing all literals of length > max_len.
|
||||
*
|
||||
* The last char of each literal is trimmed as we're not interested in full
|
||||
* matches, only partial matches.
|
||||
*
|
||||
* Literals are sorted (by caseful/caseless, then lexicographical order) and
|
||||
* made unique.
|
||||
*
|
||||
* The ID of each literal is set to its position in the vector.
|
||||
*
|
||||
* \return False if there aren't any long literals.
|
||||
*/
|
||||
static
|
||||
bool setupLongLits(const vector<hwlmLiteral> &lits,
|
||||
vector<hwlmLiteral> &long_lits, size_t max_len) {
|
||||
long_lits.reserve(lits.size());
|
||||
for (vector<hwlmLiteral>::const_iterator it = lits.begin();
|
||||
it != lits.end(); ++it) {
|
||||
if (it->s.length() > max_len) {
|
||||
hwlmLiteral tmp = *it; // copy
|
||||
tmp.s.erase(tmp.s.size() - 1, 1); // erase last char
|
||||
tmp.id = 0; // recalc later
|
||||
tmp.groups = 0; // filled in later by hash bucket(s)
|
||||
long_lits.push_back(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
if (long_lits.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// sort long_literals by caseful/caseless and in lexicographical order,
|
||||
// remove duplicates
|
||||
stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder());
|
||||
vector<hwlmLiteral>::iterator new_end =
|
||||
unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
|
||||
long_lits.erase(new_end, long_lits.end());
|
||||
|
||||
// fill in ids; not currently used
|
||||
for (vector<hwlmLiteral>::iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
i->id = i - long_lits.begin();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// boundaries are the 'start' boundaries for each 'mode'
|
||||
// so boundary[CASEFUL] is the index one above the largest caseful index
|
||||
// positions[CASEFUL] is the # of positions in caseful strings (stream)
|
||||
// hashedPositions[CASEFUL] is the # of positions in caseful strings
|
||||
// (not returned - a temporary)
|
||||
// hashEntries[CASEFUL] is the # of positions hashed for caseful strings
|
||||
// (rounded up to the nearest power of two)
|
||||
static
|
||||
void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
||||
u32 *boundaries, u32 *positions, u32 *hashEntries) {
|
||||
u32 hashedPositions[MAX_MODES];
|
||||
|
||||
for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
|
||||
boundaries[m] = verify_u32(long_lits.size());
|
||||
positions[m] = 0;
|
||||
hashedPositions[m] = 0;
|
||||
}
|
||||
|
||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
if (i->nocase) {
|
||||
boundaries[CASEFUL] = verify_u32(i - long_lits.begin());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
MODES m = i->nocase ? CASELESS : CASEFUL;
|
||||
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
|
||||
hashedPositions[m]++;
|
||||
}
|
||||
positions[m] += i->s.size();
|
||||
}
|
||||
|
||||
for (u32 m = CASEFUL; m < MAX_MODES; m++) {
|
||||
hashEntries[m] = hashedPositions[m]
|
||||
? roundUpToPowerOfTwo(MAX(4096, hashedPositions[m]))
|
||||
: 0;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_COMPILE
|
||||
printf("analyzeLits:\n");
|
||||
for (MODES m = CASEFUL; m < MAX_MODES; m++) {
|
||||
printf("mode %s boundary %d positions %d hashedPositions %d "
|
||||
"hashEntries %d\n",
|
||||
(m == CASEFUL) ? "caseful" : "caseless", boundaries[m],
|
||||
positions[m], hashedPositions[m], hashEntries[m]);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
static
|
||||
u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, MODES m) {
|
||||
return streaming_hash((const u8 *)l.s.c_str() + offset, max_len, m);
|
||||
}
|
||||
|
||||
// sort by 'distance from start'
|
||||
namespace {
|
||||
struct OffsetIDFromEndOrder {
|
||||
const vector<hwlmLiteral> &lits; // not currently used
|
||||
explicit OffsetIDFromEndOrder(const vector<hwlmLiteral> &lits_in)
|
||||
: lits(lits_in) {}
|
||||
bool operator()(const pair<u32, u32> &i1, const pair<u32, u32> &i2) const {
|
||||
if (i1.second != i2.second) {
|
||||
// longest is 'first', so > not <
|
||||
return i1.second > i2.second;
|
||||
}
|
||||
return i1.first < i2.first;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
static
|
||||
void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
|
||||
FDRSHashEntry *tab, size_t numEntries, MODES m,
|
||||
map<u32, u32> &litToOffsetVal) {
|
||||
const u32 nbits = lg2(numEntries);
|
||||
map<u32, deque<pair<u32, u32> > > bucketToLitOffPairs;
|
||||
map<u32, u64a> bucketToBitfield;
|
||||
|
||||
for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
|
||||
e = long_lits.end();
|
||||
i != e; ++i) {
|
||||
const hwlmLiteral &l = *i;
|
||||
if ((m == CASELESS) != i->nocase) {
|
||||
continue;
|
||||
}
|
||||
for (u32 j = 1; j < i->s.size() - max_len + 1; j++) {
|
||||
u32 h = hashLit(l, j, max_len, m);
|
||||
u32 h_ent = h & ((1U << nbits) - 1);
|
||||
u32 h_low = (h >> nbits) & 63;
|
||||
bucketToLitOffPairs[h_ent].push_back(make_pair(i->id, j));
|
||||
bucketToBitfield[h_ent] |= (1ULL << h_low);
|
||||
}
|
||||
}
|
||||
|
||||
// this used to be a set<u32>, but a bitset is much much faster given that
|
||||
// we're using it only for membership testing.
|
||||
dynamic_bitset<> filledBuckets(numEntries); // all bits zero by default.
|
||||
|
||||
// sweep out bitfield entries and save the results swapped accordingly
|
||||
// also, anything with bitfield entries is put in filledBuckets
|
||||
for (map<u32, u64a>::const_iterator i = bucketToBitfield.begin(),
|
||||
e = bucketToBitfield.end();
|
||||
i != e; ++i) {
|
||||
u32 bucket = i->first;
|
||||
u64a contents = i->second;
|
||||
tab[bucket].bitfield = contents;
|
||||
filledBuckets.set(bucket);
|
||||
}
|
||||
|
||||
// store out all our chains based on free values in our hash table.
|
||||
// find nearest free locations that are empty (there will always be more
|
||||
// entries than strings, at present)
|
||||
for (map<u32, deque<pair<u32, u32> > >::iterator
|
||||
i = bucketToLitOffPairs.begin(),
|
||||
e = bucketToLitOffPairs.end();
|
||||
i != e; ++i) {
|
||||
u32 bucket = i->first;
|
||||
deque<pair<u32, u32> > &d = i->second;
|
||||
|
||||
// sort d by distance of the residual string (len minus our depth into
|
||||
// the string). We need to put the 'furthest back' string first...
|
||||
stable_sort(d.begin(), d.end(), OffsetIDFromEndOrder(long_lits));
|
||||
|
||||
while (1) {
|
||||
// first time through is always at bucket, then we fill in links
|
||||
filledBuckets.set(bucket);
|
||||
FDRSHashEntry *ent = &tab[bucket];
|
||||
u32 lit_id = d.front().first;
|
||||
u32 offset = d.front().second;
|
||||
|
||||
ent->state = verify_u32(litToOffsetVal[lit_id] + offset + max_len);
|
||||
ent->link = (u32)LINK_INVALID;
|
||||
|
||||
d.pop_front();
|
||||
if (d.empty()) {
|
||||
break;
|
||||
}
|
||||
// now, if there is another value
|
||||
// find a bucket for it and put in 'bucket' and repeat
|
||||
// all we really need to do is find something not in filledBuckets,
|
||||
// ideally something close to bucket
|
||||
// we search backward and forward from bucket, trying to stay as
|
||||
// close as possible.
|
||||
UNUSED bool found = false;
|
||||
int bucket_candidate = 0;
|
||||
for (u32 k = 1; k < numEntries * 2; k++) {
|
||||
bucket_candidate = bucket + (((k & 1) == 0)
|
||||
? (-(int)k / 2) : (k / 2));
|
||||
if (bucket_candidate < 0 ||
|
||||
(size_t)bucket_candidate >= numEntries) {
|
||||
continue;
|
||||
}
|
||||
if (!filledBuckets.test(bucket_candidate)) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assert(found);
|
||||
bucket = bucket_candidate;
|
||||
ent->link = bucket;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
|
||||
size_t rv = 0;
|
||||
vector<hwlmLiteral>::const_iterator it, ite;
|
||||
for (it = lits.begin(), ite = lits.end(); it != ite; ++it) {
|
||||
rv = max(rv, it->msk.size());
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
/**
 * \brief Builds the streaming long-literal table for \a lits.
 *
 * The table is a single allocation laid out as: FDRSTableHeader, the literal
 * offset table (one FDRSLiteral per long literal plus an end marker), the
 * packed literal strings, then the caseful and caseless hash tables.
 *
 * \param lits           all literals.
 * \param stream_control in: history_min / history_max; out:
 *                       literal_history_required and
 *                       literal_stream_state_required.
 * \return (table, size in bytes), where the table comes from
 *         aligned_zmalloc(); (nullptr, 0) when no literal is long enough to
 *         need a table.
 * \throws std::logic_error if history_max is below the minimum history this
 *         scheme requires.
 */
pair<u8 *, size_t>
fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
                       hwlmStreamingControl *stream_control) {
    // refuse to compile if we are forced to have smaller than minimum
    // history required for long-literal support, full stop
    // otherwise, choose the maximum of the preferred history quantity
    // (currently a fairly extravagant 32) or the already used history
    // quantity - subject to the limitation of stream_control->history_max

    const size_t MIN_HISTORY_REQUIRED = 32;

    if (MIN_HISTORY_REQUIRED > stream_control->history_max) {
        throw std::logic_error("Cannot set history to minimum history required");
    }

    size_t max_len =
        MIN(stream_control->history_max,
            MAX(MIN_HISTORY_REQUIRED, stream_control->history_min));
    assert(max_len >= MIN_HISTORY_REQUIRED);
    size_t max_mask_len = maxMaskLen(lits);

    vector<hwlmLiteral> long_lits;
    if (!setupLongLits(lits, long_lits, max_len)) {
        // "Don't need to do anything" path, not really a fail
        DEBUG_PRINTF("Streaming literal path produces no table\n");

        // we want enough history to manage the longest literal and the longest
        // mask. With neither present the requirement is zero; guard the
        // subtraction so it cannot wrap.
        const size_t longest = max(maxLen(lits), max_mask_len);
        stream_control->literal_history_required = longest ? longest - 1 : 0;
        stream_control->literal_stream_state_required = 0;
        return make_pair(nullptr, size_t{0});
    }

    // Ensure that we have enough room for the longest mask.
    if (max_mask_len) {
        max_len = max(max_len, max_mask_len - 1);
    }

    u32 boundary[MAX_MODES];
    u32 positions[MAX_MODES];
    u32 hashEntries[MAX_MODES];

    analyzeLits(long_lits, max_len, boundary, positions, hashEntries);

    // first assess the size and find our caseless threshold
    size_t headerSize = ROUNDUP_16(sizeof(FDRSTableHeader));

    size_t litTabOffset = headerSize;

    size_t litTabNumEntries = long_lits.size() + 1;
    size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(FDRSLiteral));

    size_t wholeLitTabOffset = litTabOffset + litTabSize;
    size_t totalWholeLitTabSize = ROUNDUP_16(positions[CASEFUL] +
                                             positions[CASELESS]);

    size_t htOffset[MAX_MODES];
    size_t htSize[MAX_MODES];

    htOffset[CASEFUL] = wholeLitTabOffset + totalWholeLitTabSize;
    htSize[CASEFUL] = hashEntries[CASEFUL] * sizeof(FDRSHashEntry);
    htOffset[CASELESS] = htOffset[CASEFUL] + htSize[CASEFUL];
    htSize[CASELESS] = hashEntries[CASELESS] * sizeof(FDRSHashEntry);

    size_t tabSize = ROUNDUP_16(htOffset[CASELESS] + htSize[CASELESS]);

    // need to add +2 to both of these to allow space for the actual largest
    // value as well as handling the fact that we add one to the space when
    // storing out a position to allow zero to mean "no stream state value"
    u8 streamBits[MAX_MODES];
    streamBits[CASEFUL] = lg2(roundUpToPowerOfTwo(positions[CASEFUL] + 2));
    streamBits[CASELESS] = lg2(roundUpToPowerOfTwo(positions[CASELESS] + 2));
    u32 tot_state_bytes = (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8;

    u8 * secondaryTable = (u8 *)aligned_zmalloc(tabSize);
    assert(secondaryTable); // otherwise would have thrown std::bad_alloc

    // then fill it in
    u8 * ptr = secondaryTable;
    FDRSTableHeader * header = (FDRSTableHeader *)ptr;
    // fill in header
    header->pseudoEngineID = (u32)0xffffffff;
    header->N = verify_u8(max_len); // u8 so doesn't matter; won't go > 255
    for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
        header->boundary[m] = boundary[m];
        header->hashOffset[m] = verify_u32(htOffset[m]);
        header->hashNBits[m] = lg2(hashEntries[m]);
        header->streamStateBits[m] = streamBits[m];
    }
    assert(tot_state_bytes < sizeof(u64a));
    header->streamStateBytes = verify_u8(tot_state_bytes); // u8

    ptr += headerSize;

    // now fill in the rest

    FDRSLiteral * litTabPtr = (FDRSLiteral *)ptr;
    ptr += litTabSize;

    map<u32, u32> litToOffsetVal;
    for (vector<hwlmLiteral>::const_iterator i = long_lits.begin(),
                                             e = long_lits.end();
         i != e; ++i) {
        u32 entry = verify_u32(i - long_lits.begin());
        u32 offset = verify_u32(ptr - secondaryTable);

        // point the table entry to the string location
        litTabPtr[entry].offset = offset;

        litToOffsetVal[entry] = offset;

        // copy the string into the string location
        memcpy(ptr, i->s.c_str(), i->s.size());

        ptr += i->s.size(); // and the string location
    }

    // fill in final lit table entry with current ptr (serves as end value)
    litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable);

    // fill hash tables
    ptr = secondaryTable + htOffset[CASEFUL];
    for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
        fillHashes(long_lits, max_len, (FDRSHashEntry *)ptr, hashEntries[m],
                   (MODES)m, litToOffsetVal);
        ptr += htSize[m];
    }

    // tell the world what we did
    stream_control->literal_history_required = max_len;
    stream_control->literal_stream_state_required = tot_state_bytes;
    return make_pair(secondaryTable, tabSize);
}
|
||||
|
||||
} // namespace ue2
|
||||
152
src/fdr/fdr_streaming_internal.h
Normal file
152
src/fdr/fdr_streaming_internal.h
Normal file
@@ -0,0 +1,152 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_STREAMING_INTERNAL_H
|
||||
#define FDR_STREAMING_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "util/unaligned.h"
|
||||
|
||||
// tertiary table:
|
||||
// a header (FDRSTableHeader)
|
||||
// long_lits.size()+1 entries holding an offset to the string in the
|
||||
// 'whole literal table' (FDRSLiteral structure)
|
||||
// the whole literal table - every string packed in (freeform)
|
||||
// hash table (caseful) (FDRSHashEntry)
|
||||
// hash table (caseless) (FDRSHashEntry)
|
||||
|
||||
/** \brief Literal case modes. The values double as array indices (the
 * per-mode arrays in this header are sized by MAX_MODES). */
typedef enum {
    CASEFUL   = 0, //!< case-sensitive literals
    CASELESS  = 1, //!< case-insensitive literals
    MAX_MODES = 2  //!< number of modes
} MODES;
|
||||
|
||||
// We have one of these structures hanging off the 'link' of our secondary
|
||||
// FDR table that handles streaming strings
|
||||
struct FDRSTableHeader {
|
||||
u32 pseudoEngineID; // set to 0xffffffff to indicate this isn't an FDR
|
||||
|
||||
// string id one beyond the maximum entry for this type of literal
|
||||
// boundary[CASEFUL] is the end of the caseful literals
|
||||
// boundary[CASELESS] is the end of the caseless literals and one beyond
|
||||
// the largest literal id (the size of the littab)
|
||||
u32 boundary[MAX_MODES];
|
||||
|
||||
// offsets are 0 if no such table exists
|
||||
// offset from the base of the tertiary structure to the hash table
|
||||
u32 hashOffset[MAX_MODES];
|
||||
u32 hashNBits[MAX_MODES]; // lg2 of the size of the hash table
|
||||
|
||||
u8 streamStateBits[MAX_MODES];
|
||||
u8 streamStateBytes; // total size of packed stream state in bytes
|
||||
u8 N; // prefix lengths
|
||||
u16 pad;
|
||||
};
|
||||
|
||||
// One of these structures per literal entry in our secondary FDR table.
|
||||
struct FDRSLiteral {
|
||||
u32 offset;
|
||||
// potentially - another u32 to point to the 'next lesser included literal'
|
||||
// which would be a literal that overlaps this one in such a way that a
|
||||
// failure to match _this_ literal can leave us in a state that we might
|
||||
// still match that literal. Offset information might also be called for,
|
||||
// in which case we might be wanting to use a FDRSLiteralOffset
|
||||
};
|
||||
|
||||
typedef u32 FDRSLiteralOffset;
|
||||
|
||||
#define LINK_INVALID 0xffffffff
|
||||
|
||||
// One of these structures per hash table entry in our secondary FDR table
|
||||
struct FDRSHashEntry {
|
||||
u64a bitfield;
|
||||
FDRSLiteralOffset state;
|
||||
u32 link;
|
||||
};
|
||||
|
||||
static really_inline
|
||||
u32 get_start_lit_idx(const struct FDRSTableHeader * h, MODES m) {
|
||||
return m == CASEFUL ? 0 : h->boundary[m-1];
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 get_end_lit_idx(const struct FDRSTableHeader * h, MODES m) {
|
||||
return h->boundary[m];
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct FDRSLiteral * getLitTab(const struct FDRSTableHeader * h) {
|
||||
return (const struct FDRSLiteral *) (((const u8 *)h) +
|
||||
ROUNDUP_16(sizeof(struct FDRSTableHeader)));
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, MODES m) {
|
||||
return getLitTab(h)[get_start_lit_idx(h, m)].offset;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 packStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) {
|
||||
return v - getBaseOffsetOfLits(h, m) + 1;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 unpackStateVal(const struct FDRSTableHeader * h, MODES m, u32 v) {
|
||||
return v + getBaseOffsetOfLits(h, m) - 1;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 has_bit(const struct FDRSHashEntry * ent, u32 bit) {
|
||||
return (ent->bitfield >> bit) & 0x1;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 streaming_hash(const u8 *ptr, UNUSED size_t len, MODES mode) {
|
||||
const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
|
||||
const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;
|
||||
assert(len >= 32);
|
||||
|
||||
u64a v1 = unaligned_load_u64a(ptr);
|
||||
u64a v2 = unaligned_load_u64a(ptr + 8);
|
||||
u64a v3 = unaligned_load_u64a(ptr + 16);
|
||||
if (mode == CASELESS) {
|
||||
v1 &= CASEMASK;
|
||||
v2 &= CASEMASK;
|
||||
v3 &= CASEMASK;
|
||||
}
|
||||
v1 *= MULTIPLIER;
|
||||
v2 *= (MULTIPLIER*MULTIPLIER);
|
||||
v3 *= (MULTIPLIER*MULTIPLIER*MULTIPLIER);
|
||||
v1 >>= 32;
|
||||
v2 >>= 32;
|
||||
v3 >>= 32;
|
||||
return v1 ^ v2 ^ v3;
|
||||
}
|
||||
|
||||
#endif
|
||||
365
src/fdr/fdr_streaming_runtime.h
Normal file
365
src/fdr/fdr_streaming_runtime.h
Normal file
@@ -0,0 +1,365 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FDR_STREAMING_RUNTIME_H
|
||||
#define FDR_STREAMING_RUNTIME_H
|
||||
|
||||
#include "fdr_streaming_internal.h"
|
||||
#include "util/partial_store.h"
|
||||
|
||||
static really_inline
|
||||
const struct FDRSTableHeader * getSHDR(const struct FDR * fdr) {
|
||||
const u8 * linkPtr = ((const u8 *)fdr) + fdr->link;
|
||||
// test if it's not really a engineID, but a 'pseudo engine id'
|
||||
assert(*(const u32 *)linkPtr == 0xffffffff);
|
||||
assert(linkPtr);
|
||||
return (const struct FDRSTableHeader *)linkPtr;
|
||||
}
|
||||
|
||||
// Reads from stream state and unpacks values into stream state table.
|
||||
static really_inline
|
||||
void getStreamStates(const struct FDRSTableHeader * streamingTable,
|
||||
const u8 * stream_state, u32 * table) {
|
||||
assert(streamingTable);
|
||||
assert(stream_state);
|
||||
assert(table);
|
||||
|
||||
u8 ss_bytes = streamingTable->streamStateBytes;
|
||||
u8 ssb = streamingTable->streamStateBits[CASEFUL];
|
||||
UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS];
|
||||
assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
|
||||
|
||||
#if defined(ARCH_32_BIT)
|
||||
// On 32-bit hosts, we may be able to avoid having to do any u64a
|
||||
// manipulation at all.
|
||||
if (ss_bytes <= 4) {
|
||||
u32 ssb_mask = (1U << ssb) - 1;
|
||||
u32 streamVal = partial_load_u32(stream_state, ss_bytes);
|
||||
table[CASEFUL] = (u32)(streamVal & ssb_mask);
|
||||
table[CASELESS] = (u32)(streamVal >> ssb);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
u64a ssb_mask = (1ULL << ssb) - 1;
|
||||
u64a streamVal = partial_load_u64a(stream_state, ss_bytes);
|
||||
table[CASEFUL] = (u32)(streamVal & ssb_mask);
|
||||
table[CASELESS] = (u32)(streamVal >> (u64a)ssb);
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
// Defensive checking (used in assert) that these table values don't overflow
|
||||
// outside the range available.
|
||||
static really_inline UNUSED
|
||||
u32 streamingTableOverflow(u32 * table, u8 ssb, u8 ssb_nc) {
|
||||
u32 ssb_mask = (1ULL << (ssb)) - 1;
|
||||
if (table[CASEFUL] & ~ssb_mask) {
|
||||
return 1;
|
||||
}
|
||||
u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1;
|
||||
if (table[CASELESS] & ~ssb_nc_mask) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Reads from stream state table and packs values into stream state.
|
||||
static really_inline
|
||||
void setStreamStates(const struct FDRSTableHeader * streamingTable,
|
||||
u8 * stream_state, u32 * table) {
|
||||
assert(streamingTable);
|
||||
assert(stream_state);
|
||||
assert(table);
|
||||
|
||||
u8 ss_bytes = streamingTable->streamStateBytes;
|
||||
u8 ssb = streamingTable->streamStateBits[CASEFUL];
|
||||
UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS];
|
||||
assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
|
||||
assert(!streamingTableOverflow(table, ssb, ssb_nc));
|
||||
|
||||
#if defined(ARCH_32_BIT)
|
||||
// On 32-bit hosts, we may be able to avoid having to do any u64a
|
||||
// manipulation at all.
|
||||
if (ss_bytes <= 4) {
|
||||
u32 stagingStreamState = table[CASEFUL];
|
||||
stagingStreamState |= (table[CASELESS] << ssb);
|
||||
|
||||
partial_store_u32(stream_state, stagingStreamState, ss_bytes);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
u64a stagingStreamState = (u64a)table[CASEFUL];
|
||||
stagingStreamState |= (u64a)table[CASELESS] << ((u64a)ssb);
|
||||
partial_store_u64a(stream_state, stagingStreamState, ss_bytes);
|
||||
}
|
||||
|
||||
u32 fdrStreamStateActive(const struct FDR * fdr, const u8 * stream_state) {
|
||||
if (!stream_state) {
|
||||
return 0;
|
||||
}
|
||||
const struct FDRSTableHeader * streamingTable = getSHDR(fdr);
|
||||
u8 ss_bytes = streamingTable->streamStateBytes;
|
||||
|
||||
// We just care if there are any bits set, and the test below is faster
|
||||
// than a partial_load_u64a (especially on 32-bit hosts).
|
||||
for (u32 i = 0; i < ss_bytes; i++) {
|
||||
if (*stream_state) {
|
||||
return 1;
|
||||
}
|
||||
++stream_state;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// binary search for the literal index that contains the current state
|
||||
static really_inline
|
||||
u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable,
|
||||
u32 stateValue, MODES m) {
|
||||
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
|
||||
u32 lo = get_start_lit_idx(streamingTable, m);
|
||||
u32 hi = get_end_lit_idx(streamingTable, m);
|
||||
|
||||
// Now move stateValue back by one so that we're looking for the
|
||||
// litTab entry that includes it the string, not the one 'one past' it
|
||||
stateValue -= 1;
|
||||
assert(lo != hi);
|
||||
assert(litTab[lo].offset <= stateValue);
|
||||
assert(litTab[hi].offset > stateValue);
|
||||
|
||||
// binary search to find the entry e such that:
|
||||
// litTab[e].offsetToLiteral <= stateValue < litTab[e+1].offsetToLiteral
|
||||
while (lo + 1 < hi) {
|
||||
u32 mid = (lo + hi) / 2;
|
||||
if (litTab[mid].offset <= stateValue) {
|
||||
lo = mid;
|
||||
} else { //(litTab[mid].offset > stateValue) {
|
||||
hi = mid;
|
||||
}
|
||||
}
|
||||
assert(litTab[lo].offset <= stateValue);
|
||||
assert(litTab[hi].offset > stateValue);
|
||||
return lo;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void fdrUnpackStateMode(struct FDR_Runtime_Args *a,
|
||||
const struct FDRSTableHeader *streamingTable,
|
||||
const struct FDRSLiteral * litTab,
|
||||
const u32 *state_table,
|
||||
const MODES m) {
|
||||
if (!state_table[m]) {
|
||||
return;
|
||||
}
|
||||
|
||||
u32 stateValue = unpackStateVal(streamingTable, m, state_table[m]);
|
||||
u32 idx = findLitTabEntry(streamingTable, stateValue, m);
|
||||
size_t found_offset = litTab[idx].offset;
|
||||
const u8 * found_buf = found_offset + (const u8 *)streamingTable;
|
||||
size_t found_sz = stateValue - found_offset;
|
||||
if (m == CASEFUL) {
|
||||
a->buf_history = found_buf;
|
||||
a->len_history = found_sz;
|
||||
} else {
|
||||
a->buf_history_nocase = found_buf;
|
||||
a->len_history_nocase = found_sz;
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void fdrUnpackState(const struct FDR * fdr, struct FDR_Runtime_Args * a,
|
||||
const u8 * stream_state) {
|
||||
// nothing to do if there's no stream state for the case
|
||||
if (!stream_state) {
|
||||
return;
|
||||
}
|
||||
|
||||
const struct FDRSTableHeader * streamingTable = getSHDR(fdr);
|
||||
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
|
||||
|
||||
u32 state_table[MAX_MODES];
|
||||
getStreamStates(streamingTable, stream_state, state_table);
|
||||
|
||||
fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASEFUL);
|
||||
fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASELESS);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
u32 do_single_confirm(const struct FDRSTableHeader * streamingTable,
|
||||
const struct FDR_Runtime_Args * a, u32 hashState, MODES m) {
|
||||
const struct FDRSLiteral * litTab = getLitTab(streamingTable);
|
||||
u32 idx = findLitTabEntry(streamingTable, hashState, m);
|
||||
size_t found_offset = litTab[idx].offset;
|
||||
const u8 * s1 = found_offset + (const u8 *)streamingTable;
|
||||
assert(hashState > found_offset);
|
||||
size_t l1 = hashState - found_offset;
|
||||
const u8 * buf = a->buf;
|
||||
size_t len = a->len;
|
||||
const char nocase = m != CASEFUL;
|
||||
|
||||
if (l1 > len) {
|
||||
const u8 * hist = nocase ? a->buf_history_nocase : a->buf_history;
|
||||
size_t hist_len = nocase ? a->len_history_nocase : a->len_history;
|
||||
|
||||
if (l1 > len+hist_len) {
|
||||
return 0; // Break out - not enough total history
|
||||
}
|
||||
|
||||
size_t overhang = l1 - len;
|
||||
assert(overhang <= hist_len);
|
||||
|
||||
if (cmpForward(hist + hist_len - overhang, s1, overhang, nocase)) {
|
||||
return 0;
|
||||
}
|
||||
s1 += overhang;
|
||||
l1 -= overhang;
|
||||
}
|
||||
// if we got here, we don't need history or we compared ok out of history
|
||||
assert(l1 <= len);
|
||||
|
||||
if (cmpForward(buf + len - l1, s1, l1, nocase)) {
|
||||
return 0;
|
||||
}
|
||||
return hashState; // our new state
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void fdrFindStreamingHash(const struct FDR_Runtime_Args *a,
|
||||
const struct FDRSTableHeader *streamingTable,
|
||||
u8 hash_len, u32 *hashes) {
|
||||
u8 tempbuf[128];
|
||||
const u8 *base;
|
||||
if (hash_len > a->len) {
|
||||
assert(hash_len <= 128);
|
||||
size_t overhang = hash_len - a->len;
|
||||
assert(overhang <= a->len_history);
|
||||
memcpy(tempbuf, a->buf_history + a->len_history - overhang, overhang);
|
||||
memcpy(tempbuf + overhang, a->buf, a->len);
|
||||
base = tempbuf;
|
||||
} else {
|
||||
assert(hash_len <= a->len);
|
||||
base = a->buf + a->len - hash_len;
|
||||
}
|
||||
|
||||
if (streamingTable->hashNBits[CASEFUL]) {
|
||||
hashes[CASEFUL] = streaming_hash(base, hash_len, CASEFUL);
|
||||
}
|
||||
if (streamingTable->hashNBits[CASELESS]) {
|
||||
hashes[CASELESS] = streaming_hash(base, hash_len, CASELESS);
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
|
||||
u32 h, const MODES m) {
|
||||
u32 nbits = streamingTable->hashNBits[m];
|
||||
if (!nbits) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
u32 h_ent = h & ((1 << nbits) - 1);
|
||||
u32 h_low = (h >> nbits) & 63;
|
||||
|
||||
const struct FDRSHashEntry *tab =
|
||||
(const struct FDRSHashEntry *)((const u8 *)streamingTable
|
||||
+ streamingTable->hashOffset[m]);
|
||||
const struct FDRSHashEntry *ent = tab + h_ent;
|
||||
|
||||
if (!has_bit(ent, h_low)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return ent;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a,
|
||||
const struct FDRSTableHeader *streamingTable,
|
||||
const struct FDRSHashEntry *ent, const MODES m) {
|
||||
assert(ent);
|
||||
assert(streamingTable->hashNBits[m]);
|
||||
|
||||
const struct FDRSHashEntry *tab =
|
||||
(const struct FDRSHashEntry *)((const u8 *)streamingTable
|
||||
+ streamingTable->hashOffset[m]);
|
||||
|
||||
while (1) {
|
||||
u32 tmp = 0;
|
||||
if ((tmp = do_single_confirm(streamingTable, a, ent->state, m))) {
|
||||
state_table[m] = packStateVal(streamingTable, m, tmp);
|
||||
break;
|
||||
}
|
||||
if (ent->link == LINK_INVALID) {
|
||||
break;
|
||||
}
|
||||
ent = tab + ent->link;
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void fdrPackState(const struct FDR *fdr, const struct FDR_Runtime_Args *a,
|
||||
u8 *stream_state) {
|
||||
// nothing to do if there's no stream state for the case
|
||||
if (!stream_state) {
|
||||
return;
|
||||
}
|
||||
|
||||
// get pointers to the streamer FDR and the tertiary structure
|
||||
const struct FDRSTableHeader *streamingTable = getSHDR(fdr);
|
||||
|
||||
assert(streamingTable->N);
|
||||
|
||||
u32 state_table[MAX_MODES] = {0, 0};
|
||||
|
||||
// if we don't have enough history, we don't need to do anything
|
||||
if (streamingTable->N <= a->len + a->len_history) {
|
||||
u32 hashes[MAX_MODES] = {0, 0};
|
||||
|
||||
fdrFindStreamingHash(a, streamingTable, streamingTable->N, hashes);
|
||||
|
||||
const struct FDRSHashEntry *ent_ful = getEnt(streamingTable,
|
||||
hashes[CASEFUL], CASEFUL);
|
||||
const struct FDRSHashEntry *ent_less = getEnt(streamingTable,
|
||||
hashes[CASELESS], CASELESS);
|
||||
|
||||
if (ent_ful) {
|
||||
fdrPackStateMode(state_table, a, streamingTable, ent_ful,
|
||||
CASEFUL);
|
||||
}
|
||||
|
||||
if (ent_less) {
|
||||
fdrPackStateMode(state_table, a, streamingTable, ent_less,
|
||||
CASELESS);
|
||||
}
|
||||
}
|
||||
|
||||
setStreamStates(streamingTable, stream_state, state_table);
|
||||
}
|
||||
|
||||
#endif
|
||||
222
src/fdr/flood_compile.cpp
Normal file
222
src/fdr/flood_compile.cpp
Normal file
@@ -0,0 +1,222 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/ue2string.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
struct FloodComparator {
|
||||
bool operator()(const FDRFlood &f1, const FDRFlood &f2) const {
|
||||
return std::memcmp(&f1, &f2, sizeof(f1)) < 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
static
|
||||
bool isDifferent(u8 oldC, u8 c, bool caseless) {
|
||||
if (caseless) {
|
||||
return mytolower(oldC) != mytolower(c);
|
||||
} else {
|
||||
return oldC != c;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void updateFloodSuffix(vector<FDRFlood> &tmpFlood, u8 c, u32 suffix) {
|
||||
FDRFlood &fl = tmpFlood[c];
|
||||
fl.suffix = MAX(fl.suffix, suffix + 1);
|
||||
DEBUG_PRINTF("Updated Flood Suffix for char '%c' to %u\n", c, fl.suffix);
|
||||
}
|
||||
|
||||
static
|
||||
void addFlood(vector<FDRFlood> &tmpFlood, u8 c, const hwlmLiteral &lit,
|
||||
u32 suffix) {
|
||||
FDRFlood &fl = tmpFlood[c];
|
||||
fl.suffix = MAX(fl.suffix, suffix + 1);
|
||||
if (fl.idCount < FDR_FLOOD_MAX_IDS) {
|
||||
fl.ids[fl.idCount] = lit.id;
|
||||
fl.allGroups |= lit.groups;
|
||||
fl.groups[fl.idCount] = lit.groups;
|
||||
fl.len[fl.idCount] = suffix;
|
||||
// when idCount gets to max_ids this flood no longer happens
|
||||
// only incremented one more time to avoid arithmetic overflow
|
||||
DEBUG_PRINTF("Added Flood for char '%c' suffix=%u len[%hu]=%u\n",
|
||||
c, fl.suffix, fl.idCount, suffix);
|
||||
fl.idCount++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the flood control structure for a set of literals.
 *
 * The returned buffer holds an array of N_CHARS u32 indices (one per byte
 * value) followed by the distinct FDRFlood structures those indices refer to.
 * The second element of the pair is the buffer size, rounded up to a multiple
 * of 16 bytes.
 */
pair<u8 *, size_t> setupFDRFloodControl(const vector<hwlmLiteral> &lits,
                                        const EngineDescription &eng) {
    vector<FDRFlood> tmpFlood(N_CHARS);
    const u32 default_suffix = eng.getDefaultFloodSuffixLength();

    // zero everything to avoid spurious distinctions in the compares
    memset(&tmpFlood[0], 0, N_CHARS * sizeof(FDRFlood));

    for (auto &entry : tmpFlood) {
        entry.suffix = default_suffix;
    }

    for (const auto &lit : lits) {
        DEBUG_PRINTF("lit: '%s'%s\n", escapeString(lit.s).c_str(),
                     lit.nocase ? " (nocase)" : "");
        const u32 litSize = verify_u32(lit.s.size());
        const u32 maskSize = (u32)lit.msk.size();
        u8 c = lit.s[litSize - 1];
        bool nocase = ourisalpha(c) ? lit.nocase : false;

        // If the mask pins the case bit of the last character, the literal is
        // effectively case-sensitive on exactly one case of that character.
        if (nocase && maskSize && (lit.msk[maskSize - 1] & CASE_BIT)) {
            c = (lit.cmp[maskSize - 1] & CASE_BIT) ? mytolower(c) : mytoupper(c);
            nocase = false;
        }

        const u32 iEnd = MAX(litSize, maskSize);
        // upSuffix: upper case suffix length for case-less, or the suffix
        // length for case-sensitive.
        u32 upSuffix = iEnd;
        // loSuffix: lower case suffix length, used only for case-less.
        u32 loSuffix = iEnd;

        // Walk backwards from the end of the literal/mask.
        for (u32 i = 0; i < iEnd; i++) {
            if (i < litSize
                && isDifferent(c, lit.s[litSize - i - 1], lit.nocase)) {
                DEBUG_PRINTF("non-flood char in literal[%u] %c != %c\n",
                             i, c, lit.s[litSize - i - 1]);
                upSuffix = MIN(upSuffix, i);
                loSuffix = MIN(loSuffix, i); // makes sense only for case-less
                break;
            }
            if (i >= maskSize) {
                continue;
            }

            const u8 m = lit.msk[maskSize - i - 1];
            const u8 cm = lit.cmp[maskSize - i - 1] & m;
            if (nocase) {
                if ((mytoupper(c) & m) != cm) {
                    DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n",
                                 i, mytoupper(c), cm);
                    upSuffix = MIN(upSuffix, i);
                }
                if ((mytolower(c) & m) != cm) {
                    DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n",
                                 i, mytolower(c), cm);
                    loSuffix = MIN(loSuffix, i);
                }
                // stop once both cases have been cut short
                if (loSuffix != iEnd && upSuffix != iEnd) {
                    break;
                }
            } else if ((c & m) != cm) {
                DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n", i, c, cm);
                upSuffix = MIN(upSuffix, i);
                break;
            }
        }

        // A suffix that reached iEnd means the whole literal is a flood of
        // this character; anything shorter only raises the suffix length.
        const u8 upC = nocase ? mytoupper(c) : c;
        if (upSuffix == iEnd) {
            addFlood(tmpFlood, upC, lit, upSuffix);
        } else {
            updateFloodSuffix(tmpFlood, upC, upSuffix);
        }

        if (nocase) {
            if (loSuffix == iEnd) {
                addFlood(tmpFlood, mytolower(c), lit, loSuffix);
            } else {
                updateFloodSuffix(tmpFlood, mytolower(c), loSuffix);
            }
        }
    }

#ifdef DEBUG
    for (u32 i = 0; i < N_CHARS; i++) {
        FDRFlood &fl = tmpFlood[i];
        if (!fl.idCount) {
            continue;
        }

        printf("i is %02x fl->idCount is %hd fl->suffix is %d fl->allGroups is "
               "%016llx\n", i, fl.idCount, fl.suffix, fl.allGroups);
        for (u32 j = 0; j < fl.idCount; j++) {
            printf("j is %d fl.groups[j] %016llx fl.len[j] %d \n", j,
                   fl.groups[j], fl.len[j]);
        }
    }
#endif

    // Group the byte values by identical flood structure so that each
    // distinct structure is laid out only once.
    map<FDRFlood, CharReach, FloodComparator> flood2chars;
    for (u32 i = 0; i < N_CHARS; i++) {
        flood2chars[tmpFlood[i]].set(i);
    }

    const u32 nDistinctFloods = flood2chars.size();
    const size_t floodHeaderSize = sizeof(u32) * N_CHARS;
    const size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods;
    const size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize);
    u8 *buf = (u8 *)aligned_zmalloc(totalSize);
    assert(buf); // otherwise would have thrown std::bad_alloc

    u32 *floodHeader = (u32 *)buf;
    FDRFlood *layoutFlood = (FDRFlood *)(buf + floodHeaderSize);

    u32 floodIdx = 0;
    for (const auto &kv : flood2chars) {
        const CharReach &cr = kv.second;
        layoutFlood[floodIdx] = kv.first;
        for (size_t ch = cr.find_first(); ch != cr.npos; ch = cr.find_next(ch)) {
            floodHeader[ch] = floodIdx;
        }
        floodIdx++;
    }

    DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n",
                 floodHeaderSize, floodStructSize, totalSize);

    return make_pair(buf, totalSize);
}
|
||||
|
||||
} // namespace ue2
|
||||
347
src/fdr/flood_runtime.h
Normal file
347
src/fdr/flood_runtime.h
Normal file
@@ -0,0 +1,347 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef FLOOD_RUNTIME
|
||||
#define FLOOD_RUNTIME
|
||||
|
||||
#if defined(ARCH_64_BIT)
|
||||
#define FLOOD_64
|
||||
#else
|
||||
#define FLOOD_32
|
||||
#endif
|
||||
#define FLOOD_MINIMUM_SIZE 256
|
||||
#define FLOOD_BACKOFF_START 32
|
||||
|
||||
static really_inline
|
||||
const u8 * nextFloodDetect(const u8 * buf, size_t len, u32 floodBackoff) {
|
||||
// if we don't have a flood at either the start or end,
|
||||
// or have a very small buffer, don't bother with flood detection
|
||||
if (len < FLOOD_MINIMUM_SIZE) {
|
||||
return buf + len;
|
||||
}
|
||||
|
||||
/* entry points in runtime.c prefetch relevant data */
|
||||
#ifndef FLOOD_32
|
||||
u64a x11 = *(const u64a *)ROUNDUP_PTR(buf, 8);
|
||||
u64a x12 = *(const u64a *)ROUNDUP_PTR(buf+8, 8);
|
||||
if (x11 == x12) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
u64a x21 = *(const u64a *)ROUNDUP_PTR(buf + len/2, 8);
|
||||
u64a x22 = *(const u64a *)ROUNDUP_PTR(buf + len/2 + 8, 8);
|
||||
if (x21 == x22) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
u64a x31 = *(const u64a *)ROUNDUP_PTR(buf + len - 24, 8);
|
||||
u64a x32 = *(const u64a *)ROUNDUP_PTR(buf + len - 16, 8);
|
||||
if (x31 == x32) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
#else
|
||||
u32 x11 = *(const u32 *)ROUNDUP_PTR(buf, 4);
|
||||
u32 x12 = *(const u32 *)ROUNDUP_PTR(buf+4, 4);
|
||||
if (x11 == x12) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
u32 x21 = *(const u32 *)ROUNDUP_PTR(buf + len/2, 4);
|
||||
u32 x22 = *(const u32 *)ROUNDUP_PTR(buf + len/2 + 4, 4);
|
||||
if (x21 == x22) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
u32 x31 = *(const u32 *)ROUNDUP_PTR(buf + len - 12, 4);
|
||||
u32 x32 = *(const u32 *)ROUNDUP_PTR(buf + len - 8, 4);
|
||||
if (x31 == x32) {
|
||||
return buf + floodBackoff;
|
||||
}
|
||||
#endif
|
||||
return buf + len;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u8 * floodDetect(const struct FDR * fdr,
|
||||
const struct FDR_Runtime_Args * a,
|
||||
const u8 ** ptrPtr,
|
||||
const u8 * tryFloodDetect,
|
||||
u32 * floodBackoffPtr,
|
||||
hwlmcb_rv_t * control,
|
||||
u32 iterBytes) {
|
||||
DEBUG_PRINTF("attempting flood detection at %p\n", tryFloodDetect);
|
||||
const u8 * buf = a->buf;
|
||||
const size_t len = a->len;
|
||||
HWLMCallback cb = a->cb;
|
||||
void * ctxt = a->ctxt;
|
||||
|
||||
const u8 * ptr = *ptrPtr;
|
||||
// tryFloodDetect is never put in places where unconditional
|
||||
// reads a short distance forward or backward here
|
||||
// TODO: rationale for this line needs to be rediscovered!!
|
||||
size_t mainLoopLen = len > iterBytes ? len - iterBytes : 0;
|
||||
const u32 i = ptr - buf;
|
||||
u32 j = i;
|
||||
|
||||
// go from c to our FDRFlood structure
|
||||
u8 c = buf[i];
|
||||
const u8 * fBase = ((const u8 *)fdr) + fdr->floodOffset;
|
||||
u32 fIdx = ((const u32 *)fBase)[c];
|
||||
const struct FDRFlood * fsb = (const struct FDRFlood *)(fBase + sizeof(u32) * 256);
|
||||
const struct FDRFlood * fl = &fsb[fIdx];
|
||||
|
||||
#ifndef FLOOD_32
|
||||
u64a cmpVal = c;
|
||||
cmpVal |= cmpVal << 8;
|
||||
cmpVal |= cmpVal << 16;
|
||||
cmpVal |= cmpVal << 32;
|
||||
u64a probe = *(const u64a *)ROUNDUP_PTR(buf+i, 8);
|
||||
#else
|
||||
u32 cmpVal = c;
|
||||
cmpVal |= cmpVal << 8;
|
||||
cmpVal |= cmpVal << 16;
|
||||
u32 probe = *(const u32 *)ROUNDUP_PTR(buf+i, 4);
|
||||
#endif
|
||||
|
||||
if ((probe != cmpVal) || (fl->idCount >= FDR_FLOOD_MAX_IDS)) {
|
||||
*floodBackoffPtr *= 2;
|
||||
goto floodout;
|
||||
}
|
||||
|
||||
if (i < fl->suffix + 7) {
|
||||
*floodBackoffPtr *= 2;
|
||||
goto floodout;
|
||||
}
|
||||
|
||||
j = i - fl->suffix;
|
||||
|
||||
#ifndef FLOOD_32
|
||||
j -= (u32)((uintptr_t)buf + j) & 0x7; // push j back to yield 8-aligned addrs
|
||||
for (; j + 32 < mainLoopLen; j += 32) {
|
||||
u64a v = *(const u64a *)(buf + j);
|
||||
u64a v2 = *(const u64a *)(buf + j + 8);
|
||||
u64a v3 = *(const u64a *)(buf + j + 16);
|
||||
u64a v4 = *(const u64a *)(buf + j + 24);
|
||||
if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (; j + 8 < mainLoopLen; j += 8) {
|
||||
u64a v = *(const u64a *)(buf + j);
|
||||
if (v != cmpVal) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
#else
|
||||
j -= (u32)((size_t)buf + j) & 0x3; // push j back to yield 4-aligned addrs
|
||||
for (; j + 16 < mainLoopLen; j += 16) {
|
||||
u32 v = *(const u32 *)(buf + j);
|
||||
u32 v2 = *(const u32 *)(buf + j + 4);
|
||||
u32 v3 = *(const u32 *)(buf + j + 8);
|
||||
u32 v4 = *(const u32 *)(buf + j + 12);
|
||||
if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (; j + 4 < mainLoopLen; j += 4) {
|
||||
u32 v = *(const u32 *)(buf + j);
|
||||
if (v != cmpVal) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (; j < mainLoopLen; j++) {
|
||||
u8 v = *(const u8 *)(buf + j);
|
||||
if (v != c) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j > i ) {
|
||||
j--; // needed for some reaches
|
||||
u32 itersAhead = (j-i)/iterBytes;
|
||||
u32 floodSize = itersAhead*iterBytes;
|
||||
|
||||
DEBUG_PRINTF("flooding %u size j %u i %u fl->idCount %hu "
|
||||
"*control %016llx fl->allGroups %016llx\n",
|
||||
floodSize, j, i, fl->idCount, *control, fl->allGroups);
|
||||
DEBUG_PRINTF("mainloopLen %zu mainStart ??? mainEnd ??? len %zu\n",
|
||||
mainLoopLen, len);
|
||||
|
||||
if (fl->idCount && (*control & fl->allGroups)) {
|
||||
switch (fl->idCount) {
|
||||
#if !defined(FLOOD_DEBUG)
|
||||
// Carefully unrolled code
|
||||
case 1:
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups);
|
||||
t += 4) {
|
||||
DEBUG_PRINTF("aaa %u %llx\n", t, fl->groups[0]);
|
||||
u32 len0 = fl->len[0] - 1;
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 0 - len0, i + t + 0, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 2 - len0, i + t + 2, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 3 - len0, i + t + 3, fl->ids[0], ctxt);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 4) {
|
||||
u32 len0 = fl->len[0] - 1;
|
||||
u32 len1 = fl->len[1] - 1;
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control =
|
||||
cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 2 - len0, i + t + 2, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 2 - len1, i + t + 2, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 3 - len0, i + t + 3, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 3 - len1, i + t + 3, fl->ids[1], ctxt);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
|
||||
u32 len0 = fl->len[0] - 1;
|
||||
u32 len1 = fl->len[1] - 1;
|
||||
u32 len2 = fl->len[2] - 1;
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[2]) {
|
||||
*control = cb(i + t - len2, i + t, fl->ids[2], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[2]) {
|
||||
*control = cb(i + t + 1 - len2, i + t + 1, fl->ids[2], ctxt);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// slow generalized loop
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
|
||||
u32 len0 = fl->len[0] - 1;
|
||||
u32 len1 = fl->len[1] - 1;
|
||||
u32 len2 = fl->len[2] - 1;
|
||||
u32 len3 = fl->len[3] - 1;
|
||||
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t - len0, i + t, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t - len1, i + t, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[2]) {
|
||||
*control = cb(i + t - len2, i + t, fl->ids[2], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[3]) {
|
||||
*control = cb(i + t - len3, i + t, fl->ids[3], ctxt);
|
||||
}
|
||||
|
||||
for (u32 t2 = 4; t2 < fl->idCount; t2++) {
|
||||
if (*control & fl->groups[t2]) {
|
||||
*control = cb(i + t - (fl->len[t2] - 1), i + t, fl->ids[t2], ctxt);
|
||||
}
|
||||
}
|
||||
|
||||
if (*control & fl->groups[0]) {
|
||||
*control = cb(i + t + 1 - len0, i + t + 1, fl->ids[0], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[1]) {
|
||||
*control = cb(i + t + 1 - len1, i + t + 1, fl->ids[1], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[2]) {
|
||||
*control = cb(i + t + 1 - len2, i + t + 1, fl->ids[2], ctxt);
|
||||
}
|
||||
if (*control & fl->groups[3]) {
|
||||
*control = cb(i + t + 1 - len3, i + t + 1, fl->ids[3], ctxt);
|
||||
}
|
||||
|
||||
for (u32 t2 = 4; t2 < fl->idCount; t2++) {
|
||||
if (*control & fl->groups[t2]) {
|
||||
*control = cb(i + t + 1 - (fl->len[t2] - 1), i + t + 1, fl->ids[t2], ctxt);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
#else
|
||||
// Fallback for debugging
|
||||
default:
|
||||
for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t++) {
|
||||
for (u32 t2 = 0; t2 < fl->idCount; t2++) {
|
||||
if (*control & fl->groups[t2]) {
|
||||
*control = cb(i + t - (fl->len[t2] - 1), i + t, fl->ids[t2], ctxt);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
ptr += floodSize;
|
||||
} else {
|
||||
*floodBackoffPtr *= 2;
|
||||
}
|
||||
|
||||
floodout:
|
||||
if (j + *floodBackoffPtr < mainLoopLen - 128) {
|
||||
tryFloodDetect = buf + MAX(i,j) + *floodBackoffPtr;
|
||||
} else {
|
||||
tryFloodDetect = buf + mainLoopLen; // set so we never do another flood detect
|
||||
}
|
||||
*ptrPtr = ptr;
|
||||
DEBUG_PRINTF("finished flood detection at %p (next check %p)\n",
|
||||
ptr, tryFloodDetect);
|
||||
return tryFloodDetect;
|
||||
}
|
||||
|
||||
#endif
|
||||
244
src/fdr/teddy.c
Normal file
244
src/fdr/teddy.c
Normal file
@@ -0,0 +1,244 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
#include "util/simd_utils.h"
|
||||
#include "util/simd_utils_ssse3.h"
|
||||
|
||||
/*
 * Poison-mask source table for the 16-byte cautious loads.
 *
 * Row n is laid out as 16 zero bytes, then n bytes of 0xff, then zero padding
 * up to 32 bytes. vectoredLoad128() reads a 16-byte window out of a row:
 * starting at offset 16 the window holds n leading 0xff lanes, and starting
 * at offset (16 - start) the 0xff run is preceded by `start` zero lanes.
 *
 * Elements that are not listed are implicitly zero-initialised.
 */
static const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
    [0]  = { 0x00 },
    [1]  = { [16] = 0xff },
    [2]  = { [16] = 0xff, 0xff },
    [3]  = { [16] = 0xff, 0xff, 0xff },
    [4]  = { [16] = 0xff, 0xff, 0xff, 0xff },
    [5]  = { [16] = 0xff, 0xff, 0xff, 0xff, 0xff },
    [6]  = { [16] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    [7]  = { [16] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    [8]  = { [16] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    [9]  = { [16] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff },
    [10] = { [16] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff },
    [11] = { [16] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff },
    [12] = { [16] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff },
    [13] = { [16] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff },
    [14] = { [16] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    [15] = { [16] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    [16] = { [16] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }
};
|
||||
|
||||
// Note: p_mask is an output param that initialises a poison mask.
|
||||
UNUSED static really_inline
|
||||
m128 vectoredLoad128(m128 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
||||
const u8 *buf_history, size_t len_history,
|
||||
const u32 nMasks) {
|
||||
union {
|
||||
u8 val8[16];
|
||||
m128 val128;
|
||||
} u;
|
||||
u.val128 = zeroes128();
|
||||
|
||||
if (ptr >= lo) {
|
||||
u32 avail = (u32)(hi - ptr);
|
||||
if (avail >= 16) {
|
||||
*p_mask = load128((const void*)(p_mask_arr[16] + 16));
|
||||
return loadu128(ptr);
|
||||
}
|
||||
*p_mask = load128((const void*)(p_mask_arr[avail] + 16));
|
||||
for (u32 i = 0; i < avail; i++) {
|
||||
u.val8[i] = ptr[i];
|
||||
}
|
||||
} else {
|
||||
u32 need = MIN((u32)(lo - ptr), MIN(len_history, nMasks - 1));
|
||||
u32 start = (u32)(lo - ptr);
|
||||
u32 i;
|
||||
for (i = start - need; ptr + i < lo; i++) {
|
||||
u.val8[i] = buf_history[len_history - (lo - (ptr + i))];
|
||||
}
|
||||
u32 end = MIN(16, (u32)(hi - ptr));
|
||||
*p_mask = loadu128((const void*)(p_mask_arr[end - start] + 16 - start));
|
||||
for (; i < end; i++) {
|
||||
u.val8[i] = ptr[i];
|
||||
}
|
||||
}
|
||||
|
||||
return u.val128;
|
||||
}
|
||||
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
UNUSED static really_inline
|
||||
m256 vectoredLoad2x128(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
||||
const u8 *buf_history, size_t len_history,
|
||||
const u32 nMasks) {
|
||||
m128 p_mask128;
|
||||
m256 ret = set2x128(vectoredLoad128(&p_mask128, ptr, lo, hi, buf_history, len_history, nMasks));
|
||||
*p_mask = set2x128(p_mask128);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Poison-mask source table for the 32-byte cautious loads.
 *
 * Row n is laid out as 32 zero bytes, then n bytes of 0xff, then zero padding
 * up to 64 bytes. vectoredLoad256() reads a 32-byte window out of a row:
 * starting at offset 32 the window holds n leading 0xff lanes, and starting
 * at offset (32 - start) the 0xff run is preceded by `start` zero lanes.
 *
 * Elements that are not listed are implicitly zero-initialised.
 */
static const u8 ALIGN_AVX_DIRECTIVE p_mask_arr256[33][64] = {
    [0]  = { 0x00 },
    [1]  = { [32] = 0xff },
    [2]  = { [32] = 0xff, 0xff },
    [3]  = { [32] = 0xff, 0xff, 0xff },
    [4]  = { [32] = 0xff, 0xff, 0xff, 0xff },
    [5]  = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff },
    [6]  = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    [7]  = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    [8]  = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    [9]  = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff },
    [10] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff },
    [11] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff },
    [12] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff },
    [13] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff },
    [14] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    [15] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    [16] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    [17] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff },
    [18] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff },
    [19] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff },
    [20] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff },
    [21] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff },
    [22] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    [23] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    [24] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    [25] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff },
    [26] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff },
    [27] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff },
    [28] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff },
    [29] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff },
    [30] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    [31] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
    [32] = { [32] = 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }
};
|
||||
|
||||
|
||||
UNUSED static really_inline
|
||||
m256 vectoredLoad256(m256 *p_mask, const u8 *ptr, const u8 *lo, const u8 *hi,
|
||||
const u8 *buf_history, size_t len_history) {
|
||||
union {
|
||||
u8 val8[32];
|
||||
m256 val256;
|
||||
} u;
|
||||
|
||||
if (ptr >= lo) {
|
||||
u32 avail = (u32)(hi - ptr);
|
||||
if (avail >= 32) {
|
||||
*p_mask = load256((const void*)(p_mask_arr256[32] + 32));
|
||||
return loadu256(ptr);
|
||||
}
|
||||
*p_mask = load256((const void*)(p_mask_arr256[avail] + 32));
|
||||
for (u32 i = 0; i < avail; i++) {
|
||||
u.val8[i] = ptr[i];
|
||||
}
|
||||
} else {
|
||||
// need contains "how many chars to pull from history"
|
||||
// calculate based on what we need, what we have in the buffer
|
||||
// and only what we need to make primary confirm work
|
||||
u32 start = (u32)(lo - ptr);
|
||||
u32 i;
|
||||
for (i = start; ptr + i < lo; i++) {
|
||||
u.val8[i] = buf_history[len_history - (lo - (ptr + i))];
|
||||
}
|
||||
u32 end = MIN(32, (u32)(hi - ptr));
|
||||
*p_mask = loadu256((const void*)(p_mask_arr256[end - start] + 32 - start));
|
||||
for (; i < end; i++) {
|
||||
u.val8[i] = ptr[i];
|
||||
}
|
||||
}
|
||||
|
||||
return u.val256;
|
||||
}
|
||||
|
||||
|
||||
#endif // __AVX2__
|
||||
|
||||
// P0(cnd) forwards its condition to unlikely(); presumably a branch hint for
// conditions that are expected to be false most of the time (confirm against
// the definition of unlikely()).
#define P0(cnd) unlikely(cnd)
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "flood_runtime.h"
|
||||
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_confirm_runtime.h"
|
||||
|
||||
#include "fdr_loadval.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "teddy_internal.h"
|
||||
|
||||
#include "teddy_autogen.c"
|
||||
545
src/fdr/teddy_autogen.py
Executable file
545
src/fdr/teddy_autogen.py
Executable file
@@ -0,0 +1,545 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright (c) 2015, Intel Corporation
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of Intel Corporation nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import sys
|
||||
from autogen_utils import *
|
||||
from base_autogen import *
|
||||
from string import Template
|
||||
|
||||
class MT(MatcherBase):
|
||||
def produce_confirm(self, iter, var_name, offset, bits, cautious = True):
|
||||
if self.packed:
|
||||
print self.produce_confirm_base(var_name, bits, iter*16 + offset, cautious, enable_confirmless = False, do_bailout = False)
|
||||
else:
|
||||
if self.num_masks == 1:
|
||||
conf_func = "confWithBit1"
|
||||
else:
|
||||
conf_func = "confWithBitMany"
|
||||
|
||||
if cautious:
|
||||
caution_string = "VECTORING"
|
||||
else:
|
||||
caution_string = "NOT_CAUTIOUS"
|
||||
|
||||
print " if (P0(!!%s)) {" % var_name
|
||||
print " do {"
|
||||
if bits == 64:
|
||||
print " bit = findAndClearLSB_64(&%s);" % (var_name)
|
||||
else:
|
||||
print " bit = findAndClearLSB_32(&%s);" % (var_name)
|
||||
print " byte = bit / %d + %d;" % (self.num_buckets, iter*16 + offset)
|
||||
print " idx = bit %% %d;" % self.num_buckets
|
||||
print " cf = confBase[idx];"
|
||||
print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);"
|
||||
print " if (!(fdrc->groups & *control))"
|
||||
print " continue;"
|
||||
print " %s(fdrc, a, ptr - buf + byte, %s, control, &last_match);" % (conf_func, caution_string)
|
||||
print " } while(P0(!!%s));" % var_name
|
||||
print " if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {"
|
||||
print " *a->groups = controlVal;"
|
||||
print " return HWLM_TERMINATED;"
|
||||
print " }"
|
||||
print " }"
|
||||
|
||||
def produce_needed_temporaries(self, max_iterations):
|
||||
print " m128 p_mask;"
|
||||
for iter in range(0, max_iterations):
|
||||
print " m128 val_%d;" % iter
|
||||
print " m128 val_%d_lo;" % iter
|
||||
print " m128 val_%d_hi;" % iter
|
||||
for x in range(self.num_masks):
|
||||
print " m128 res_%d_%d;" % (iter, x)
|
||||
if x != 0:
|
||||
print " m128 res_shifted_%d_%d;" % (iter, x)
|
||||
print " m128 r_%d;" % iter
|
||||
print "#ifdef ARCH_64_BIT"
|
||||
print " u64a r_%d_lopart;" % iter
|
||||
print " u64a r_%d_hipart;" % iter
|
||||
print "#else"
|
||||
print " u32 r_%d_part1;" % iter
|
||||
print " u32 r_%d_part2;" % iter
|
||||
print " u32 r_%d_part3;" % iter
|
||||
print " u32 r_%d_part4;" % iter
|
||||
print "#endif"
|
||||
|
||||
# Print the C statements that compute the candidate vector r_<iter> for one
# 16-byte iteration:
#   * load val_<iter> (vectoredLoad128 with p_mask when cautious, plain
#     load128 otherwise) and split it into val_<iter>_lo / val_<iter>_hi by
#     masking with lomask, the high part after a right shift by 4;
#   * for every mask x, res_<iter>_<x> is the AND of two pshufb lookups into
#     maskBase; for x != 0 a res_shifted_<iter>_<x> is built with palignr from
#     the current res and the previous one (res_old_<x> when iter == 0);
#   * r_<iter> is res_<iter>_0 (ANDed with p_mask when cautious) ANDed with
#     every res_shifted_<iter>_<x>.
# NOTE(review): indentation is not visible here, so it cannot be told from
# this text whether the bare `print` statements sit inside or outside the
# `for x` loops - check the original file before restructuring.
def produce_one_iteration_state_calc(self, iter, effective_num_iterations,
|
||||
cautious, save_old):
|
||||
# Load step: the cautious variant also fills in p_mask.
if cautious:
|
||||
print " val_%d = vectoredLoad128(&p_mask, ptr + %d, buf, buf+len, a->buf_history, a->len_history, %d);" % (iter, iter*16, self.num_masks)
|
||||
else:
|
||||
print " val_%d = load128(ptr + %d);" % (iter, iter*16)
|
||||
# Split every byte into its low and high 4 bits.
print " val_%d_lo = and128(val_%d, lomask);" % (iter, iter)
|
||||
print " val_%d_hi = rshift2x64(val_%d, 4);" % (iter, iter)
|
||||
print " val_%d_hi = and128(val_%d_hi, lomask);" % (iter, iter)
|
||||
print
|
||||
# One shuffle-based lookup result per mask.
for x in range(self.num_masks):
|
||||
print Template("""
|
||||
res_${ITER}_${X} = and128(pshufb(maskBase[${X}*2] , val_${ITER}_lo),
|
||||
pshufb(maskBase[${X}*2+1], val_${ITER}_hi));""").substitute(ITER = iter, X = x)
|
||||
# Masks after the first are combined with the preceding block's result.
if x != 0:
|
||||
if iter == 0:
|
||||
print " res_shifted_%d_%d = palignr(res_%d_%d, res_old_%d, 16-%d);" % (iter, x, iter, x, x, x)
|
||||
else:
|
||||
print " res_shifted_%d_%d = palignr(res_%d_%d, res_%d_%d, 16-%d);" % (iter, x, iter, x, iter-1, x, x)
|
||||
# On the last iteration, optionally keep res for the next pass as res_old.
if x != 0 and iter == effective_num_iterations - 1 and save_old:
|
||||
print " res_old_%d = res_%d_%d;" % (x, iter, x)
|
||||
print
|
||||
# Combine: start from mask 0's result, then AND in the shifted results.
if cautious:
|
||||
print " r_%d = and128(res_%d_0, p_mask);" % (iter, iter)
|
||||
else:
|
||||
print " r_%d = res_%d_0;" % (iter, iter)
|
||||
for x in range(1, self.num_masks):
|
||||
print " r_%d = and128(r_%d, res_shifted_%d_%d);" % (iter, iter, iter, x)
|
||||
print
|
||||
|
||||
def produce_one_iteration_confirm(self, iter, confirmCautious):
|
||||
setup64 = [ (0, "r_%d_lopart" % iter, "movq(r_%d)" % iter),
|
||||
(8, "r_%d_hipart" % iter, "movq(byteShiftRight128(r_%d, 8))" % iter) ]
|
||||
|
||||
setup32 = [ (0, "r_%d_part1" % iter, "movd(r_%d)" % iter),
|
||||
(4, "r_%d_part2" % iter, "movd(byteShiftRight128(r_%d, 4))" % iter),
|
||||
(8, "r_%d_part3" % iter, "movd(byteShiftRight128(r_%d, 8))" % iter),
|
||||
(12, "r_%d_part4" % iter, "movd(byteShiftRight128(r_%d, 12))" % iter) ]
|
||||
|
||||
print " if (P0(isnonzero128(r_%d))) {" % (iter)
|
||||
print "#ifdef ARCH_64_BIT"
|
||||
for (off, val, init) in setup64:
|
||||
print " %s = %s;" % (val, init)
|
||||
for (off, val, init) in setup64:
|
||||
self.produce_confirm(iter, val, off, 64, cautious = confirmCautious)
|
||||
print "#else"
|
||||
for (off, val, init) in setup32:
|
||||
print " %s = %s;" % (val, init)
|
||||
for (off, val, init) in setup32:
|
||||
self.produce_confirm(iter, val, off, 32, cautious = confirmCautious)
|
||||
print "#endif"
|
||||
print " }"
|
||||
|
||||
def produce_one_iteration(self, iter, effective_num_iterations, cautious = False,
                          confirmCautious = True, save_old = True):
    """Print one complete iteration: the state calculation for block `iter`
    followed by its confirm stage."""
    self.produce_one_iteration_state_calc(iter = iter,
                                          effective_num_iterations = effective_num_iterations,
                                          cautious = cautious,
                                          save_old = save_old)
    self.produce_one_iteration_confirm(iter = iter,
                                       confirmCautious = confirmCautious)
|
||||
|
||||
def produce_code(self):
|
||||
print self.produce_header(visible = True, header_only = False)
|
||||
print self.produce_common_declarations()
|
||||
print
|
||||
|
||||
self.produce_needed_temporaries(self.num_iterations)
|
||||
print
|
||||
|
||||
print " const struct Teddy * teddy = (const struct Teddy *)fdr;"
|
||||
print " const m128 * maskBase = (const m128 *)((const u8 *)fdr + sizeof(struct Teddy));"
|
||||
print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + (%d*32));" % self.num_masks
|
||||
print " const u8 * mainStart = ROUNDUP_PTR(ptr, 16);"
|
||||
print " const size_t iterBytes = %d;" % (self.num_iterations * 16)
|
||||
|
||||
print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \
|
||||
' buf, len, a->start_offset);'
|
||||
print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \
|
||||
' mainStart);'
|
||||
|
||||
for x in range(self.num_masks):
|
||||
if (x != 0):
|
||||
print " m128 res_old_%d = ones128();" % x
|
||||
print " m128 lomask = set16x8(0xf);"
|
||||
|
||||
print " if (ptr < mainStart) {"
|
||||
print " ptr = mainStart - 16;"
|
||||
self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
|
||||
print " ptr += 16;"
|
||||
print " }"
|
||||
|
||||
print " if (ptr + 16 < buf + len) {"
|
||||
self.produce_one_iteration(0, 1, cautious = False, confirmCautious = True, save_old = True)
|
||||
print " ptr += 16;"
|
||||
print " }"
|
||||
|
||||
print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {"
|
||||
print " __builtin_prefetch(ptr + (iterBytes*4));"
|
||||
print self.produce_flood_check()
|
||||
|
||||
for iter in range(self.num_iterations):
|
||||
self.produce_one_iteration(iter, self.num_iterations, cautious = False, confirmCautious = False)
|
||||
|
||||
print " }"
|
||||
|
||||
print " for (; ptr < buf + len; ptr += 16) {"
|
||||
self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
|
||||
print " }"
|
||||
|
||||
print self.produce_footer()
|
||||
|
||||
def produce_compile_call(self):
|
||||
packed_str = { False : "false", True : "true"}[self.packed]
|
||||
print " { %d, %s, %d, %d, %s, %d, %d }," % (
|
||||
self.id, self.arch.target, self.num_masks, self.num_buckets, packed_str,
|
||||
self.conf_pull_back, self.conf_top_level_split)
|
||||
|
||||
def get_name(self):
    """Return the C function name of this matcher.

    The name is "fdr_exec_teddy_<arch>_msks<num_masks>", followed by "_pck"
    when the engine is packed and by "_fat" when it has 16 buckets.
    """
    pck_string = "_pck" if self.packed else ""
    type_string = "_fat" if self.num_buckets == 16 else ""
    parts = (self.arch.name, self.num_masks, pck_string, type_string)
    return "fdr_exec_teddy_%s_msks%d%s%s" % parts
|
||||
|
||||
def __init__(self, arch, packed = False, num_masks = 1, num_buckets = 8):
    """Record the engine parameters.

    arch, packed, num_masks and num_buckets are stored unchanged.
    num_iterations is fixed at 2, conf_pull_back at 0, and
    conf_top_level_split is 32 for packed engines and 1 otherwise.
    """
    self.arch = arch
    self.packed = packed
    self.num_masks = num_masks
    self.num_buckets = num_buckets
    self.num_iterations = 2

    self.conf_top_level_split = 32 if packed else 1
    self.conf_pull_back = 0
|
||||
|
||||
class MTFat(MT):
|
||||
def produce_needed_temporaries(self, max_iterations):
|
||||
print " m256 p_mask;"
|
||||
for iter in range(0, max_iterations):
|
||||
print " m256 val_%d;" % iter
|
||||
print " m256 val_%d_lo;" % iter
|
||||
print " m256 val_%d_hi;" % iter
|
||||
for x in range(self.num_masks):
|
||||
print " m256 res_%d_%d;" % (iter, x)
|
||||
if x != 0:
|
||||
print " m256 res_shifted_%d_%d;" % (iter, x)
|
||||
print " m256 r_%d;" % iter
|
||||
print "#ifdef ARCH_64_BIT"
|
||||
print " u64a r_%d_part1;" % iter
|
||||
print " u64a r_%d_part2;" % iter
|
||||
print " u64a r_%d_part3;" % iter
|
||||
print " u64a r_%d_part4;" % iter
|
||||
print "#else"
|
||||
print " u32 r_%d_part1;" % iter
|
||||
print " u32 r_%d_part2;" % iter
|
||||
print " u32 r_%d_part3;" % iter
|
||||
print " u32 r_%d_part4;" % iter
|
||||
print " u32 r_%d_part5;" % iter
|
||||
print " u32 r_%d_part6;" % iter
|
||||
print " u32 r_%d_part7;" % iter
|
||||
print " u32 r_%d_part8;" % iter
|
||||
print "#endif"
|
||||
|
||||
def produce_code(self):
|
||||
print self.produce_header(visible = True, header_only = False)
|
||||
print self.produce_common_declarations()
|
||||
print
|
||||
|
||||
self.produce_needed_temporaries(self.num_iterations)
|
||||
print
|
||||
|
||||
print " const struct Teddy * teddy = (const struct Teddy *)fdr;"
|
||||
print " const m256 * maskBase = (const m256 *)((const u8 *)fdr + sizeof(struct Teddy));"
|
||||
print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + (%d*32*2));" % self.num_masks
|
||||
print " const u8 * mainStart = ROUNDUP_PTR(ptr, 16);"
|
||||
print " const size_t iterBytes = %d;" % (self.num_iterations * 16)
|
||||
|
||||
print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \
|
||||
' buf, len, a->start_offset);'
|
||||
print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \
|
||||
' mainStart);'
|
||||
|
||||
for x in range(self.num_masks):
|
||||
if (x != 0):
|
||||
print " m256 res_old_%d = ones256();" % x
|
||||
print " m256 lomask = set32x8(0xf);"
|
||||
|
||||
print " if (ptr < mainStart) {"
|
||||
print " ptr = mainStart - 16;"
|
||||
self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
|
||||
print " ptr += 16;"
|
||||
print " }"
|
||||
|
||||
print " if (ptr + 16 < buf + len) {"
|
||||
self.produce_one_iteration(0, 1, cautious = False, confirmCautious = True, save_old = True)
|
||||
print " ptr += 16;"
|
||||
print " }"
|
||||
|
||||
print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {"
|
||||
print " __builtin_prefetch(ptr + (iterBytes*4));"
|
||||
print self.produce_flood_check()
|
||||
|
||||
for iter in range(self.num_iterations):
|
||||
self.produce_one_iteration(iter, self.num_iterations, False, confirmCautious = False)
|
||||
|
||||
print " }"
|
||||
|
||||
print " for (; ptr < buf + len; ptr += 16) {"
|
||||
self.produce_one_iteration(0, 1, cautious = True, confirmCautious = True, save_old = True)
|
||||
print " }"
|
||||
|
||||
print self.produce_footer()
|
||||
|
||||
def produce_one_iteration_state_calc(self, iter, effective_num_iterations,
|
||||
cautious, save_old):
|
||||
if cautious:
|
||||
print " val_%d = vectoredLoad2x128(&p_mask, ptr + %d, buf, buf+len, a->buf_history, a->len_history, %d);" % (iter, iter*16, self.num_masks)
|
||||
else:
|
||||
print " val_%d = load2x128(ptr + %d);" % (iter, iter*16)
|
||||
print " val_%d_lo = and256(val_%d, lomask);" % (iter, iter)
|
||||
print " val_%d_hi = rshift4x64(val_%d, 4);" % (iter, iter)
|
||||
print " val_%d_hi = and256(val_%d_hi, lomask);" % (iter, iter)
|
||||
print
|
||||
for x in range(self.num_masks):
|
||||
print Template("""
|
||||
res_${ITER}_${X} = and256(vpshufb(maskBase[${X}*2] , val_${ITER}_lo),
|
||||
vpshufb(maskBase[${X}*2+1], val_${ITER}_hi));""").substitute(ITER = iter, X = x)
|
||||
if x != 0:
|
||||
if iter == 0:
|
||||
print " res_shifted_%d_%d = vpalignr(res_%d_%d, res_old_%d, 16-%d);" % (iter, x, iter, x, x, x)
|
||||
else:
|
||||
print " res_shifted_%d_%d = vpalignr(res_%d_%d, res_%d_%d, 16-%d);" % (iter, x, iter, x, iter-1, x, x)
|
||||
if x != 0 and iter == effective_num_iterations - 1 and save_old:
|
||||
print " res_old_%d = res_%d_%d;" % (x, iter, x)
|
||||
print
|
||||
if cautious:
|
||||
print " r_%d = and256(res_%d_0, p_mask);" % (iter, iter)
|
||||
else:
|
||||
print " r_%d = res_%d_0;" % (iter, iter)
|
||||
for x in range(1, self.num_masks):
|
||||
print " r_%d = and256(r_%d, res_shifted_%d_%d);" % (iter, iter, iter, x)
|
||||
print
|
||||
|
||||
def produce_one_iteration_confirm(self, iter, confirmCautious):
|
||||
setup64 = [ (0, "r_%d_part1" % iter, "extractlow64from256(r)"),
|
||||
(4, "r_%d_part2" % iter, "extract64from256(r, 1);\n r = interleave256hi(r_%d, r_swap)" % (iter)),
|
||||
(8, "r_%d_part3" % iter, "extractlow64from256(r)"),
|
||||
(12, "r_%d_part4" % iter, "extract64from256(r, 1)") ]
|
||||
|
||||
setup32 = [ (0, "r_%d_part1" % iter, "extractlow32from256(r)"),
|
||||
(2, "r_%d_part2" % iter, "extract32from256(r, 1)"),
|
||||
(4, "r_%d_part3" % iter, "extract32from256(r, 2)"),
|
||||
(6, "r_%d_part4" % iter, "extract32from256(r, 3);\n r = interleave256hi(r_%d, r_swap)" % (iter)),
|
||||
(8, "r_%d_part5" % iter, "extractlow32from256(r)"),
|
||||
(10, "r_%d_part6" % iter, "extract32from256(r, 1)"),
|
||||
(12, "r_%d_part7" % iter, "extract32from256(r, 2)"),
|
||||
(14, "r_%d_part8" % iter, "extract32from256(r, 3)") ]
|
||||
|
||||
print " if (P0(isnonzero256(r_%d))) {" % (iter)
|
||||
print " m256 r_swap = swap128in256(r_%d);" % (iter)
|
||||
print " m256 r = interleave256lo(r_%d, r_swap);" % (iter)
|
||||
print "#ifdef ARCH_64_BIT"
|
||||
for (off, val, init) in setup64:
|
||||
print " %s = %s;" % (val, init)
|
||||
|
||||
for (off, val, init) in setup64:
|
||||
self.produce_confirm(iter, val, off, 64, cautious = confirmCautious)
|
||||
print "#else"
|
||||
for (off, val, init) in setup32:
|
||||
print " %s = %s;" % (val, init)
|
||||
|
||||
for (off, val, init) in setup32:
|
||||
self.produce_confirm(iter, val, off, 32, cautious = confirmCautious)
|
||||
print "#endif"
|
||||
print " }"
|
||||
|
||||
class MTFast(MatcherBase):
|
||||
|
||||
def produce_confirm(self, cautious):
|
||||
if cautious:
|
||||
cautious_str = "VECTORING"
|
||||
else:
|
||||
cautious_str = "NOT_CAUTIOUS"
|
||||
|
||||
print " for (u32 i = 0; i < arrCnt; i++) {"
|
||||
print " byte = bitArr[i] / 8;"
|
||||
if self.packed:
|
||||
conf_split_mask = IntegerType(32).constant_to_string(
|
||||
self.conf_top_level_split - 1)
|
||||
print " bitRem = bitArr[i] % 8;"
|
||||
print " confSplit = *(ptr+byte) & 0x1f;"
|
||||
print " idx = confSplit * %d + bitRem;" % self.num_buckets
|
||||
print " cf = confBase[idx];"
|
||||
print " if (!cf)"
|
||||
print " continue;"
|
||||
print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);"
|
||||
print " if (!(fdrc->groups & *control))"
|
||||
print " continue;"
|
||||
print " confWithBit(fdrc, a, ptr - buf + byte, %s, 0, control, &last_match);" % cautious_str
|
||||
else:
|
||||
print " cf = confBase[bitArr[i] % 8];"
|
||||
print " fdrc = (const struct FDRConfirm *)((const u8 *)confBase + cf);"
|
||||
print " confWithBit1(fdrc, a, ptr - buf + byte, %s, control, &last_match);" % cautious_str
|
||||
print " if (P0(controlVal == HWLM_TERMINATE_MATCHING)) {"
|
||||
print " *a->groups = controlVal;"
|
||||
print " return HWLM_TERMINATED;"
|
||||
print " }"
|
||||
print " }"
|
||||
|
||||
def produce_needed_temporaries(self, max_iterations):
|
||||
print " u32 arrCnt;"
|
||||
print " u16 bitArr[512];"
|
||||
print " m256 p_mask;"
|
||||
print " m256 val_0;"
|
||||
print " m256 val_0_lo;"
|
||||
print " m256 val_0_hi;"
|
||||
print " m256 res_0;"
|
||||
print " m256 res_1;"
|
||||
print " m128 lo_part;"
|
||||
print " m128 hi_part;"
|
||||
print "#ifdef ARCH_64_BIT"
|
||||
print " u64a r_0_part;"
|
||||
print "#else"
|
||||
print " u32 r_0_part;"
|
||||
print "#endif"
|
||||
|
||||
def produce_bit_scan(self, offset, bits):
|
||||
print " while (P0(!!r_0_part)) {"
|
||||
if bits == 64:
|
||||
print " bitArr[arrCnt++] = (u16)findAndClearLSB_64(&r_0_part) + 64 * %d;" % (offset)
|
||||
else:
|
||||
print " bitArr[arrCnt++] = (u16)findAndClearLSB_32(&r_0_part) + 32 * %d;" % (offset)
|
||||
print " }"
|
||||
|
||||
def produce_bit_check_128(self, var_name, offset):
|
||||
print " if (P0(isnonzero128(%s))) {" % (var_name)
|
||||
print "#ifdef ARCH_64_BIT"
|
||||
print " r_0_part = movq(%s);" % (var_name)
|
||||
self.produce_bit_scan(offset, 64)
|
||||
print " r_0_part = movq(byteShiftRight128(%s, 8));" % (var_name)
|
||||
self.produce_bit_scan(offset + 1, 64)
|
||||
print "#else"
|
||||
print " r_0_part = movd(%s);" % (var_name)
|
||||
self.produce_bit_scan(offset * 2, 32)
|
||||
for step in range(1, 4):
|
||||
print " r_0_part = movd(byteShiftRight128(%s, %d));" % (var_name, step * 4)
|
||||
self.produce_bit_scan(offset * 2 + step, 32)
|
||||
print "#endif"
|
||||
print " }"
|
||||
|
||||
def produce_bit_check_256(self, iter, single_iter, cautious):
|
||||
print " if (P0(isnonzero256(res_%d))) {" % (iter)
|
||||
if single_iter:
|
||||
print " arrCnt = 0;"
|
||||
print " lo_part = cast256to128(res_%d);" % (iter)
|
||||
print " hi_part = cast256to128(swap128in256(res_%d));" % (iter)
|
||||
self.produce_bit_check_128("lo_part", iter * 4)
|
||||
self.produce_bit_check_128("hi_part", iter * 4 + 2)
|
||||
if single_iter:
|
||||
self.produce_confirm(cautious)
|
||||
print " }"
|
||||
|
||||
def produce_one_iteration_state_calc(self, iter, cautious):
|
||||
if cautious:
|
||||
print " val_0 = vectoredLoad256(&p_mask, ptr + %d, buf+a->start_offset, buf+len, a->buf_history, a->len_history);" % (iter * 32)
|
||||
else:
|
||||
print " val_0 = load256(ptr + %d);" % (iter * 32)
|
||||
print " val_0_lo = and256(val_0, lomask);"
|
||||
print " val_0_hi = rshift4x64(val_0, 4);"
|
||||
print " val_0_hi = and256(val_0_hi, lomask);"
|
||||
print " res_%d = and256(vpshufb(maskLo , val_0_lo), vpshufb(maskHi, val_0_hi));" % (iter)
|
||||
if cautious:
|
||||
print " res_%d = and256(res_%d, p_mask);" % (iter, iter)
|
||||
|
||||
def produce_code(self):
|
||||
print self.produce_header(visible = True, header_only = False)
|
||||
print self.produce_common_declarations()
|
||||
print
|
||||
|
||||
self.produce_needed_temporaries(self.num_iterations)
|
||||
|
||||
print " const struct Teddy * teddy = (const struct Teddy *)fdr;"
|
||||
print " const m128 * maskBase = (const m128 *)((const u8 *)fdr + sizeof(struct Teddy));"
|
||||
print " const m256 maskLo = set2x128(maskBase[0]);"
|
||||
print " const m256 maskHi = set2x128(maskBase[1]);"
|
||||
print " const u32 * confBase = (const u32 *)((const u8 *)teddy + sizeof(struct Teddy) + 32);"
|
||||
print " const u8 * mainStart = ROUNDUP_PTR(ptr, 32);"
|
||||
print " const size_t iterBytes = %d;" % (self.num_iterations * 32)
|
||||
|
||||
print ' DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\\n",' \
|
||||
' buf, len, a->start_offset);'
|
||||
print ' DEBUG_PRINTF("derive: ptr: %p mainstart %p\\n", ptr,' \
|
||||
' mainStart);'
|
||||
print " const m256 lomask = set32x8(0xf);"
|
||||
|
||||
print " if (ptr < mainStart) {"
|
||||
print " ptr = mainStart - 32;"
|
||||
self.produce_one_iteration_state_calc(iter = 0, cautious = True)
|
||||
self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
|
||||
print " ptr += 32;"
|
||||
print " }"
|
||||
|
||||
print " if (ptr + 32 < buf + len) {"
|
||||
self.produce_one_iteration_state_calc(iter = 0, cautious = False)
|
||||
self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
|
||||
print " ptr += 32;"
|
||||
print " }"
|
||||
print " for ( ; ptr + iterBytes <= buf + len; ptr += iterBytes) {"
|
||||
print " __builtin_prefetch(ptr + (iterBytes*4));"
|
||||
print self.produce_flood_check()
|
||||
for iter in range (0, self.num_iterations):
|
||||
self.produce_one_iteration_state_calc(iter = iter, cautious = False)
|
||||
print " arrCnt = 0;"
|
||||
for iter in range (0, self.num_iterations):
|
||||
self.produce_bit_check_256(iter = iter, single_iter = False, cautious = False)
|
||||
self.produce_confirm(cautious = False)
|
||||
print " }"
|
||||
|
||||
print " for (; ptr < buf + len; ptr += 32) {"
|
||||
self.produce_one_iteration_state_calc(iter = 0, cautious = True)
|
||||
self.produce_bit_check_256(iter = 0, single_iter = True, cautious = True)
|
||||
print " }"
|
||||
|
||||
print self.produce_footer()
|
||||
|
||||
def get_name(self):
|
||||
if self.packed:
|
||||
pck_string = "_pck"
|
||||
else:
|
||||
pck_string = ""
|
||||
return "fdr_exec_teddy_%s_msks%d%s_fast" % (self.arch.name, self.num_masks, pck_string)
|
||||
|
||||
def produce_compile_call(self):
|
||||
packed_str = { False : "false", True : "true"}[self.packed]
|
||||
print " { %d, %s, %d, %d, %s, %d, %d }," % (
|
||||
self.id, self.arch.target, self.num_masks, self.num_buckets, packed_str,
|
||||
self.conf_pull_back, self.conf_top_level_split)
|
||||
|
||||
def __init__(self, arch, packed = False):
|
||||
self.arch = arch
|
||||
self.packed = packed
|
||||
self.num_masks = 1
|
||||
self.num_buckets = 8
|
||||
self.num_iterations = 2
|
||||
|
||||
self.conf_top_level_split = 1
|
||||
self.conf_pull_back = 0
|
||||
if packed:
|
||||
self.conf_top_level_split = 32
|
||||
else:
|
||||
self.conf_top_level_split = 1
|
||||
self.conf_pull_back = 0
|
||||
459
src/fdr/teddy_compile.cpp
Normal file
459
src/fdr/teddy_compile.cpp
Normal file
@@ -0,0 +1,459 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_confirm.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/popcount.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include "teddy_compile.h"
|
||||
#include "teddy_internal.h"
|
||||
#include "teddy_engine_description.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
|
||||
//#define TEDDY_DEBUG
|
||||
|
||||
class TeddyCompiler : boost::noncopyable {
|
||||
const TeddyEngineDescription ŋ
|
||||
const vector<hwlmLiteral> &lits;
|
||||
bool make_small;
|
||||
|
||||
public:
|
||||
TeddyCompiler(const vector<hwlmLiteral> &lits_in,
|
||||
const TeddyEngineDescription &eng_in, bool make_small_in)
|
||||
: eng(eng_in), lits(lits_in), make_small(make_small_in) {}
|
||||
|
||||
aligned_unique_ptr<FDR> build(pair<u8 *, size_t> link);
|
||||
bool pack(map<BucketIndex, std::vector<LiteralIndex> > &bucketToLits);
|
||||
};
|
||||
|
||||
class TeddySet {
|
||||
const vector<hwlmLiteral> &lits;
|
||||
u32 len;
|
||||
// nibbleSets is a series of bitfields over 16 predicates
|
||||
// that represent the whether shufti nibble set
|
||||
// so for num_masks = 4 we will represent our strings by
|
||||
// 8 u16s in the vector that indicate what a shufti bucket
|
||||
// would have to look like
|
||||
vector<u16> nibbleSets;
|
||||
set<u32> litIds;
|
||||
public:
|
||||
TeddySet(const vector<hwlmLiteral> &lits_in, u32 len_in)
|
||||
: lits(lits_in), len(len_in), nibbleSets(len_in * 2, 0) {}
|
||||
const set<u32> & getLits() const { return litIds; }
|
||||
size_t litCount() const { return litIds.size(); }
|
||||
|
||||
bool operator<(const TeddySet & s) const {
|
||||
return litIds < s.litIds;
|
||||
}
|
||||
|
||||
#ifdef TEDDY_DEBUG
|
||||
void dump() const {
|
||||
printf("TS: ");
|
||||
for (u32 i = 0; i < nibbleSets.size(); i++) {
|
||||
printf("%04x ", (u32)nibbleSets[i]);
|
||||
}
|
||||
printf("\nnlits: %zu\nLit ids: ", litCount());
|
||||
printf("Prob: %llu\n", probability());
|
||||
for (set<u32>::iterator i = litIds.begin(), e = litIds.end(); i != e; ++i) {
|
||||
printf("%u ", *i);
|
||||
}
|
||||
printf("\n");
|
||||
printf("Flood prone : %s\n", isRunProne()?"yes":"no");
|
||||
}
|
||||
#endif
|
||||
|
||||
bool identicalTail(const TeddySet & ts) const {
|
||||
return nibbleSets == ts.nibbleSets;
|
||||
}
|
||||
|
||||
void addLiteral(u32 lit_id) {
|
||||
const string &s = lits[lit_id].s;
|
||||
for (u32 i = 0; i < len; i++) {
|
||||
if (i < s.size()) {
|
||||
u8 c = s[s.size() - i - 1];
|
||||
u8 c_hi = (c >> 4) & 0xf;
|
||||
u8 c_lo = c & 0xf;
|
||||
nibbleSets[i*2] = 1 << c_lo;
|
||||
if (lits[lit_id].nocase && ourisalpha(c)) {
|
||||
nibbleSets[i*2+1] = (1 << (c_hi&0xd)) | (1 << (c_hi|0x2));
|
||||
} else {
|
||||
nibbleSets[i*2+1] = 1 << c_hi;
|
||||
}
|
||||
} else {
|
||||
nibbleSets[i*2] = nibbleSets[i*2+1] = 0xffff;
|
||||
}
|
||||
}
|
||||
litIds.insert(lit_id);
|
||||
}
|
||||
|
||||
void merge(const TeddySet &ts) {
|
||||
for (u32 i = 0; i < nibbleSets.size(); i++) {
|
||||
nibbleSets[i] |= ts.nibbleSets[i];
|
||||
}
|
||||
litIds.insert(ts.litIds.begin(), ts.litIds.end());
|
||||
}
|
||||
|
||||
// return a value p from 0 .. MAXINT64 that gives p/MAXINT64
|
||||
// likelihood of this TeddySet firing a first-stage accept
|
||||
// if it was given a bucket of its own and random data were
|
||||
// to be passed in
|
||||
u64a probability() const {
|
||||
u64a val = 1;
|
||||
for (size_t i = 0; i < nibbleSets.size(); i++) {
|
||||
val *= popcount32((u32)nibbleSets[i]);
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
// return a score based around the chance of this hitting times
|
||||
// a small fixed cost + the cost of traversing some sort of followup
|
||||
// (assumption is that the followup is linear)
|
||||
u64a heuristic() const {
|
||||
return probability() * (2+litCount());
|
||||
}
|
||||
|
||||
bool isRunProne() const {
|
||||
u16 lo_and = 0xffff;
|
||||
u16 hi_and = 0xffff;
|
||||
for (u32 i = 0; i < len; i++) {
|
||||
lo_and &= nibbleSets[i*2];
|
||||
hi_and &= nibbleSets[i*2+1];
|
||||
}
|
||||
// we're not flood-prone if there's no way to get
|
||||
// through with a flood
|
||||
if (!lo_and || !hi_and) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * \brief Greedily merge literals into at most eng.getNumBuckets() buckets.
 *
 * Starts with one TeddySet per literal and repeatedly merges a pair chosen by
 * the heuristic score until no candidate pair is left. On success every
 * resulting set becomes one bucket in \a bucketToLits (numbered from 0 in set
 * order) and true is returned; if more sets than buckets remain, returns
 * false and leaves \a bucketToLits untouched.
 */
bool TeddyCompiler::pack(map<BucketIndex,
                             std::vector<LiteralIndex> > &bucketToLits) {
    set<TeddySet> sts;

    // seed: every literal starts out in a set of its own
    for (u32 lit = 0; lit < lits.size(); lit++) {
        TeddySet single(lits, eng.numMasks);
        single.addLiteral(lit);
        sts.insert(single);
    }

    for (;;) {
#ifdef TEDDY_DEBUG
        printf("Size %zu\n", sts.size());
        for (const TeddySet &ts : sts) {
            printf("\n"); ts.dump();
        }
        printf("\n===============================================\n");
#endif

        auto m1 = sts.end();
        auto m2 = sts.end();
        u64a best = 0xffffffffffffffffULL;

        for (auto i1 = sts.begin(); i1 != sts.end(); ++i1) {
            const TeddySet &s1 = *i1;
            auto i2 = i1;
            for (++i2; i2 != sts.end(); ++i2) {
                const TeddySet &s2 = *i2;

                // be more conservative if we don't absolutely need to
                // keep packing: only sets with identical masks are merged
                if ((sts.size() <= eng.getNumBuckets()) &&
                    !s1.identicalTail(s2)) {
                    continue;
                }

                TeddySet merged(lits, eng.numMasks);
                merged.merge(s1);
                merged.merge(s2);
                const u64a newScore = merged.heuristic();
                const u64a oldScore = s1.heuristic() + s2.heuristic();

                if (newScore < oldScore) {
                    // merging is an outright win
                    // NOTE(review): this only leaves the inner loop; later
                    // i1 values can still overwrite m1/m2 - confirm intended.
                    m1 = i1;
                    m2 = i2;
                    break;
                }

                // never create a run-prone set out of sets that are not
                // both run-prone already
                if (merged.isRunProne() &&
                    !(s1.isRunProne() && s2.isRunProne())) {
                    continue;
                }

                // otherwise remember the cheapest merge seen so far
                const u64a score = newScore - oldScore;
                if (score < best) {
                    best = score;
                    m1 = i1;
                    m2 = i2;
                }
            }
        }

        // if we didn't find a merge candidate, bail out
        if ((m1 == sts.end()) || (m2 == sts.end())) {
            break;
        }

        // do the merge
        TeddySet nts(lits, eng.numMasks);
        nts.merge(*m1);
        nts.merge(*m2);
#ifdef TEDDY_DEBUG
        printf("Merging\n");
        printf("m1 = \n");
        m1->dump();
        printf("m2 = \n");
        m2->dump();
        printf("nts = \n");
        nts.dump();
        printf("\n===============================================\n");
#endif
        sts.erase(m1);
        sts.erase(m2);
        sts.insert(nts);
    }

    if (sts.size() > eng.getNumBuckets()) {
        return false;
    }

    // hand out bucket numbers in set order
    u32 cnt = 0;
    for (const TeddySet &ts : sts) {
        for (const u32 lit : ts.getLits()) {
            bucketToLits[cnt].push_back(lit);
        }
        cnt++;
    }
    return true;
}
|
||||
|
||||
/**
 * \brief Build the Teddy table.
 *
 * Layout of the result: Teddy header, nibble masks (maskLen bytes), confirm
 * structures, flood control and finally the optional \a link blob. The
 * confirm and flood-control temporaries are freed here, and so is link.first
 * once it has been copied. On the early failure paths (too many literals,
 * packing failed) nullptr is returned and link.first is not freed.
 */
aligned_unique_ptr<FDR> TeddyCompiler::build(pair<u8 *, size_t> link) {
    if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
        DEBUG_PRINTF("too many literals: %zu\n", lits.size());
        return nullptr;
    }

#ifdef TEDDY_DEBUG
    for (size_t i = 0; i < lits.size(); i++) {
        const hwlmLiteral &lit = lits[i];
        printf("lit %zu (len = %zu, %s) is ", i, lit.s.size(),
               lit.nocase ? "caseless" : "caseful");
        for (size_t j = 0; j < lit.s.size(); j++) {
            printf("%02x", ((u32)lit.s[j])&0xff);
        }
        printf("\n");
    }
#endif

    map<BucketIndex, std::vector<LiteralIndex> > bucketToLits;
    if (!eng.needConfirm(lits)) {
        // no confirm stage: literal i gets bucket i to itself
        for (u32 i = 0; i < lits.size(); i++) {
            bucketToLits[i].push_back(i);
        }
    } else if (!pack(bucketToLits)) {
        DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n",
                     lits.size(), eng.getNumBuckets());
        return nullptr;
    }

    // one byte of mask width per 8 buckets
    const u32 maskWidth = eng.getNumBuckets() / 8;
    // two 16-entry tables (low and high nibble) per mask
    const size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;

    pair<u8 *, size_t> floodControlTmp = setupFDRFloodControl(lits, eng);
    pair<u8 *, size_t> confirmTmp
        = setupFullMultiConfs(lits, eng, bucketToLits, make_small);

    const size_t rawSize = sizeof(Teddy) + maskLen + confirmTmp.second +
                           floodControlTmp.second + link.second;
    const size_t size = ROUNDUP_N(rawSize, 16 * maskWidth);

    aligned_unique_ptr<FDR> fdr = aligned_zmalloc_unique<FDR>(size);
    assert(fdr); // otherwise would have thrown std::bad_alloc
    Teddy *teddy = (Teddy *)fdr.get(); // ugly
    u8 *teddy_base = (u8 *)teddy;

    teddy->size = size;
    teddy->engineID = eng.getID();
    teddy->maxStringLen = verify_u32(maxLen(lits));

    // confirm structures go right behind the masks
    u8 *cursor = teddy_base + sizeof(Teddy) + maskLen;
    memcpy(cursor, confirmTmp.first, confirmTmp.second);
    cursor += confirmTmp.second;
    aligned_free(confirmTmp.first);

    // then flood control
    teddy->floodOffset = verify_u32(cursor - teddy_base);
    memcpy(cursor, floodControlTmp.first, floodControlTmp.second);
    cursor += floodControlTmp.second;
    aligned_free(floodControlTmp.first);

    // then the optional link blob
    if (!link.first) {
        teddy->link = 0;
    } else {
        teddy->link = verify_u32(cursor - teddy_base);
        memcpy(cursor, link.first, link.second);
        aligned_free(link.first);
    }

    u8 *baseMsk = teddy_base + sizeof(Teddy);

    for (const auto &bucket : bucketToLits) {
        const u32 bucket_id = bucket.first;
        // bit of this bucket within its byte of every mask entry
        const u8 bmsk = 1U << (bucket_id % 8);

        for (const LiteralIndex lit_id : bucket.second) {
            const hwlmLiteral & l = lits[lit_id];
            DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
            const u32 sz = verify_u32(l.s.size());

            // fill in masks; mask j describes the j-th byte from the end
            for (u32 j = 0; j < eng.numMasks; j++) {
                const u32 msk_id_lo = j * 2 * maskWidth + (bucket_id / 8);
                const u32 msk_id_hi = (j * 2 + 1) * maskWidth + (bucket_id / 8);
                u8 *loTable = baseMsk + msk_id_lo * 16;
                u8 *hiTable = baseMsk + msk_id_hi * 16;

                // if we don't have a char at this position, fill in all
                // locations in these masks with '1'
                if (j >= sz) {
                    for (u32 n = 0; n < 16; n++) {
                        loTable[n] |= bmsk;
                        hiTable[n] |= bmsk;
                    }
                    continue;
                }

                // we do have a char at this position
                const u8 c = l.s[sz - 1 - j];
                const u32 hiShift = 4;
                const u32 n_hi = (c >> hiShift) & 0xf;
                const u32 n_lo = c & 0xf;

                const bool hasMsk =
                    j < l.msk.size() && l.msk[l.msk.size() - 1 - j];
                if (hasMsk) {
                    // the literal carries an and/cmp mask for this byte:
                    // accept every nibble value compatible with it
                    const size_t pos = l.msk.size() - 1 - j;
                    const u8 m = l.msk[pos];
                    const u8 m_hi = (m >> hiShift) & 0xf;
                    const u8 m_lo = m & 0xf;
                    const u8 cmp = l.cmp[pos];
                    const u8 cmp_lo = cmp & 0xf;
                    const u8 cmp_hi = (cmp >> hiShift) & 0xf;

                    for (u8 cm = 0; cm < 0x10; cm++) {
                        if ((cm & m_lo) == (cmp_lo & m_lo)) {
                            loTable[cm] |= bmsk;
                        }
                        if ((cm & m_hi) == (cmp_hi & m_hi)) {
                            hiTable[cm] |= bmsk;
                        }
                    }
                    continue;
                }

                if (l.nocase && ourisalpha(c)) {
                    // caseless letter: accept the high nibble with the
                    // 0x20 case bit both cleared and set
                    const u32 cmHalfClear = (0xdf >> hiShift) & 0xf;
                    const u32 cmHalfSet = (0x20 >> hiShift) & 0xf;
                    hiTable[n_hi & cmHalfClear] |= bmsk;
                    hiTable[n_hi | cmHalfSet] |= bmsk;
                } else {
                    hiTable[n_hi] |= bmsk;
                }
                loTable[n_lo] |= bmsk;
            }
        }
    }

#ifdef TEDDY_DEBUG
    for (u32 i = 0; i < eng.numMasks * 2; i++) {
        for (u32 j = 0; j < 16; j++) {
            u8 val = baseMsk[i * 16 + j];
            for (u32 k = 0; k < 8; k++) {
                printf("%s", ((val >> k) & 0x1) ? "1" : "0");
            }
            printf(" ");
        }
        printf("\n");
    }
#endif

    return fdr;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
 * \brief Build a Teddy table for \a lits.
 *
 * With hint == HINT_INVALID the engine is chosen from \a target and the
 * literals; otherwise the engine description for \a hint is used. Returns
 * nullptr when no engine description is available or the build fails.
 */
aligned_unique_ptr<FDR> teddyBuildTableHinted(const vector<hwlmLiteral> &lits,
                                              bool make_small, u32 hint,
                                              const target_t &target,
                                              pair<u8 *, size_t> link) {
    unique_ptr<TeddyEngineDescription> des;
    if (hint != HINT_INVALID) {
        des = getTeddyDescription(hint);
    } else {
        des = chooseTeddyEngine(target, lits);
    }

    if (des) {
        TeddyCompiler tc(lits, *des, make_small);
        return tc.build(link);
    }
    return nullptr;
}
|
||||
|
||||
} // namespace ue2
|
||||
56
src/fdr/teddy_compile.h
Normal file
56
src/fdr/teddy_compile.h
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief FDR literal matcher: Teddy build API.
|
||||
*/
|
||||
|
||||
#ifndef TEDDY_COMPILE_H
|
||||
#define TEDDY_COMPILE_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
|
||||
#include <vector>
|
||||
#include <utility> // std::pair
|
||||
|
||||
struct FDR;
|
||||
struct target_t;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct hwlmLiteral;
|
||||
|
||||
/**
 * \brief Build a Teddy matcher for the given literals.
 *
 * \param lits literals to build the matcher for.
 * \param make_small forwarded to the Teddy compiler.
 * \param hint engine ID to use; HINT_INVALID lets the builder choose an
 *        engine for \a target.
 * \param target target the engine is chosen for when no hint is given.
 * \param link passed through to the compiler's build step.
 * \return the built table, or a null pointer if no engine description could
 *         be obtained.
 */
ue2::aligned_unique_ptr<FDR>
|
||||
teddyBuildTableHinted(const std::vector<hwlmLiteral> &lits, bool make_small,
|
||||
u32 hint, const target_t &target,
|
||||
std::pair<u8 *, size_t> link);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // TEDDY_COMPILE_H
|
||||
207
src/fdr/teddy_engine_description.cpp
Normal file
207
src/fdr/teddy_engine_description.cpp
Normal file
@@ -0,0 +1,207 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "fdr.h"
|
||||
#include "fdr_internal.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
#include "fdr_confirm.h"
|
||||
#include "ue2common.h"
|
||||
#include "hs_internal.h"
|
||||
#include "fdr_engine_description.h"
|
||||
#include "teddy_internal.h"
|
||||
#include "teddy_engine_description.h"
|
||||
#include "util/make_unique.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
// Builds a description from a TeddyEngineDef. The id, the target derived from
// cpu_features via targetByArchFeatures(), numBuckets, confirmPullBackDistance
// and confirmTopLevelSplit are forwarded to the EngineDescription base;
// numMasks and packed are stored on this class.
TeddyEngineDescription::TeddyEngineDescription(const TeddyEngineDef &def)
|
||||
: EngineDescription(def.id, targetByArchFeatures(def.cpu_features),
|
||||
def.numBuckets, def.confirmPullBackDistance,
|
||||
def.confirmTopLevelSplit),
|
||||
numMasks(def.numMasks), packed(def.packed) {}
|
||||
|
||||
// Returns this engine's mask count (numMasks) as its default flood suffix
// length.
u32 TeddyEngineDescription::getDefaultFloodSuffixLength() const {
|
||||
return numMasks;
|
||||
}
|
||||
|
||||
bool TeddyEngineDescription::needConfirm(const vector<hwlmLiteral> &lits) const {
|
||||
if (packed || lits.size() > getNumBuckets()) {
|
||||
return true;
|
||||
}
|
||||
for (const auto &lit : lits) {
|
||||
if (lit.s.size() > numMasks || !lit.msk.empty()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
#include "teddy_autogen_compiler.cpp"
|
||||
|
||||
static
|
||||
size_t maxFloodTailLen(const vector<hwlmLiteral> &vl) {
|
||||
size_t max_flood_tail = 0;
|
||||
for (const auto &lit : vl) {
|
||||
const string &s = lit.s;
|
||||
assert(!s.empty());
|
||||
size_t j;
|
||||
for (j = 1; j < s.length(); j++) {
|
||||
if (s[s.length() - j - 1] != s[s.length() - 1]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
max_flood_tail = max(max_flood_tail, j);
|
||||
}
|
||||
return max_flood_tail;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief True if this Teddy engine is qualified to handle this set of literals
|
||||
* on this target.
|
||||
*/
|
||||
static
|
||||
bool isAllowed(const vector<hwlmLiteral> &vl, const TeddyEngineDescription &eng,
|
||||
const size_t max_lit_len, const target_t &target) {
|
||||
if (!eng.isValidOnTarget(target)) {
|
||||
DEBUG_PRINTF("%u disallowed: not valid on target\n", eng.getID());
|
||||
return false;
|
||||
}
|
||||
if (eng.getNumBuckets() < vl.size() && !eng.packed) {
|
||||
DEBUG_PRINTF("%u disallowed: num buckets < num lits and not packed\n",
|
||||
eng.getID());
|
||||
return false;
|
||||
}
|
||||
if (eng.getNumBuckets() * TEDDY_BUCKET_LOAD < vl.size()) {
|
||||
DEBUG_PRINTF("%u disallowed: too many lits for num buckets\n",
|
||||
eng.getID());
|
||||
return false;
|
||||
}
|
||||
if (eng.numMasks > max_lit_len) {
|
||||
DEBUG_PRINTF("%u disallowed: more masks than max lit len (%zu)\n",
|
||||
eng.getID(), max_lit_len);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (vl.size() > 40) {
|
||||
u32 n_small_lits = 0;
|
||||
for (const auto &lit : vl) {
|
||||
if (lit.s.length() < eng.numMasks) {
|
||||
n_small_lits++;
|
||||
}
|
||||
}
|
||||
if (n_small_lits * 5 > vl.size()) {
|
||||
DEBUG_PRINTF("too many short literals (%u)\n", n_small_lits);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * \brief Pick the best-scoring Teddy engine able to handle \a vl on \a target.
 *
 * Every description produced by getTeddyDescriptions() that passes isAllowed()
 * is given a score; the first description with the strictly highest score is
 * selected.
 *
 * \param target target the engine must be valid on.
 * \param vl literal set the engine must handle.
 * \return a copy of the selected description, or nullptr if no description is
 *         allowed.
 */
unique_ptr<TeddyEngineDescription>
chooseTeddyEngine(const target_t &target, const vector<hwlmLiteral> &vl) {
    vector<TeddyEngineDescription> descs;
    getTeddyDescriptions(&descs);
    const TeddyEngineDescription *best = nullptr;

    const size_t max_lit_len = maxLen(vl);
    const size_t max_flood_tail = maxFloodTailLen(vl);
    DEBUG_PRINTF("%zu lits, max_lit_len=%zu, max_flood_tail=%zu\n", vl.size(),
                 max_lit_len, max_flood_tail);

    u32 best_score = 0;
    for (const TeddyEngineDescription &eng : descs) {
        if (!isAllowed(vl, eng, max_lit_len, target)) {
            continue;
        }

        u32 score = 0;

        // We prefer unpacked Teddy models.
        if (!eng.packed) {
            score += 100;
        }

        // If we're heavily loaded, we prefer to have more masks.
        if (vl.size() > 4 * eng.getNumBuckets()) {
            score += eng.numMasks * 4;
        } else {
            // Lightly loaded cases are great.
            score += 100;
        }

        // We want enough masks to avoid becoming flood-prone.
        if (eng.numMasks > max_flood_tail) {
            score += 50;
        }

        // We prefer having 3 masks. 3 is just right.
        score += 6 / (abs(3 - (int)eng.numMasks) + 1);

        // We prefer cheaper, smaller Teddy models.
        score += 16 / eng.getNumBuckets();

        DEBUG_PRINTF("teddy %u: masks=%u, buckets=%u, packed=%u "
                     "-> score=%u\n",
                     eng.getID(), eng.numMasks, eng.getNumBuckets(),
                     eng.packed ? 1U : 0U, score);

        if (!best || score > best_score) {
            // Points into descs, which stays alive until the copy below.
            best = &eng;
            best_score = score;
        }
    }

    if (!best) {
        DEBUG_PRINTF("failed to find engine\n");
        return nullptr;
    }

    DEBUG_PRINTF("using engine %u\n", best->getID());
    return ue2::make_unique<TeddyEngineDescription>(*best);
}
|
||||
|
||||
/**
 * \brief Look up the Teddy engine description with ID \a engineID.
 *
 * \return a copy of the first description from getTeddyDescriptions() whose
 *         getID() equals \a engineID, or nullptr if there is none.
 */
unique_ptr<TeddyEngineDescription> getTeddyDescription(u32 engineID) {
    vector<TeddyEngineDescription> all;
    getTeddyDescriptions(&all);

    for (auto it = all.cbegin(); it != all.cend(); ++it) {
        if (it->getID() != engineID) {
            continue;
        }
        return ue2::make_unique<TeddyEngineDescription>(*it);
    }

    return nullptr;
}
|
||||
|
||||
} // namespace ue2
|
||||
70
src/fdr/teddy_engine_description.h
Normal file
70
src/fdr/teddy_engine_description.h
Normal file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef TEDDY_ENGINE_DESCRIPTION_H
|
||||
#define TEDDY_ENGINE_DESCRIPTION_H
|
||||
|
||||
#include "engine_description.h"
|
||||
#include "fdr_compile_internal.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
// Per-bucket literal load limit: isAllowed() in teddy_engine_description.cpp
// rejects an engine when numBuckets * TEDDY_BUCKET_LOAD is less than the
// number of literals.
#define TEDDY_BUCKET_LOAD 6
|
||||
|
||||
// Plain description record from which a TeddyEngineDescription is constructed.
struct TeddyEngineDef {
|
||||
u32 id; // engine ID, forwarded to the EngineDescription base
|
||||
u64a cpu_features; // converted to a target with targetByArchFeatures()
|
||||
u32 numMasks; // copied to TeddyEngineDescription::numMasks
|
||||
u32 numBuckets; // forwarded to the EngineDescription base
|
||||
bool packed; // copied to TeddyEngineDescription::packed
|
||||
u32 confirmPullBackDistance; // forwarded to the EngineDescription base
|
||||
u32 confirmTopLevelSplit; // forwarded to the EngineDescription base
|
||||
};
|
||||
|
||||
// EngineDescription specialisation for Teddy engines; adds the mask count and
// the packed flag on top of the base description.
class TeddyEngineDescription : public EngineDescription {
|
||||
public:
|
||||
u32 numMasks; // number of masks; also returned as the default flood suffix length
|
||||
bool packed; // packed engines always need confirm (see needConfirm)
|
||||
|
||||
// Initialises the base class and the two fields above from def.
explicit TeddyEngineDescription(const TeddyEngineDef &def);
|
||||
|
||||
// Returns numMasks.
u32 getDefaultFloodSuffixLength() const override;
|
||||
// True if the engine is packed, there are more literals than buckets, or any
// literal is longer than numMasks or has a non-empty msk.
bool needConfirm(const std::vector<hwlmLiteral> &lits) const;
|
||||
};
|
||||
|
||||
// Scores every allowed Teddy engine description for the literal set vl on the
// given target and returns a copy of the best one, or nullptr if none is
// allowed.
std::unique_ptr<TeddyEngineDescription>
|
||||
chooseTeddyEngine(const target_t &target, const std::vector<hwlmLiteral> &vl);
|
||||
// Returns a copy of the description whose ID equals engineID, or nullptr if no
// such description exists.
std::unique_ptr<TeddyEngineDescription> getTeddyDescription(u32 engineID);
|
||||
// NOTE(review): definition is not in this file; callers pass an empty vector
// and then iterate it, so this presumably fills *out with all known Teddy
// engine descriptions -- confirm against the autogenerated source.
void getTeddyDescriptions(std::vector<TeddyEngineDescription> *out);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
46
src/fdr/teddy_internal.h
Normal file
46
src/fdr/teddy_internal.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef TEDDY_INTERNAL_H
|
||||
#define TEDDY_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
// Header structure for a Teddy engine, made up of eight u32 fields.
// first part is compatible with an FDR
|
||||
struct Teddy {
|
||||
u32 engineID;
|
||||
u32 size;
|
||||
u32 maxStringLen;
|
||||
u32 floodOffset;
|
||||
u32 link;
|
||||
u32 pad1; // NOTE(review): pad1..pad3 look like padding only -- confirm
|
||||
u32 pad2;
|
||||
u32 pad3;
|
||||
};
|
||||
|
||||
#endif
|
||||
374
src/grey.cpp
Normal file
374
src/grey.cpp
Normal file
@@ -0,0 +1,374 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "grey.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdlib> // exit
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#define DEFAULT_MAX_HISTORY 60
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/**
 * \brief Construct a Grey with every tunable set to its default value.
 *
 * Asserts that the default maxAnchoredRegion is below 64.
 */
Grey::Grey(void)
    : optimiseComponentTree(true),
      performGraphSimplification(true),
      prefilterReductions(true),
      removeEdgeRedundancy(true),
      allowGough(true),
      allowHaigLit(true),
      allowLitHaig(true),
      allowLbr(true),
      allowMcClellan(true),
      allowPuff(true),
      allowRose(true),
      allowExtendedNFA(true), // bounded repeats of course
      allowLimExNFA(true),
      allowSidecar(true),
      allowAnchoredAcyclic(true),
      allowSmallLiteralSet(true),
      allowCastle(true),
      allowDecoratedLiteral(true),
      allowNoodle(true),
      fdrAllowTeddy(true),
      puffImproveHead(true),
      castleExclusive(true),
      mergeSEP(true),      // short exhaustible passthroughs
      mergeRose(true),     // roses inside rose
      mergeSuffixes(true), // suffix nfas inside rose
      mergeOutfixes(true),
      onlyOneOutfix(false),
      allowShermanStates(true),
      allowMcClellan8(true),
      highlanderPruneDFA(true),
      minimizeDFA(true),
      accelerateDFA(true),
      accelerateNFA(true),
      reverseAccelerate(true),
      squashNFA(true),
      compressNFAState(true),
      numberNFAStatesWrong(false), // debugging only
      highlanderSquash(true),
      allowZombies(true),
      floodAsPuffette(false),
      nfaForceSize(0),
      nfaForceShifts(0),
      maxHistoryAvailable(DEFAULT_MAX_HISTORY),
      minHistoryAvailable(0), // debugging only
      maxAnchoredRegion(63),  // for rose's atable to run over
      minRoseLiteralLength(3),
      minRoseNetflowLiteralLength(2),
      maxRoseNetflowEdges(50000), // otherwise no netflow pass.
      minExtBoundedRepeatSize(32),
      goughCopyPropagate(true),
      goughRegisterAllocate(true),
      shortcutLiterals(true),
      roseGraphReduction(true),
      roseRoleAliasing(true),
      roseMasks(true),
      roseMaxBadLeafLength(5),
      roseConvertInfBadLeaves(true),
      roseConvertFloodProneSuffixes(true),
      roseMergeRosesDuringAliasing(true),
      roseMultiTopRoses(true),
      roseHamsterMasks(true),
      roseLookaroundMasks(true),
      roseMcClellanPrefix(1),
      roseMcClellanSuffix(1),
      roseMcClellanOutfix(2),
      roseTransformDelay(true),
      roseDesiredSplit(4),
      earlyMcClellanPrefix(true),
      earlyMcClellanInfix(true),
      earlyMcClellanSuffix(true),
      allowCountingMiracles(true),
      allowSomChain(true),
      somMaxRevNfaLength(126),
      hamsterAccelForward(true),
      hamsterAccelReverse(false),
      miracleHistoryBonus(16),
      equivalenceEnable(true),

      // McClellan dfas for small patterns
      allowSmallWrite(true),

      // Largest buffer that can be considered a small write; all blocks
      // larger than this are given to rose &co.
      smallWriteLargestBuffer(70),
      smallWriteLargestBufferBad(35),
      limitSmallWriteOutfixSize(1048576), // 1 MB
      dumpFlags(0),
      limitPatternCount(8000000), // 8M patterns
      limitPatternLength(16000),  // 16K bytes
      limitGraphVertices(500000), // 500K vertices
      limitGraphEdges(1000000),   // 1M edges
      limitReportCount(4 * 8000000),
      limitLiteralCount(8000000), // 8M literals
      limitLiteralLength(16000),
      limitLiteralMatcherChars(1073741824), // 1 GB
      limitLiteralMatcherSize(1073741824),  // 1 GB
      limitRoseRoleCount(4 * 8000000),
      limitRoseEngineCount(8000000),     // 8M engines
      limitRoseAnchoredSize(1073741824), // 1 GB
      limitEngineSize(1073741824),       // 1 GB
      limitDFASize(1073741824),          // 1 GB
      limitNFASize(1048576),             // 1 MB
      limitLBRSize(1048576)              // 1 MB
{
    // a[lm]_log_sum have limited capacity
    assert(maxAnchoredRegion < 64);
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#ifndef RELEASE_BUILD
|
||||
|
||||
#include <boost/lexical_cast.hpp>
|
||||
using boost::lexical_cast;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/**
 * \brief Apply a string of grey-box overrides to \a g.
 *
 * \a s is a sequence of "key:value" pairs separated by ';', where each value
 * is parsed as an unsigned integer. A key matching a Grey field name sets that
 * field; the keys "simple_som", "forceOutfixesNFA", "forceOutfixesDFA" and
 * "forceOutfixes" switch groups of fields at once. Passing "help" or "help:"
 * prints every overridable field with its default value.
 *
 * An unknown key, or a value that is not a valid unsigned integer, prints a
 * diagnostic followed by the help listing and terminates the process with
 * exit(1). A trailing fragment without a ':' is ignored.
 *
 * \param g Grey structure to modify.
 * \param s override string.
 */
void applyGreyOverrides(Grey *g, const string &s) {
    string::const_iterator p = s.begin();
    string::const_iterator pe = s.end();
    string help = "help:0";
    bool invalid_key_seen = false;
    Grey defaultg;

    if (s == "help" || s == "help:") {
        printf("Valid grey overrides:\n");
        p = help.begin();
        pe = help.end();
    }

    while (p != pe) {
        string::const_iterator ke = find(p, pe, ':');

        if (ke == pe) {
            break;
        }

        string key(p, ke);

        string::const_iterator ve = find(ke, pe, ';');

        // lexical_cast throws on an empty or non-numeric value; report it
        // through the same path as an unknown key instead of letting the
        // exception escape.
        const string value_str(ke + 1, ve);
        unsigned int value = 0;
        try {
            value = lexical_cast<unsigned int>(value_str);
        } catch (const boost::bad_lexical_cast &) {
            printf("Invalid grey override key %s:%s\n", key.c_str(),
                   value_str.c_str());
            invalid_key_seen = true;
            break;
        }
        bool done = false;

        /* surely there exists a nice template to go with this macro to make
         * all the boring code disappear */
#define G_UPDATE(k) do {                                            \
            if (key == ""#k) { g->k = value; done = true;}          \
            if (key == "help") {                                    \
                printf("\t%-30s\tdefault: %s\n", #k,                \
                       lexical_cast<string>(defaultg.k).c_str());   \
            }                                                       \
        } while (0)

        G_UPDATE(optimiseComponentTree);
        G_UPDATE(performGraphSimplification);
        G_UPDATE(prefilterReductions);
        G_UPDATE(removeEdgeRedundancy);
        G_UPDATE(allowGough);
        G_UPDATE(allowHaigLit);
        G_UPDATE(allowLitHaig);
        G_UPDATE(allowLbr);
        G_UPDATE(allowMcClellan);
        G_UPDATE(allowPuff);
        G_UPDATE(allowRose);
        G_UPDATE(allowExtendedNFA);
        G_UPDATE(allowLimExNFA);
        G_UPDATE(allowSidecar);
        G_UPDATE(allowAnchoredAcyclic);
        G_UPDATE(allowSmallLiteralSet);
        G_UPDATE(allowCastle);
        G_UPDATE(allowDecoratedLiteral);
        G_UPDATE(allowNoodle);
        G_UPDATE(fdrAllowTeddy);
        G_UPDATE(puffImproveHead);
        G_UPDATE(castleExclusive);
        G_UPDATE(mergeSEP);
        G_UPDATE(mergeRose);
        G_UPDATE(mergeSuffixes);
        G_UPDATE(mergeOutfixes);
        G_UPDATE(onlyOneOutfix);
        G_UPDATE(allowShermanStates);
        G_UPDATE(allowMcClellan8);
        G_UPDATE(highlanderPruneDFA);
        G_UPDATE(minimizeDFA);
        G_UPDATE(accelerateDFA);
        G_UPDATE(accelerateNFA);
        G_UPDATE(reverseAccelerate);
        G_UPDATE(squashNFA);
        G_UPDATE(compressNFAState);
        G_UPDATE(numberNFAStatesWrong);
        G_UPDATE(allowZombies);
        G_UPDATE(floodAsPuffette);
        G_UPDATE(nfaForceSize);
        G_UPDATE(nfaForceShifts);
        G_UPDATE(highlanderSquash);
        G_UPDATE(maxHistoryAvailable);
        G_UPDATE(minHistoryAvailable);
        G_UPDATE(maxAnchoredRegion);
        G_UPDATE(minRoseLiteralLength);
        G_UPDATE(minRoseNetflowLiteralLength);
        G_UPDATE(maxRoseNetflowEdges);
        G_UPDATE(minExtBoundedRepeatSize);
        G_UPDATE(goughCopyPropagate);
        G_UPDATE(goughRegisterAllocate);
        G_UPDATE(shortcutLiterals);
        G_UPDATE(roseGraphReduction);
        G_UPDATE(roseRoleAliasing);
        G_UPDATE(roseMasks);
        G_UPDATE(roseMaxBadLeafLength);
        G_UPDATE(roseConvertInfBadLeaves);
        G_UPDATE(roseConvertFloodProneSuffixes);
        G_UPDATE(roseMergeRosesDuringAliasing);
        G_UPDATE(roseMultiTopRoses);
        G_UPDATE(roseHamsterMasks);
        G_UPDATE(roseLookaroundMasks);
        G_UPDATE(roseMcClellanPrefix);
        G_UPDATE(roseMcClellanSuffix);
        G_UPDATE(roseMcClellanOutfix);
        G_UPDATE(roseTransformDelay);
        G_UPDATE(roseDesiredSplit);
        G_UPDATE(earlyMcClellanPrefix);
        G_UPDATE(earlyMcClellanInfix);
        G_UPDATE(earlyMcClellanSuffix);
        G_UPDATE(allowSomChain);
        G_UPDATE(allowCountingMiracles);
        G_UPDATE(somMaxRevNfaLength);
        G_UPDATE(hamsterAccelForward);
        G_UPDATE(hamsterAccelReverse);
        G_UPDATE(miracleHistoryBonus);
        G_UPDATE(equivalenceEnable);
        G_UPDATE(allowSmallWrite);
        G_UPDATE(smallWriteLargestBuffer);
        G_UPDATE(smallWriteLargestBufferBad);
        G_UPDATE(limitSmallWriteOutfixSize);
        G_UPDATE(limitPatternCount);
        G_UPDATE(limitPatternLength);
        G_UPDATE(limitGraphVertices);
        G_UPDATE(limitGraphEdges);
        G_UPDATE(limitReportCount);
        G_UPDATE(limitLiteralCount);
        G_UPDATE(limitLiteralLength);
        G_UPDATE(limitLiteralMatcherChars);
        G_UPDATE(limitLiteralMatcherSize);
        G_UPDATE(limitRoseRoleCount);
        G_UPDATE(limitRoseEngineCount);
        G_UPDATE(limitRoseAnchoredSize);
        G_UPDATE(limitEngineSize);
        G_UPDATE(limitDFASize);
        G_UPDATE(limitNFASize);
        G_UPDATE(limitLBRSize);

#undef G_UPDATE
        // Composite keys: each one flips a fixed group of fields and ignores
        // the parsed value.
        if (key == "simple_som") {
            g->allowHaigLit = false;
            g->allowLitHaig = false;
            g->allowSomChain = false;
            g->somMaxRevNfaLength = 0;
            done = true;
        }
        if (key == "forceOutfixesNFA") {
            g->allowAnchoredAcyclic = false;
            g->allowCastle = false;
            g->allowDecoratedLiteral = false;
            g->allowGough = false;
            g->allowHaigLit = false;
            g->allowLbr = false;
            g->allowLimExNFA = true;
            g->allowLitHaig = false;
            g->allowMcClellan = false;
            g->allowPuff = false;
            g->allowRose = false;
            g->allowSmallLiteralSet = false;
            g->roseMasks = false;
            done = true;
        }
        if (key == "forceOutfixesDFA") {
            g->allowAnchoredAcyclic = false;
            g->allowCastle = false;
            g->allowDecoratedLiteral = false;
            g->allowGough = false;
            g->allowHaigLit = false;
            g->allowLbr = false;
            g->allowLimExNFA = false;
            g->allowLitHaig = false;
            g->allowMcClellan = true;
            g->allowPuff = false;
            g->allowRose = false;
            g->allowSmallLiteralSet = false;
            g->roseMasks = false;
            done = true;
        }
        if (key == "forceOutfixes") {
            g->allowAnchoredAcyclic = false;
            g->allowCastle = false;
            g->allowDecoratedLiteral = false;
            g->allowGough = true;
            g->allowHaigLit = false;
            g->allowLbr = false;
            g->allowLimExNFA = true;
            g->allowLitHaig = false;
            g->allowMcClellan = true;
            g->allowPuff = false;
            g->allowRose = false;
            g->allowSmallLiteralSet = false;
            g->roseMasks = false;
            done = true;
        }

        if (!done && key != "help") {
            printf("Invalid grey override key %s:%u\n", key.c_str(), value);
            invalid_key_seen = true;
        }

        p = ve;

        // Step over the ';' separating this pair from the next one.
        if (p != pe) {
            ++p;
        }
    }

    if (invalid_key_seen) {
        applyGreyOverrides(g, "help");
        exit(1);
    }

    assert(g->maxAnchoredRegion < 64); /* a[lm]_log_sum have limited capacity */
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
197
src/grey.h
Normal file
197
src/grey.h
Normal file
@@ -0,0 +1,197 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef GREY_H
|
||||
#define GREY_H
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct Grey {
|
||||
Grey(void);
|
||||
|
||||
bool optimiseComponentTree;
|
||||
|
||||
bool performGraphSimplification;
|
||||
bool prefilterReductions;
|
||||
bool removeEdgeRedundancy;
|
||||
|
||||
bool allowGough;
|
||||
bool allowHaigLit;
|
||||
bool allowLitHaig;
|
||||
bool allowLbr;
|
||||
bool allowMcClellan;
|
||||
bool allowPuff;
|
||||
bool allowRose;
|
||||
bool allowExtendedNFA;
|
||||
bool allowLimExNFA;
|
||||
bool allowSidecar;
|
||||
bool allowAnchoredAcyclic;
|
||||
bool allowSmallLiteralSet;
|
||||
bool allowCastle;
|
||||
bool allowDecoratedLiteral;
|
||||
|
||||
bool allowNoodle;
|
||||
bool fdrAllowTeddy;
|
||||
|
||||
bool puffImproveHead;
|
||||
bool castleExclusive; // enable castle mutual exclusion analysis
|
||||
|
||||
bool mergeSEP;
|
||||
bool mergeRose;
|
||||
bool mergeSuffixes;
|
||||
bool mergeOutfixes;
|
||||
bool onlyOneOutfix; // if > 1 outfix, fail compile
|
||||
|
||||
bool allowShermanStates;
|
||||
bool allowMcClellan8;
|
||||
bool highlanderPruneDFA;
|
||||
bool minimizeDFA;
|
||||
|
||||
bool accelerateDFA;
|
||||
bool accelerateNFA;
|
||||
bool reverseAccelerate;
|
||||
|
||||
bool squashNFA;
|
||||
bool compressNFAState;
|
||||
bool numberNFAStatesWrong;
|
||||
bool highlanderSquash;
|
||||
bool allowZombies;
|
||||
bool floodAsPuffette;
|
||||
|
||||
u32 nfaForceSize;
|
||||
u32 nfaForceShifts;
|
||||
|
||||
u32 maxHistoryAvailable;
|
||||
u32 minHistoryAvailable;
|
||||
u32 maxAnchoredRegion;
|
||||
u32 minRoseLiteralLength;
|
||||
u32 minRoseNetflowLiteralLength;
|
||||
u32 maxRoseNetflowEdges;
|
||||
|
||||
u32 minExtBoundedRepeatSize; /* to be considered for ng_repeat */
|
||||
|
||||
bool goughCopyPropagate;
|
||||
bool goughRegisterAllocate;
|
||||
|
||||
bool shortcutLiterals;
|
||||
|
||||
bool roseGraphReduction;
|
||||
bool roseRoleAliasing;
|
||||
bool roseMasks;
|
||||
u32 roseMaxBadLeafLength;
|
||||
bool roseConvertInfBadLeaves;
|
||||
bool roseConvertFloodProneSuffixes;
|
||||
bool roseMergeRosesDuringAliasing;
|
||||
bool roseMultiTopRoses;
|
||||
bool roseHamsterMasks;
|
||||
bool roseLookaroundMasks;
|
||||
u32 roseMcClellanPrefix; /* 0 = off, 1 = only if large nfa, 2 = always */
|
||||
u32 roseMcClellanSuffix; /* 0 = off, 1 = only if very large nfa, 2 =
|
||||
* always */
|
||||
u32 roseMcClellanOutfix; /* 0 = off, 1 = sometimes, 2 = almost always */
|
||||
bool roseTransformDelay;
|
||||
u32 roseDesiredSplit;
|
||||
|
||||
bool earlyMcClellanPrefix;
|
||||
bool earlyMcClellanInfix;
|
||||
bool earlyMcClellanSuffix;
|
||||
|
||||
bool allowCountingMiracles;
|
||||
|
||||
bool allowSomChain;
|
||||
u32 somMaxRevNfaLength;
|
||||
|
||||
bool hamsterAccelForward;
|
||||
bool hamsterAccelReverse; // currently not implemented
|
||||
|
||||
u32 miracleHistoryBonus; /* cheap hack to make miracles better, TODO
|
||||
* something dignified */
|
||||
|
||||
bool equivalenceEnable;
|
||||
|
||||
// SmallWrite engine
|
||||
bool allowSmallWrite;
|
||||
u32 smallWriteLargestBuffer; // largest buffer that can be small write
|
||||
u32 smallWriteLargestBufferBad;// largest buffer that can be small write
|
||||
u32 limitSmallWriteOutfixSize; //!< max total size of outfix DFAs
|
||||
|
||||
enum DumpFlags {
|
||||
DUMP_NONE = 0,
|
||||
DUMP_BASICS = 1 << 0, // Dump basic textual data
|
||||
DUMP_PARSE = 1 << 1, // Dump component tree to .txt
|
||||
DUMP_INT_GRAPH = 1 << 2, // Dump non-implementation graphs
|
||||
DUMP_IMPL = 1 << 3 // Dump implementation graphs
|
||||
};
|
||||
|
||||
u32 dumpFlags;
|
||||
std::string dumpPath;
|
||||
|
||||
/* Resource limits. These are somewhat arbitrary, but are intended to bound
|
||||
* the input to many of our internal structures. Exceeding one of these
|
||||
* limits will cause an error to be returned to the user.
|
||||
*
|
||||
* NOTE: Raising these limitations may cause smoke to come out of parts of
|
||||
* the runtime. */
|
||||
|
||||
u32 limitPatternCount; //!< max number of patterns
|
||||
u32 limitPatternLength; //!< max number of characters in a regex
|
||||
u32 limitGraphVertices; //!< max number of states in built NFA graph
|
||||
u32 limitGraphEdges; //!< max number of edges in build NFA graph
|
||||
u32 limitReportCount; //!< max number of ReportIDs allocated internally
|
||||
|
||||
// HWLM literal matcher limits.
|
||||
u32 limitLiteralCount; //!< max number of literals in an HWLM table
|
||||
u32 limitLiteralLength; //!< max number of characters in a literal
|
||||
u32 limitLiteralMatcherChars; //!< max characters in an HWLM literal matcher
|
||||
u32 limitLiteralMatcherSize; //!< max size of an HWLM matcher (in bytes)
|
||||
|
||||
// Rose limits.
|
||||
u32 limitRoseRoleCount; //!< max number of Rose roles
|
||||
u32 limitRoseEngineCount; //!< max prefix/infix/suffix/outfix engines
|
||||
u32 limitRoseAnchoredSize; //!< max total size of anchored DFAs (bytes)
|
||||
|
||||
// Engine (DFA/NFA/etc) limits.
|
||||
u32 limitEngineSize; //!< max size of an engine (in bytes)
|
||||
u32 limitDFASize; //!< max size of a DFA (in bytes)
|
||||
u32 limitNFASize; //!< max size of an NFA (in bytes)
|
||||
u32 limitLBRSize; //!< max size of an LBR engine (in bytes)
|
||||
};
|
||||
|
||||
#ifndef RELEASE_BUILD
|
||||
#include <string>
|
||||
void applyGreyOverrides(Grey *g, const std::string &overrides);
|
||||
#endif
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
419
src/hs.cpp
Normal file
419
src/hs.cpp
Normal file
@@ -0,0 +1,419 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Compiler front-end, including public API calls for compilation.
|
||||
*/
|
||||
#include "allocator.h"
|
||||
#include "ue2common.h"
|
||||
#include "grey.h"
|
||||
#include "hs_compile.h"
|
||||
#include "hs_internal.h"
|
||||
#include "database.h"
|
||||
#include "compiler/compiler.h"
|
||||
#include "compiler/error.h"
|
||||
#include "nfagraph/ng.h"
|
||||
#include "nfagraph/ng_expr_info.h"
|
||||
#include "parser/parse_error.h"
|
||||
#include "parser/Parser.h"
|
||||
#include "parser/prefilter.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/cpuid_flags.h"
|
||||
#include "util/depth.h"
|
||||
#include "util/popcount.h"
|
||||
#include "util/target_info.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <limits.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
using namespace ue2;
|
||||
|
||||
/** \brief Cheap check that no unexpected mode flags are on. */
|
||||
static
|
||||
bool validModeFlags(unsigned int mode) {
|
||||
static const unsigned allModeFlags = HS_MODE_BLOCK
|
||||
| HS_MODE_STREAM
|
||||
| HS_MODE_VECTORED
|
||||
| HS_MODE_SOM_HORIZON_LARGE
|
||||
| HS_MODE_SOM_HORIZON_MEDIUM
|
||||
| HS_MODE_SOM_HORIZON_SMALL;
|
||||
|
||||
return !(mode & ~allModeFlags);
|
||||
}
|
||||
|
||||
/** \brief Validate mode flags. */
|
||||
static
|
||||
bool checkMode(unsigned int mode, hs_compile_error **comp_error) {
|
||||
// First, check that only bits with meaning are on.
|
||||
if (!validModeFlags(mode)) {
|
||||
*comp_error = generateCompileError("Invalid parameter: "
|
||||
"unrecognised mode flags.", -1);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Our mode must be ONE of (block, streaming, vectored).
|
||||
unsigned checkmode
|
||||
= mode & (HS_MODE_STREAM | HS_MODE_BLOCK | HS_MODE_VECTORED);
|
||||
if (popcount32(checkmode) != 1) {
|
||||
*comp_error = generateCompileError(
|
||||
"Invalid parameter: mode must have one "
|
||||
"(and only one) of HS_MODE_BLOCK, HS_MODE_STREAM or "
|
||||
"HS_MODE_VECTORED set.",
|
||||
-1);
|
||||
return false;
|
||||
}
|
||||
|
||||
// If you specify SOM precision, you must be in streaming mode and you only
|
||||
// get to have one.
|
||||
unsigned somMode = mode & (HS_MODE_SOM_HORIZON_LARGE |
|
||||
HS_MODE_SOM_HORIZON_MEDIUM |
|
||||
HS_MODE_SOM_HORIZON_SMALL);
|
||||
if (somMode) {
|
||||
if (!(mode & HS_MODE_STREAM)) {
|
||||
*comp_error = generateCompileError("Invalid parameter: the "
|
||||
"HS_MODE_SOM_HORIZON_ mode flags may only be set in "
|
||||
"streaming mode.", -1);
|
||||
return false;
|
||||
|
||||
}
|
||||
if ((somMode & (somMode - 1)) != 0) {
|
||||
*comp_error = generateCompileError("Invalid parameter: only one "
|
||||
"HS_MODE_SOM_HORIZON_ mode flag can be set.", -1);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool checkPlatform(const hs_platform_info *p, hs_compile_error **comp_error) {
|
||||
#define HS_TUNE_LAST HS_TUNE_FAMILY_BDW
|
||||
#define HS_CPU_FEATURES_ALL (HS_CPU_FEATURES_AVX2)
|
||||
|
||||
if (!p) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (p->cpu_features & ~HS_CPU_FEATURES_ALL) {
|
||||
*comp_error = generateCompileError("Invalid cpu features specified in "
|
||||
"the platform information.", -1);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (p->tune > HS_TUNE_LAST) {
|
||||
*comp_error = generateCompileError("Invalid tuning value specified in "
|
||||
"the platform information.", -1);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** \brief Convert from SOM mode to bytes of precision. */
|
||||
static
|
||||
unsigned getSomPrecision(unsigned mode) {
|
||||
if (mode & HS_MODE_VECTORED) {
|
||||
/* always assume full precision for vectoring */
|
||||
return 8;
|
||||
}
|
||||
|
||||
if (mode & HS_MODE_SOM_HORIZON_LARGE) {
|
||||
return 8;
|
||||
} else if (mode & HS_MODE_SOM_HORIZON_MEDIUM) {
|
||||
return 4;
|
||||
} else if (mode & HS_MODE_SOM_HORIZON_SMALL) {
|
||||
return 2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
hs_error_t
|
||||
hs_compile_multi_int(const char *const *expressions, const unsigned *flags,
|
||||
const unsigned *ids, const hs_expr_ext *const *ext,
|
||||
unsigned elements, unsigned mode,
|
||||
const hs_platform_info_t *platform, hs_database_t **db,
|
||||
hs_compile_error_t **comp_error, const Grey &g) {
|
||||
// Check the args: note that it's OK for flags, ids or ext to be null.
|
||||
if (!comp_error) {
|
||||
if (db) {
|
||||
*db = nullptr;
|
||||
}
|
||||
// nowhere to write the string, but we can still report an error code
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
if (!db) {
|
||||
*comp_error = generateCompileError("Invalid parameter: db is NULL", -1);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
if (!expressions) {
|
||||
*db = nullptr;
|
||||
*comp_error
|
||||
= generateCompileError("Invalid parameter: expressions is NULL",
|
||||
-1);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
if (elements == 0) {
|
||||
*db = nullptr;
|
||||
*comp_error = generateCompileError("Invalid parameter: elements is zero", -1);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
|
||||
if (!checkMode(mode, comp_error)) {
|
||||
*db = nullptr;
|
||||
assert(*comp_error); // set by checkMode.
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
|
||||
if (!checkPlatform(platform, comp_error)) {
|
||||
*db = nullptr;
|
||||
assert(*comp_error); // set by checkPlatform.
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
|
||||
if (elements > g.limitPatternCount) {
|
||||
*db = nullptr;
|
||||
*comp_error = generateCompileError("Number of patterns too large", -1);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
|
||||
// This function is simply a wrapper around both the parser and compiler
|
||||
bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
|
||||
bool isVectored = mode & HS_MODE_VECTORED;
|
||||
unsigned somPrecision = getSomPrecision(mode);
|
||||
|
||||
target_t target_info = platform ? target_t(*platform)
|
||||
: get_current_target();
|
||||
|
||||
CompileContext cc(isStreaming, isVectored, target_info, g);
|
||||
NG ng(cc, somPrecision);
|
||||
|
||||
try {
|
||||
for (unsigned int i = 0; i < elements; i++) {
|
||||
// Add this expression to the compiler
|
||||
try {
|
||||
addExpression(ng, i, expressions[i], flags ? flags[i] : 0,
|
||||
ext ? ext[i] : nullptr, ids ? ids[i] : 0);
|
||||
} catch (CompileError &e) {
|
||||
/* Caught a parse error:
|
||||
* throw it upstream as a CompileError with a specific index */
|
||||
e.setExpressionIndex(i);
|
||||
throw; /* do not slice */
|
||||
}
|
||||
}
|
||||
|
||||
unsigned length = 0;
|
||||
struct hs_database *out = build(ng, &length);
|
||||
|
||||
assert(out); // should have thrown exception on error
|
||||
assert(length);
|
||||
|
||||
*db = out;
|
||||
*comp_error = nullptr;
|
||||
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
catch (const CompileError &e) {
|
||||
// Compiler error occurred
|
||||
*db = nullptr;
|
||||
*comp_error = generateCompileError(e.reason,
|
||||
e.hasIndex ? (int)e.index : -1);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
catch (std::bad_alloc) {
|
||||
*db = nullptr;
|
||||
*comp_error = const_cast<hs_compile_error_t *>(&hs_enomem);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
catch (...) {
|
||||
assert(!"Internal error, unexpected exception");
|
||||
*db = nullptr;
|
||||
*comp_error = const_cast<hs_compile_error_t *>(&hs_einternal);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
/**
 * \brief Compile a single expression into a database.
 *
 * The expression is compiled as a one-element pattern set with ID 0 and no
 * extended parameters; all other validation is done by
 * hs_compile_multi_int().
 *
 * \param expression  Pattern to compile. A null pointer is rejected here.
 * \param flags       Flags for the pattern.
 * \param mode        Mode flags.
 * \param platform    Target platform, or null.
 * \param db          Receives the database on success. Set to nullptr when
 *                    \a expression is null (if \a db itself is non-null).
 * \param error       Receives the compile error on failure.
 * \return HS_SUCCESS on success, HS_COMPILER_ERROR on failure.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_compile(const char *expression, unsigned flags, unsigned mode,
                      const hs_platform_info_t *platform, hs_database_t **db,
                      hs_compile_error_t **error) {
    if (expression == nullptr) {
        // The output pointers have not been validated yet, so guard each
        // write: a null db or error must not turn this error path into a
        // crash.
        if (db) {
            *db = nullptr;
        }
        if (error) {
            *error = generateCompileError(
                "Invalid parameter: expression is NULL", -1);
        }
        return HS_COMPILER_ERROR;
    }

    unsigned id = 0; // single expressions get zero as an ID
    const hs_expr_ext * const *ext = nullptr; // unused for this call.

    // &expression, &flags and &id act as one-element arrays.
    return hs_compile_multi_int(&expression, &flags, &id, ext, 1, mode,
                                platform, db, error, Grey());
}
|
||||
|
||||
/**
 * \brief Compile a set of expressions into a database.
 *
 * Forwards straight to hs_compile_multi_int() with no extended parameters
 * and a default-constructed Grey.
 *
 * \return Whatever hs_compile_multi_int() returns.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_compile_multi(const char * const *expressions,
                            const unsigned *flags, const unsigned *ids,
                            unsigned elements, unsigned mode,
                            const hs_platform_info_t *platform,
                            hs_database_t **db, hs_compile_error_t **error) {
    // This entry point has no extended parameters, so pass a null ext array.
    const hs_error_t rv
        = hs_compile_multi_int(expressions, flags, ids, nullptr, elements,
                               mode, platform, db, error, Grey());
    return rv;
}
|
||||
|
||||
/**
 * \brief Compile a set of expressions, each with optional extended
 * parameters, into a database.
 *
 * Forwards every argument unchanged to hs_compile_multi_int(), together with
 * a default-constructed Grey.
 *
 * \return Whatever hs_compile_multi_int() returns.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_compile_ext_multi(const char * const *expressions,
                                const unsigned *flags, const unsigned *ids,
                                const hs_expr_ext * const *ext,
                                unsigned elements, unsigned mode,
                                const hs_platform_info_t *platform,
                                hs_database_t **db,
                                hs_compile_error_t **error) {
    const hs_error_t rv
        = hs_compile_multi_int(expressions, flags, ids, ext, elements, mode,
                               platform, db, error, Grey());
    return rv;
}
|
||||
|
||||
static
|
||||
hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
|
||||
unsigned int mode, hs_expr_info_t **info,
|
||||
hs_compile_error_t **error) {
|
||||
if (!error) {
|
||||
// nowhere to write an error, but we can still return an error code.
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
|
||||
if (!info) {
|
||||
*error = generateCompileError("Invalid parameter: info is NULL", -1);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
|
||||
if (!expression) {
|
||||
*error = generateCompileError("Invalid parameter: expression is NULL",
|
||||
-1);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
|
||||
*info = nullptr;
|
||||
*error = nullptr;
|
||||
|
||||
hs_expr_info local_info;
|
||||
memset(&local_info, 0, sizeof(local_info));
|
||||
|
||||
try {
|
||||
bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED);
|
||||
bool isVectored = mode & HS_MODE_VECTORED;
|
||||
|
||||
CompileContext cc(isStreaming, isVectored, get_current_target(),
|
||||
Grey());
|
||||
|
||||
// Ensure that our pattern isn't too long (in characters).
|
||||
if (strlen(expression) > cc.grey.limitPatternLength) {
|
||||
throw ParseError("Pattern length exceeds limit.");
|
||||
}
|
||||
|
||||
ReportManager rm(cc.grey);
|
||||
ParsedExpression pe(0, expression, flags, 0);
|
||||
assert(pe.component);
|
||||
|
||||
// Apply prefiltering transformations if desired.
|
||||
if (pe.prefilter) {
|
||||
prefilterTree(pe.component, ParseMode(flags));
|
||||
}
|
||||
|
||||
unique_ptr<NGWrapper> g = buildWrapper(rm, cc, pe);
|
||||
|
||||
if (!g) {
|
||||
DEBUG_PRINTF("NFA build failed, but no exception was thrown.\n");
|
||||
throw ParseError("Internal error.");
|
||||
}
|
||||
|
||||
fillExpressionInfo(rm, *g, &local_info);
|
||||
}
|
||||
catch (const CompileError &e) {
|
||||
// Compiler error occurred
|
||||
*error = generateCompileError(e);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
catch (std::bad_alloc) {
|
||||
*error = const_cast<hs_compile_error_t *>(&hs_enomem);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
catch (...) {
|
||||
assert(!"Internal error, unexpected exception");
|
||||
*error = const_cast<hs_compile_error_t *>(&hs_einternal);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
|
||||
hs_expr_info *rv = (hs_expr_info *)hs_misc_alloc(sizeof(*rv));
|
||||
if (!rv) {
|
||||
*error = const_cast<hs_compile_error_t *>(&hs_enomem);
|
||||
return HS_COMPILER_ERROR;
|
||||
}
|
||||
|
||||
*rv = local_info;
|
||||
*info = rv;
|
||||
return HS_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * \brief Public entry point for expression analysis.
 *
 * Calls hs_expression_info_int() with the mode fixed to HS_MODE_BLOCK.
 *
 * \return Whatever hs_expression_info_int() returns.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_expression_info(const char *expression, unsigned int flags,
                              hs_expr_info_t **info,
                              hs_compile_error_t **error) {
    const hs_error_t rv = hs_expression_info_int(expression, flags,
                                                 HS_MODE_BLOCK, info, error);
    return rv;
}
|
||||
|
||||
/**
 * \brief Fill \a platform with the values reported by cpuid_flags() and
 * cpuid_tune().
 *
 * \param platform  Structure to populate; must not be null.
 * \return HS_INVALID if \a platform is null, HS_SUCCESS otherwise.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_populate_platform(hs_platform_info_t *platform) {
    if (platform == nullptr) {
        return HS_INVALID;
    }

    // Zero the whole structure first so that any field not assigned below
    // has a known value.
    memset(platform, 0, sizeof(*platform));
    platform->cpu_features = cpuid_flags();
    platform->tune = cpuid_tune();
    return HS_SUCCESS;
}
|
||||
|
||||
/**
 * \brief Release a compile error object.
 *
 * The pointer is passed unchanged to freeCompileError().
 *
 * \param error  Error object to release.
 * \return Always HS_SUCCESS.
 */
extern "C" HS_PUBLIC_API
hs_error_t hs_free_compile_error(hs_compile_error_t *error) {
    freeCompileError(error);

    return HS_SUCCESS;
}
|
||||
45
src/hs.h
Normal file
45
src/hs.h
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef HS_H_
|
||||
#define HS_H_
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief The complete Hyperscan API definition.
|
||||
*
|
||||
* Hyperscan is a high speed regular expression engine.
|
||||
*
|
||||
* This header includes both the Hyperscan compiler and runtime components. See
|
||||
* the individual component headers for documentation.
|
||||
*/
|
||||
|
||||
#include "hs_compile.h"
|
||||
#include "hs_runtime.h"
|
||||
|
||||
#endif /* HS_H_ */
|
||||
509
src/hs_common.h
Normal file
509
src/hs_common.h
Normal file
@@ -0,0 +1,509 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef HS_COMMON_H_
|
||||
#define HS_COMMON_H_
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief The Hyperscan common API definition.
|
||||
*
|
||||
* Hyperscan is a high speed regular expression engine.
|
||||
*
|
||||
* This header contains functions available to both the Hyperscan compiler and
|
||||
* runtime.
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
struct hs_database;
|
||||
|
||||
/**
|
||||
* A Hyperscan pattern database.
|
||||
*
|
||||
* Generated by one of the Hyperscan compiler functions:
|
||||
* - @ref hs_compile()
|
||||
* - @ref hs_compile_multi()
|
||||
* - @ref hs_compile_ext_multi()
|
||||
*/
|
||||
typedef struct hs_database hs_database_t;
|
||||
|
||||
/**
|
||||
* A type for errors returned by Hyperscan functions.
|
||||
*/
|
||||
typedef int hs_error_t;
|
||||
|
||||
/**
|
||||
* Free a compiled pattern database.
|
||||
*
|
||||
* The free callback set by @ref hs_set_database_allocator() (or @ref
|
||||
* hs_set_allocator()) will be used by this function.
|
||||
*
|
||||
* @param db
|
||||
* A compiled pattern database. NULL may also be safely provided, in which
|
||||
* case the function does nothing.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_free_database(hs_database_t *db);
|
||||
|
||||
/**
|
||||
* Serialize a pattern database to a stream of bytes.
|
||||
*
|
||||
* The allocator callback set by @ref hs_set_misc_allocator() (or @ref
|
||||
* hs_set_allocator()) will be used by this function.
|
||||
*
|
||||
* @param db
|
||||
* A compiled pattern database.
|
||||
*
|
||||
* @param bytes
|
||||
* On success, a pointer to an array of bytes will be returned here.
|
||||
* These bytes can be subsequently relocated or written to disk. The
|
||||
* caller is responsible for freeing this block.
|
||||
*
|
||||
* @param length
|
||||
* On success, the number of bytes in the generated byte array will be
|
||||
* returned here.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, @ref HS_NOMEM if the byte array cannot be
|
||||
* allocated, other values may be returned if errors are detected.
|
||||
*/
|
||||
hs_error_t hs_serialize_database(const hs_database_t *db, char **bytes,
|
||||
size_t *length);
|
||||
|
||||
/**
|
||||
* Reconstruct a pattern database from a stream of bytes previously generated
|
||||
* by @ref hs_serialize_database().
|
||||
*
|
||||
* This function will allocate sufficient space for the database using the
|
||||
* allocator set with @ref hs_set_database_allocator() (or @ref
|
||||
* hs_set_allocator()); to use a pre-allocated region of memory, use the @ref
|
||||
* hs_deserialize_database_at() function.
|
||||
*
|
||||
* @param bytes
|
||||
* A byte array generated by @ref hs_serialize_database() representing a
|
||||
* compiled pattern database.
|
||||
*
|
||||
* @param length
|
||||
* The length of the byte array generated by @ref hs_serialize_database().
|
||||
* This should be the same value as that returned by @ref
|
||||
* hs_serialize_database().
|
||||
*
|
||||
* @param db
|
||||
* On success, a pointer to a newly allocated @ref hs_database_t will be
|
||||
* returned here. This database can then be used for scanning, and
|
||||
* eventually freed by the caller using @ref hs_free_database().
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_deserialize_database(const char *bytes, const size_t length,
|
||||
hs_database_t **db);
|
||||
|
||||
/**
|
||||
* Reconstruct a pattern database from a stream of bytes previously generated
|
||||
* by @ref hs_serialize_database() at a given memory location.
|
||||
*
|
||||
* This function (unlike @ref hs_deserialize_database()) will write the
|
||||
* reconstructed database to the memory location given in the @a db parameter.
|
||||
* The amount of space required at this location can be determined with the
|
||||
* @ref hs_serialized_database_size() function.
|
||||
*
|
||||
* @param bytes
|
||||
* A byte array generated by @ref hs_serialize_database() representing a
|
||||
* compiled pattern database.
|
||||
*
|
||||
* @param length
|
||||
* The length of the byte array generated by @ref hs_serialize_database().
|
||||
* This should be the same value as that returned by @ref
|
||||
* hs_serialize_database().
|
||||
*
|
||||
* @param db
|
||||
* Pointer to an 8-byte aligned block of memory of sufficient size to hold
|
||||
* the deserialized database. On success, the reconstructed database will
|
||||
* be written to this location. This database can then be used for pattern
|
||||
* matching. The user is responsible for freeing this memory; the @ref
|
||||
* hs_free_database() call should not be used.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_deserialize_database_at(const char *bytes, const size_t length,
|
||||
hs_database_t *db);
|
||||
|
||||
/**
|
||||
* Provides the size of the stream state allocated by a single stream opened
|
||||
* against the given database.
|
||||
*
|
||||
* @param database
|
||||
* Pointer to a compiled (streaming mode) pattern database.
|
||||
*
|
||||
* @param stream_size
|
||||
* On success, the size in bytes of an individual stream opened against the
|
||||
* given database is placed in this parameter.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_stream_size(const hs_database_t *database, size_t *stream_size);
|
||||
|
||||
/**
|
||||
* Provides the size of the given database in bytes.
|
||||
*
|
||||
* @param database
|
||||
* Pointer to compiled pattern database.
|
||||
*
|
||||
* @param database_size
|
||||
* On success, the size of the compiled database in bytes is placed in this
|
||||
* parameter.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_database_size(const hs_database_t *database,
|
||||
size_t *database_size);
|
||||
|
||||
/**
|
||||
* Utility function for reporting the size that would be required by a
|
||||
* database if it were deserialized.
|
||||
*
|
||||
* This can be used to allocate a shared memory region or other "special"
|
||||
* allocation prior to deserializing with the @ref hs_deserialize_database_at()
|
||||
* function.
|
||||
*
|
||||
* @param bytes
|
||||
* Pointer to a byte array generated by @ref hs_serialize_database()
|
||||
* representing a compiled pattern database.
|
||||
*
|
||||
* @param length
|
||||
* The length of the byte array generated by @ref hs_serialize_database().
|
||||
* This should be the same value as that returned by @ref
|
||||
* hs_serialize_database().
|
||||
*
|
||||
* @param deserialized_size
|
||||
* On success, the size of the compiled database that would be generated
|
||||
* by @ref hs_deserialize_database_at() is returned here.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_serialized_database_size(const char *bytes, const size_t length,
|
||||
size_t *deserialized_size);
|
||||
|
||||
/**
|
||||
* Utility function providing information about a database.
|
||||
*
|
||||
* @param database
|
||||
* Pointer to a compiled database.
|
||||
*
|
||||
* @param info
|
||||
* On success, a string containing the version and platform information for
|
||||
* the supplied database is placed in the parameter. The string is
|
||||
* allocated using the allocator supplied in @ref hs_set_misc_allocator()
|
||||
* (or malloc() if no allocator was set) and should be freed by the caller.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_database_info(const hs_database_t *database, char **info);
|
||||
|
||||
/**
|
||||
* Utility function providing information about a serialized database.
|
||||
*
|
||||
* @param bytes
|
||||
* Pointer to a serialized database.
|
||||
*
|
||||
* @param length
|
||||
* Length in bytes of the serialized database.
|
||||
*
|
||||
* @param info
|
||||
* On success, a string containing the version and platform information
|
||||
* for the supplied serialized database is placed in the parameter. The
|
||||
* string is allocated using the allocator supplied in @ref
|
||||
* hs_set_misc_allocator() (or malloc() if no allocator was set) and
|
||||
* should be freed by the caller.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_serialized_database_info(const char *bytes, size_t length,
|
||||
char **info);
|
||||
|
||||
/**
|
||||
* The type of the callback function that will be used by Hyperscan to allocate
|
||||
* more memory at runtime as required, for example in @ref hs_open_stream() to
|
||||
* allocate stream state.
|
||||
*
|
||||
* If Hyperscan is to be used in a multi-threaded, or similarly concurrent
|
||||
* environment, the allocation function will need to be re-entrant, or
|
||||
* similarly safe for concurrent use.
|
||||
*
|
||||
* @param size
|
||||
* The number of bytes to allocate.
|
||||
* @return
|
||||
* A pointer to the region of memory allocated, or NULL on error.
|
||||
*/
|
||||
typedef void *(*hs_alloc_t)(size_t size);
|
||||
|
||||
/**
|
||||
* The type of the callback function that will be used by Hyperscan to free
|
||||
* memory regions previously allocated using the @ref hs_alloc_t function.
|
||||
*
|
||||
* @param ptr
|
||||
* The region of memory to be freed.
|
||||
*/
|
||||
typedef void (*hs_free_t)(void *ptr);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Hyperscan for allocating
|
||||
* memory at runtime for stream state, scratch space, database bytecode,
|
||||
* and various other data structures returned by the Hyperscan API.
|
||||
*
|
||||
* The function is equivalent to calling @ref hs_set_stream_allocator(),
|
||||
* @ref hs_set_scratch_allocator(), @ref hs_set_database_allocator() and
|
||||
* @ref hs_set_misc_allocator() with the provided parameters.
|
||||
*
|
||||
* This call will override any previous allocators that have been set.
|
||||
*
|
||||
* Note: there is no way to change the allocator used for temporary objects
|
||||
* created during the various compile calls (@ref hs_compile(), @ref
|
||||
* hs_compile_multi(), @ref hs_compile_ext_multi()).
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_set_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Hyperscan for allocating memory
|
||||
* for database bytecode produced by the compile calls (@ref hs_compile(), @ref
|
||||
* hs_compile_multi(), @ref hs_compile_ext_multi()) and by database
|
||||
* deserialization (@ref hs_deserialize_database()).
|
||||
*
|
||||
* If no database allocation functions are set, or if NULL is used in place of
|
||||
* both parameters, then memory allocation will default to standard methods
|
||||
* (such as the system malloc() and free() calls).
|
||||
*
|
||||
* This call will override any previous database allocators that have been set.
|
||||
*
|
||||
* Note: the database allocator may also be set by calling @ref
|
||||
* hs_set_allocator().
|
||||
*
|
||||
* Note: there is no way to change how temporary objects created during the
|
||||
* various compile calls (@ref hs_compile(), @ref hs_compile_multi(), @ref
|
||||
* hs_compile_ext_multi()) are allocated.
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_set_database_allocator(hs_alloc_t alloc_func,
|
||||
hs_free_t free_func);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Hyperscan for allocating memory
|
||||
* for items returned by the Hyperscan API such as @ref hs_compile_error_t, @ref
|
||||
* hs_expr_info_t and serialized databases.
|
||||
*
|
||||
* If no misc allocation functions are set, or if NULL is used in place of both
|
||||
* parameters, then memory allocation will default to standard methods (such as
|
||||
* the system malloc() and free() calls).
|
||||
*
|
||||
* This call will override any previous misc allocators that have been set.
|
||||
*
|
||||
* Note: the misc allocator may also be set by calling @ref hs_set_allocator().
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_set_misc_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Hyperscan for allocating memory
|
||||
* for scratch space by @ref hs_alloc_scratch() and @ref hs_clone_scratch().
|
||||
*
|
||||
* If no scratch allocation functions are set, or if NULL is used in place of
|
||||
* both parameters, then memory allocation will default to standard methods
|
||||
* (such as the system malloc() and free() calls).
|
||||
*
|
||||
* This call will override any previous scratch allocators that have been set.
|
||||
*
|
||||
* Note: the scratch allocator may also be set by calling @ref
|
||||
* hs_set_allocator().
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_set_scratch_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
|
||||
|
||||
/**
|
||||
* Set the allocate and free functions used by Hyperscan for allocating memory
|
||||
* for stream state by @ref hs_open_stream().
|
||||
*
|
||||
* If no stream allocation functions are set, or if NULL is used in place of
|
||||
* both parameters, then memory allocation will default to standard methods
|
||||
* (such as the system malloc() and free() calls).
|
||||
*
|
||||
* This call will override any previous stream allocators that have been set.
|
||||
*
|
||||
* Note: the stream allocator may also be set by calling @ref
|
||||
* hs_set_allocator().
|
||||
*
|
||||
* @param alloc_func
|
||||
* A callback function pointer that allocates memory. This function must
|
||||
* return memory suitably aligned for the largest representable data type
|
||||
* on this platform.
|
||||
*
|
||||
* @param free_func
|
||||
* A callback function pointer that frees allocated memory.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_set_stream_allocator(hs_alloc_t alloc_func, hs_free_t free_func);
|
||||
|
||||
/**
|
||||
* Utility function for identifying this release version.
|
||||
*
|
||||
* @return
|
||||
* A string containing the version number of this release build and the
|
||||
* date of the build. It is allocated statically, so it does not need to
|
||||
* be freed by the caller.
|
||||
*/
|
||||
const char *hs_version(void);
|
||||
|
||||
/**
|
||||
* @defgroup HS_ERROR hs_error_t values
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* The engine completed normally.
|
||||
*/
|
||||
#define HS_SUCCESS 0
|
||||
|
||||
/**
|
||||
* A parameter passed to this function was invalid.
|
||||
*/
|
||||
#define HS_INVALID (-1)
|
||||
|
||||
/**
|
||||
* A memory allocation failed.
|
||||
*/
|
||||
#define HS_NOMEM (-2)
|
||||
|
||||
/**
|
||||
* The engine was terminated by callback.
|
||||
*
|
||||
* This return value indicates that the target buffer was partially scanned,
|
||||
* but that the callback function requested that scanning cease after a match
|
||||
* was located.
|
||||
*/
|
||||
#define HS_SCAN_TERMINATED (-3)
|
||||
|
||||
/**
|
||||
* The pattern compiler failed, and the @ref hs_compile_error_t should be
|
||||
* inspected for more detail.
|
||||
*/
|
||||
#define HS_COMPILER_ERROR (-4)
|
||||
|
||||
/**
|
||||
* The given database was built for a different version of Hyperscan.
|
||||
*/
|
||||
#define HS_DB_VERSION_ERROR (-5)
|
||||
|
||||
/**
|
||||
* The given database was built for a different platform (i.e., CPU type).
|
||||
*/
|
||||
#define HS_DB_PLATFORM_ERROR (-6)
|
||||
|
||||
/**
|
||||
* The given database was built for a different mode of operation. This error
|
||||
* is returned when streaming calls are used with a block or vectored database
|
||||
* and vice versa.
|
||||
*/
|
||||
#define HS_DB_MODE_ERROR (-7)
|
||||
|
||||
/**
|
||||
* A parameter passed to this function was not correctly aligned.
|
||||
*/
|
||||
#define HS_BAD_ALIGN (-8)
|
||||
|
||||
/**
|
||||
* The memory allocator (either malloc() or the allocator set with @ref
|
||||
* hs_set_allocator()) did not correctly return memory suitably aligned for the
|
||||
* largest representable data type on this platform.
|
||||
*/
|
||||
#define HS_BAD_ALLOC (-9)
|
||||
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* HS_COMMON_H_ */
|
||||
848
src/hs_compile.h
Normal file
848
src/hs_compile.h
Normal file
@@ -0,0 +1,848 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef HS_COMPILE_H_
|
||||
#define HS_COMPILE_H_
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief The Hyperscan compiler API definition.
|
||||
*
|
||||
* Hyperscan is a high speed regular expression engine.
|
||||
*
|
||||
* This header contains functions for compiling regular expressions into
|
||||
* Hyperscan databases that can be used by the Hyperscan runtime.
|
||||
*/
|
||||
|
||||
#include "hs_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/**
|
||||
* A type containing error details that is returned by the compile calls (@ref
|
||||
* hs_compile(), @ref hs_compile_multi() and @ref hs_compile_ext_multi()) on
|
||||
* failure. The caller may inspect the values returned in this type to
|
||||
* determine the cause of failure.
|
||||
*
|
||||
* Common errors generated during the compile process include:
|
||||
*
|
||||
* - *Invalid parameter*
|
||||
*
|
||||
* An invalid argument was specified in the compile call.
|
||||
*
|
||||
* - *Unrecognised flag*
|
||||
*
|
||||
* An unrecognised value was passed in the flags argument.
|
||||
*
|
||||
* - *Pattern matches empty buffer*
|
||||
*
|
||||
* By default, Hyperscan only supports patterns that will *always*
|
||||
* consume at least one byte of input. Patterns that do not have this
|
||||
* property (such as `/(abc)?/`) will produce this error unless
|
||||
* the @ref HS_FLAG_ALLOWEMPTY flag is supplied. Note that such
|
||||
* patterns will produce a match for *every* byte when scanned.
|
||||
*
|
||||
* - *Embedded anchors not supported*
|
||||
*
|
||||
* Hyperscan only supports the use of anchor meta-characters (such as
|
||||
* `^` and `$`) in patterns where they could *only* match
|
||||
* at the start or end of a buffer. A pattern containing an embedded
|
||||
* anchor, such as `/abc^def/`, can never match, as there is no
|
||||
* way for `abc` to precede the start of the data stream.
|
||||
*
|
||||
* - *Bounded repeat is too large*
|
||||
*
|
||||
* The pattern contains a repeated construct with very large finite
|
||||
* bounds.
|
||||
*
|
||||
* - *Unsupported component type*
|
||||
*
|
||||
* An unsupported PCRE construct was used in the pattern.
|
||||
*
|
||||
* - *Unable to generate bytecode*
|
||||
*
|
||||
* This error indicates that Hyperscan was unable to compile a pattern
|
||||
* that is syntactically valid. The most common cause is a pattern that is
|
||||
* very long and complex or contains a large repeated subpattern.
|
||||
*
|
||||
* - *Unable to allocate memory*
|
||||
*
|
||||
* The library was unable to allocate temporary storage used during
|
||||
* compilation time.
|
||||
*
|
||||
* - *Internal error*
|
||||
*
|
||||
* An unexpected error occurred: if this error is reported, please contact
|
||||
* the Hyperscan team with a description of the situation.
|
||||
*/
|
||||
typedef struct hs_compile_error {
|
||||
/**
|
||||
* A human-readable message describing the error.
|
||||
*/
|
||||
char *message;
|
||||
|
||||
/**
|
||||
* The zero-based index of the expression that caused the error (if this
|
||||
* can be determined). If the error is not specific to an expression, then
|
||||
* this value will be less than zero.
|
||||
*/
|
||||
int expression;
|
||||
} hs_compile_error_t;
|
||||
|
||||
/**
|
||||
* A type containing information on the target platform which may optionally be
|
||||
* provided to the compile calls (@ref hs_compile(), @ref hs_compile_multi(),
|
||||
* @ref hs_compile_ext_multi()).
|
||||
*
|
||||
* A hs_platform_info structure may be populated for the current platform by
|
||||
* using the @ref hs_populate_platform() call.
|
||||
*/
|
||||
typedef struct hs_platform_info {
|
||||
/**
|
||||
* Information about the target platform which may be used to guide the
|
||||
* optimisation process of the compile.
|
||||
*
|
||||
* Use of this field does not limit the processors that the resulting
|
||||
* database can run on, but may impact the performance of the resulting
|
||||
* database.
|
||||
*/
|
||||
unsigned int tune;
|
||||
|
||||
/**
|
||||
* Relevant CPU features available on the target platform.
|
||||
*
|
||||
* This value may be produced by combining HS_CPU_FEATURES_* flags (such as
|
||||
* @ref HS_CPU_FEATURES_AVX2). Multiple CPU features may be ORed together
|
||||
* to produce the value.
|
||||
*/
|
||||
unsigned long long cpu_features;
|
||||
|
||||
/**
|
||||
* Reserved for future use.
|
||||
*/
|
||||
unsigned long long reserved1;
|
||||
|
||||
/**
|
||||
* Reserved for future use.
|
||||
*/
|
||||
unsigned long long reserved2;
|
||||
} hs_platform_info_t;
|
||||
|
||||
/**
|
||||
* A type containing information related to an expression that is returned by
|
||||
* @ref hs_expression_info().
|
||||
*/
|
||||
typedef struct hs_expr_info {
|
||||
/**
|
||||
* The minimum length in bytes of a match for the pattern.
|
||||
*/
|
||||
unsigned int min_width;
|
||||
|
||||
/**
|
||||
* The maximum length in bytes of a match for the pattern. If the pattern
|
||||
* has an unbounded maximum width, this will be set to the maximum value of
|
||||
* an unsigned int (UINT_MAX, as defined in <limits.h>).
|
||||
*/
|
||||
unsigned int max_width;
|
||||
|
||||
/**
|
||||
* Whether this expression can produce matches that are not returned in
|
||||
* order, such as those produced by assertions. Zero if false, non-zero if
|
||||
* true.
|
||||
*/
|
||||
char unordered_matches;
|
||||
|
||||
/**
|
||||
* Whether this expression can produce matches at end of data (EOD). In
|
||||
* streaming mode, EOD matches are raised during @ref hs_close_stream(),
|
||||
* since it is only when @ref hs_close_stream() is called that the EOD
|
||||
* location is known. Zero if false, non-zero if true.
|
||||
*
|
||||
* Note: trailing `\b` word boundary assertions may also result in EOD
|
||||
* matches, as end-of-data can act as a word boundary.
|
||||
*/
|
||||
char matches_at_eod;
|
||||
|
||||
/**
|
||||
* Whether this expression can *only* produce matches at end of data (EOD).
|
||||
* In streaming mode, all matches for this expression are raised during
|
||||
* @ref hs_close_stream(). Zero if false, non-zero if true.
|
||||
*/
|
||||
char matches_only_at_eod;
|
||||
} hs_expr_info_t;
|
||||
|
||||
/**
|
||||
* A structure containing additional parameters related to an expression,
|
||||
* passed in at build time to @ref hs_compile_ext_multi().
|
||||
*
|
||||
* These parameters allow the set of matches produced by a pattern to be
|
||||
* constrained at compile time, rather than relying on the application to
|
||||
* process unwanted matches at runtime.
|
||||
*/
|
||||
typedef struct hs_expr_ext {
|
||||
/**
|
||||
* Bitwise OR of @ref HS_EXT_FLAG values selecting which of the fields below
|
||||
* are to be used by the compiler.
|
||||
*/
|
||||
unsigned long long flags;
|
||||
|
||||
/**
|
||||
* The minimum end offset in the data stream at which this expression
|
||||
* should match successfully. To use this parameter, set the
|
||||
* @ref HS_EXT_FLAG_MIN_OFFSET flag in the hs_expr_ext::flags field.
|
||||
*/
|
||||
unsigned long long min_offset;
|
||||
|
||||
/**
|
||||
* The maximum end offset in the data stream at which this expression
|
||||
* should match successfully. To use this parameter, set the
|
||||
* @ref HS_EXT_FLAG_MAX_OFFSET flag in the hs_expr_ext::flags field.
|
||||
*/
|
||||
unsigned long long max_offset;
|
||||
|
||||
/**
|
||||
* The minimum match length (from start to end) required to successfully
|
||||
* match this expression. To use this parameter, set the
|
||||
* @ref HS_EXT_FLAG_MIN_LENGTH flag in the hs_expr_ext::flags field.
|
||||
*/
|
||||
unsigned long long min_length;
|
||||
} hs_expr_ext_t;
|
||||
|
||||
/**
|
||||
* @defgroup HS_EXT_FLAG hs_expr_ext_t flags
|
||||
*
|
||||
* These flags are used in @ref hs_expr_ext_t::flags to indicate which fields
|
||||
* are used.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** Flag indicating that the hs_expr_ext::min_offset field is used. */
|
||||
#define HS_EXT_FLAG_MIN_OFFSET 1ULL
|
||||
|
||||
/** Flag indicating that the hs_expr_ext::max_offset field is used. */
|
||||
#define HS_EXT_FLAG_MAX_OFFSET 2ULL
|
||||
|
||||
/** Flag indicating that the hs_expr_ext::min_length field is used. */
|
||||
#define HS_EXT_FLAG_MIN_LENGTH 4ULL
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* The basic regular expression compiler.
|
||||
*
|
||||
* This is the function call with which an expression is compiled into a
|
||||
* Hyperscan database which can be passed to the runtime functions (such as
|
||||
* @ref hs_scan(), @ref hs_open_stream(), etc.).
|
||||
*
|
||||
* @param expression
|
||||
* The NUL-terminated expression to parse. Note that this string must
|
||||
* represent ONLY the pattern to be matched, with no delimiters or flags;
|
||||
* any global flags should be specified with the @a flags argument. For
|
||||
* example, the expression `/abc?def/i` should be compiled by providing
|
||||
* `abc?def` as the @a expression, and @ref HS_FLAG_CASELESS as the @a
|
||||
* flags.
|
||||
*
|
||||
* @param flags
|
||||
* Flags which modify the behaviour of the expression. Multiple flags may
|
||||
* be used by ORing them together. Valid values are:
|
||||
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
|
||||
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - HS_FLAG_SINGLEMATCH - Only one match will be generated for the
|
||||
* expression per stream.
|
||||
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
|
||||
* empty string, such as `.*`.
|
||||
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
|
||||
* - HS_FLAG_UCP - Use Unicode properties for character classes.
|
||||
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
* when a match is found.
|
||||
*
|
||||
* @param mode
|
||||
* Compiler mode flags that affect the database as a whole. One of @ref
|
||||
* HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
|
||||
* supplied, to select between the generation of a streaming, block or
|
||||
* vectored database. In addition, other flags (beginning with HS_MODE_)
|
||||
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
|
||||
* more details.
|
||||
*
|
||||
* @param platform
|
||||
* If not NULL, the platform structure is used to determine the target
|
||||
* platform for the database. If NULL, a database suitable for running
|
||||
* on the current host platform is produced.
|
||||
*
|
||||
* @param db
|
||||
* On success, a pointer to the generated database will be returned in
|
||||
* this parameter, or NULL on failure. The caller is responsible for
|
||||
* deallocating the buffer using the @ref hs_free_database() function.
|
||||
*
|
||||
* @param error
|
||||
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* hs_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||
* HS_COMPILER_ERROR on failure, with details provided in the @a error
|
||||
* parameter.
|
||||
*/
|
||||
hs_error_t hs_compile(const char *expression, unsigned int flags,
|
||||
unsigned int mode, const hs_platform_info_t *platform,
|
||||
hs_database_t **db, hs_compile_error_t **error);
|
||||
|
||||
/**
|
||||
* The multiple regular expression compiler.
|
||||
*
|
||||
* This is the function call with which a set of expressions is compiled into a
|
||||
* database which can be passed to the runtime functions (such as @ref
|
||||
* hs_scan(), @ref hs_open_stream(), etc.). Each expression can be labelled with
|
||||
* a unique integer which is passed into the match callback to identify the
|
||||
* pattern that has matched.
|
||||
*
|
||||
* @param expressions
|
||||
* Array of NUL-terminated expression strings to compile. Note that (as for @ref
|
||||
* hs_compile()) these strings must contain only the pattern to be
|
||||
* matched, with no delimiters or flags. For example, the expression
|
||||
* `/abc?def/i` should be compiled by providing `abc?def` as the first
|
||||
* string in the @a expressions array, and @ref HS_FLAG_CASELESS as the
|
||||
* first value in the @a flags array.
|
||||
*
|
||||
* @param flags
|
||||
* Array of flags which modify the behaviour of each expression. Multiple
|
||||
* flags may be used by ORing them together. Specifying the NULL pointer
|
||||
* in place of an array will set the flags value for all patterns to zero.
|
||||
* Valid values are:
|
||||
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
|
||||
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - HS_FLAG_SINGLEMATCH - Only one match will be generated by patterns
|
||||
* with this match id per stream.
|
||||
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
|
||||
* empty string, such as `.*`.
|
||||
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
|
||||
* - HS_FLAG_UCP - Use Unicode properties for character classes.
|
||||
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
* when a match is found.
|
||||
*
|
||||
* @param ids
|
||||
* An array of integers specifying the ID number to be associated with the
|
||||
* corresponding pattern in the expressions array. Specifying the NULL
|
||||
* pointer in place of an array will set the ID value for all patterns to
|
||||
* zero.
|
||||
*
|
||||
* @param elements
|
||||
* The number of elements in the input arrays.
|
||||
*
|
||||
* @param mode
|
||||
* Compiler mode flags that affect the database as a whole. One of @ref
|
||||
* HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
|
||||
* supplied, to select between the generation of a streaming, block or
|
||||
* vectored database. In addition, other flags (beginning with HS_MODE_)
|
||||
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
|
||||
* more details.
|
||||
*
|
||||
* @param platform
|
||||
* If not NULL, the platform structure is used to determine the target
|
||||
* platform for the database. If NULL, a database suitable for running
|
||||
* on the current host platform is produced.
|
||||
*
|
||||
* @param db
|
||||
* On success, a pointer to the generated database will be returned in
|
||||
* this parameter, or NULL on failure. The caller is responsible for
|
||||
* deallocating the buffer using the @ref hs_free_database() function.
|
||||
*
|
||||
* @param error
|
||||
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* hs_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||
* HS_COMPILER_ERROR on failure, with details provided in the @a error
|
||||
* parameter.
|
||||
*
|
||||
*/
|
||||
hs_error_t hs_compile_multi(const char *const *expressions,
|
||||
const unsigned int *flags, const unsigned int *ids,
|
||||
unsigned int elements, unsigned int mode,
|
||||
const hs_platform_info_t *platform,
|
||||
hs_database_t **db, hs_compile_error_t **error);
|
||||
|
||||
/**
|
||||
* The multiple regular expression compiler with extended pattern support.
|
||||
*
|
||||
* This function call compiles a group of expressions into a database in the
|
||||
* same way as @ref hs_compile_multi(), but allows additional parameters to be
|
||||
* specified via an @ref hs_expr_ext_t structure per expression.
|
||||
*
|
||||
* @param expressions
|
||||
* Array of NUL-terminated expression strings to compile. Note that (as for @ref
|
||||
* hs_compile()) these strings must contain only the pattern to be
|
||||
* matched, with no delimiters or flags. For example, the expression
|
||||
* `/abc?def/i` should be compiled by providing `abc?def` as the first
|
||||
* string in the @a expressions array, and @ref HS_FLAG_CASELESS as the
|
||||
* first value in the @a flags array.
|
||||
*
|
||||
* @param flags
|
||||
* Array of flags which modify the behaviour of each expression. Multiple
|
||||
* flags may be used by ORing them together. Specifying the NULL pointer
|
||||
* in place of an array will set the flags value for all patterns to zero.
|
||||
* Valid values are:
|
||||
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
|
||||
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - HS_FLAG_SINGLEMATCH - Only one match will be generated by patterns
|
||||
* with this match id per stream.
|
||||
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
|
||||
* empty string, such as `.*`.
|
||||
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
|
||||
* - HS_FLAG_UCP - Use Unicode properties for character classes.
|
||||
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
* when a match is found.
|
||||
*
|
||||
* @param ids
|
||||
* An array of integers specifying the ID number to be associated with the
|
||||
* corresponding pattern in the expressions array. Specifying the NULL
|
||||
* pointer in place of an array will set the ID value for all patterns to
|
||||
* zero.
|
||||
*
|
||||
* @param ext
|
||||
* An array of pointers to filled @ref hs_expr_ext_t structures that
|
||||
* define extended behaviour for each pattern. NULL may be specified if no
|
||||
* extended behaviour is needed for an individual pattern, or in place of
|
||||
* the whole array if it is not needed for any expressions. Memory used by
|
||||
* these structures must be both allocated and freed by the caller.
|
||||
*
|
||||
* @param elements
|
||||
* The number of elements in the input arrays.
|
||||
*
|
||||
* @param mode
|
||||
* Compiler mode flags that affect the database as a whole. One of @ref
|
||||
* HS_MODE_STREAM, @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be
|
||||
* supplied, to select between the generation of a streaming, block or
|
||||
* vectored database. In addition, other flags (beginning with HS_MODE_)
|
||||
* may be supplied to enable specific features. See @ref HS_MODE_FLAG for
|
||||
* more details.
|
||||
*
|
||||
* @param platform
|
||||
* If not NULL, the platform structure is used to determine the target
|
||||
* platform for the database. If NULL, a database suitable for running
|
||||
* on the current host platform is produced.
|
||||
*
|
||||
* @param db
|
||||
* On success, a pointer to the generated database will be returned in
|
||||
* this parameter, or NULL on failure. The caller is responsible for
|
||||
* deallocating the buffer using the @ref hs_free_database() function.
|
||||
*
|
||||
* @param error
|
||||
* If the compile fails, a pointer to a @ref hs_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* hs_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||
* HS_COMPILER_ERROR on failure, with details provided in the @a error
|
||||
* parameter.
|
||||
*
|
||||
*/
|
||||
hs_error_t hs_compile_ext_multi(const char *const *expressions,
|
||||
const unsigned int *flags,
|
||||
const unsigned int *ids,
|
||||
const hs_expr_ext_t *const *ext,
|
||||
unsigned int elements, unsigned int mode,
|
||||
const hs_platform_info_t *platform,
|
||||
hs_database_t **db, hs_compile_error_t **error);
|
||||
|
||||
/**
|
||||
* Free an error structure generated by @ref hs_compile(), @ref
|
||||
* hs_compile_multi() or @ref hs_compile_ext_multi().
|
||||
*
|
||||
* @param error
|
||||
* The @ref hs_compile_error_t to be freed. NULL may also be safely
|
||||
* provided.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_free_compile_error(hs_compile_error_t *error);
|
||||
|
||||
/**
|
||||
* Utility function providing information about a regular expression. The
|
||||
* information provided in @ref hs_expr_info_t includes the minimum and maximum
|
||||
* width of a pattern match.
|
||||
*
|
||||
* @param expression
|
||||
* The NULL-terminated expression to parse. Note that this string must
|
||||
* represent ONLY the pattern to be matched, with no delimiters or flags;
|
||||
* any global flags should be specified with the @a flags argument. For
|
||||
* example, the expression `/abc?def/i` should be compiled by providing
|
||||
* `abc?def` as the @a expression, and @ref HS_FLAG_CASELESS as the @a
|
||||
* flags.
|
||||
*
|
||||
* @param flags
|
||||
* Flags which modify the behaviour of the expression. Multiple flags may
|
||||
* be used by ORing them together. Valid values are:
|
||||
* - HS_FLAG_CASELESS - Matching will be performed case-insensitively.
|
||||
* - HS_FLAG_DOTALL - Matching a `.` will not exclude newlines.
|
||||
* - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data.
|
||||
* - HS_FLAG_SINGLEMATCH - Only one match will be generated by the
|
||||
* expression per stream.
|
||||
* - HS_FLAG_ALLOWEMPTY - Allow expressions which can match against an
|
||||
* empty string, such as `.*`.
|
||||
* - HS_FLAG_UTF8 - Treat this pattern as a sequence of UTF-8 characters.
|
||||
* - HS_FLAG_UCP - Use Unicode properties for character classes.
|
||||
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
|
||||
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
|
||||
* when a match is found.
|
||||
*
|
||||
* @param info
|
||||
* On success, a pointer to the pattern information will be returned in
|
||||
* this parameter, or NULL on failure. This structure is allocated using
|
||||
* the allocator supplied in @ref hs_set_allocator() (or malloc() if no
|
||||
* allocator was set) and should be freed by the caller.
|
||||
*
|
||||
* @param error
|
||||
* If the call fails, a pointer to a @ref hs_compile_error_t will be
|
||||
* returned, providing details of the error condition. The caller is
|
||||
* responsible for deallocating the buffer using the @ref
|
||||
* hs_free_compile_error() function.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS is returned on successful compilation; @ref
|
||||
* HS_COMPILER_ERROR on failure, with details provided in the @a error
|
||||
* parameter.
|
||||
*/
|
||||
hs_error_t hs_expression_info(const char *expression, unsigned int flags,
|
||||
hs_expr_info_t **info,
|
||||
hs_compile_error_t **error);
|
||||
|
||||
/**
|
||||
* Populates the platform information based on the current host.
|
||||
*
|
||||
* @param platform
|
||||
* On success, the pointed to structure is populated based on the current
|
||||
* host.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_populate_platform(hs_platform_info_t *platform);
|
||||
|
||||
/**
|
||||
* @defgroup HS_PATTERN_FLAG Pattern flags
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Compile flag: Set case-insensitive matching.
|
||||
*
|
||||
* This flag sets the expression to be matched case-insensitively by default.
|
||||
* The expression may still use PCRE tokens (notably `(?i)` and
|
||||
* `(?-i)`) to switch case-insensitive matching on and off.
|
||||
*/
|
||||
#define HS_FLAG_CASELESS 1
|
||||
|
||||
/**
|
||||
* Compile flag: Matching a `.` will not exclude newlines.
|
||||
*
|
||||
* This flag sets any instances of the `.` token to match newline characters as
|
||||
* well as all other characters. The PCRE specification states that the `.`
|
||||
* token does not match newline characters by default, so without this flag the
|
||||
* `.` token will not cross line boundaries.
|
||||
*/
|
||||
#define HS_FLAG_DOTALL 2
|
||||
|
||||
/**
|
||||
* Compile flag: Set multi-line anchoring.
|
||||
*
|
||||
* This flag instructs the expression to make the `^` and `$` tokens match
|
||||
* newline characters as well as the start and end of the stream. If this flag
|
||||
* is not specified, the `^` token will only ever match at the start of a
|
||||
* stream, and the `$` token will only ever match at the end of a stream within
|
||||
* the guidelines of the PCRE specification.
|
||||
*/
|
||||
#define HS_FLAG_MULTILINE 4
|
||||
|
||||
/**
|
||||
* Compile flag: Set single-match only mode.
|
||||
*
|
||||
* This flag sets the expression's match ID to match at most once. In streaming
|
||||
* mode, this means that the expression will return only a single match over
|
||||
* the lifetime of the stream, rather than reporting every match as per
|
||||
* standard Hyperscan semantics. In block mode or vectored mode, only the first
|
||||
* match for each invocation of @ref hs_scan() or @ref hs_scan_vector() will be
|
||||
* returned.
|
||||
*
|
||||
* If multiple expressions in the database share the same match ID, then they
|
||||
* either must all specify @ref HS_FLAG_SINGLEMATCH or none of them specify
|
||||
* @ref HS_FLAG_SINGLEMATCH. If a group of expressions sharing a match ID
|
||||
* specify the flag, then at most one match with the match ID will be generated
|
||||
* per stream.
|
||||
*
|
||||
* Note: The use of this flag in combination with @ref HS_FLAG_SOM_LEFTMOST
|
||||
* is not currently supported.
|
||||
*/
|
||||
#define HS_FLAG_SINGLEMATCH 8
|
||||
|
||||
/**
|
||||
* Compile flag: Allow expressions that can match against empty buffers.
|
||||
*
|
||||
* This flag instructs the compiler to allow expressions that can match against
|
||||
* empty buffers, such as `.?`, `.*`, `(a|)`. Since Hyperscan can return every
|
||||
* possible match for an expression, such expressions generally execute very
|
||||
* slowly; the default behaviour is to return an error when an attempt to
|
||||
* compile one is made. Using this flag will force the compiler to allow such
|
||||
* an expression.
|
||||
*/
|
||||
#define HS_FLAG_ALLOWEMPTY 16
|
||||
|
||||
/**
|
||||
* Compile flag: Enable UTF-8 mode for this expression.
|
||||
*
|
||||
* This flag instructs Hyperscan to treat the pattern as a sequence of UTF-8
|
||||
* characters. The results of scanning invalid UTF-8 sequences with a Hyperscan
|
||||
* library that has been compiled with one or more patterns using this flag are
|
||||
* undefined.
|
||||
*/
|
||||
#define HS_FLAG_UTF8 32
|
||||
|
||||
/**
|
||||
* Compile flag: Enable Unicode property support for this expression.
|
||||
*
|
||||
* This flag instructs Hyperscan to use Unicode properties, rather than the
|
||||
* default ASCII interpretations, for character mnemonics like `\w` and `\s` as
|
||||
* well as the POSIX character classes. It is only meaningful in conjunction
|
||||
* with @ref HS_FLAG_UTF8.
|
||||
*/
|
||||
#define HS_FLAG_UCP 64
|
||||
|
||||
/**
|
||||
* Compile flag: Enable prefiltering mode for this expression.
|
||||
*
|
||||
* This flag instructs Hyperscan to compile an "approximate" version of this
|
||||
* pattern for use in a prefiltering application, even if Hyperscan does not
|
||||
* support the pattern in normal operation.
|
||||
*
|
||||
* The set of matches returned when this flag is used is guaranteed to be a
|
||||
* superset of the matches specified by the non-prefiltering expression.
|
||||
*
|
||||
* If the pattern contains pattern constructs not supported by Hyperscan (such
|
||||
* as zero-width assertions, back-references or conditional references) these
|
||||
* constructs will be replaced internally with broader constructs that may
|
||||
* match more often.
|
||||
*
|
||||
* Furthermore, in prefiltering mode Hyperscan may simplify a pattern that
|
||||
* would otherwise return a "Pattern too large" error at compile time, or for
|
||||
* performance reasons (subject to the matching guarantee above).
|
||||
*
|
||||
* It is generally expected that the application will subsequently confirm
|
||||
* prefilter matches with another regular expression matcher that can provide
|
||||
* exact matches for the pattern.
|
||||
*
|
||||
* Note: The use of this flag in combination with @ref HS_FLAG_SOM_LEFTMOST
|
||||
* is not currently supported.
|
||||
*/
|
||||
#define HS_FLAG_PREFILTER 128
|
||||
|
||||
/**
|
||||
* Compile flag: Enable leftmost start of match reporting.
|
||||
*
|
||||
* This flag instructs Hyperscan to report the leftmost possible start of match
|
||||
* offset when a match is reported for this expression. (By default, no start
|
||||
* of match is returned.)
|
||||
*
|
||||
* Enabling this behaviour may reduce performance and increase stream state
|
||||
* requirements in streaming mode.
|
||||
*/
|
||||
#define HS_FLAG_SOM_LEFTMOST 256
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* @defgroup HS_CPU_FEATURES_FLAG CPU feature support flags
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* CPU features flag - Intel(R) Advanced Vector Extensions 2 (Intel(R) AVX2)
|
||||
*
|
||||
* Setting this flag indicates that the target platform supports AVX2
|
||||
* instructions.
|
||||
*/
|
||||
#define HS_CPU_FEATURES_AVX2 (1ULL << 2)
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* @defgroup HS_TUNE_FLAG Tuning flags
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Tuning Parameter - Generic
|
||||
*
|
||||
* This indicates that the compiled database should not be tuned for any
|
||||
* particular target platform.
|
||||
*/
|
||||
#define HS_TUNE_FAMILY_GENERIC 0
|
||||
|
||||
/**
|
||||
* Tuning Parameter - Intel(R) microarchitecture code name Sandy Bridge
|
||||
*
|
||||
* This indicates that the compiled database should be tuned for the
|
||||
* Sandy Bridge microarchitecture.
|
||||
*/
|
||||
#define HS_TUNE_FAMILY_SNB 1
|
||||
|
||||
/**
|
||||
* Tuning Parameter - Intel(R) microarchitecture code name Ivy Bridge
|
||||
*
|
||||
* This indicates that the compiled database should be tuned for the
|
||||
* Ivy Bridge microarchitecture.
|
||||
*/
|
||||
#define HS_TUNE_FAMILY_IVB 2
|
||||
|
||||
/**
|
||||
* Tuning Parameter - Intel(R) microarchitecture code name Haswell
|
||||
*
|
||||
* This indicates that the compiled database should be tuned for the
|
||||
* Haswell microarchitecture.
|
||||
*/
|
||||
#define HS_TUNE_FAMILY_HSW 3
|
||||
|
||||
/**
|
||||
* Tuning Parameter - Intel(R) microarchitecture code name Silvermont
|
||||
*
|
||||
* This indicates that the compiled database should be tuned for the
|
||||
* Silvermont microarchitecture.
|
||||
*/
|
||||
#define HS_TUNE_FAMILY_SLM 4
|
||||
|
||||
/**
|
||||
* Tuning Parameter - Intel(R) microarchitecture code name Broadwell
|
||||
*
|
||||
* This indicates that the compiled database should be tuned for the
|
||||
* Broadwell microarchitecture.
|
||||
*/
|
||||
#define HS_TUNE_FAMILY_BDW 5
|
||||
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* @defgroup HS_MODE_FLAG Compile mode flags
|
||||
*
|
||||
* The mode flags are used as values for the mode parameter of the various
|
||||
* compile calls (@ref hs_compile(), @ref hs_compile_multi() and @ref
|
||||
* hs_compile_ext_multi()).
|
||||
*
|
||||
* A mode value can be built by ORing these flag values together; the only
|
||||
* required flag is one of @ref HS_MODE_BLOCK, @ref HS_MODE_STREAM or @ref
|
||||
* HS_MODE_VECTORED. Other flags may be added to enable support for additional
|
||||
* features.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Compiler mode flag: Block scan (non-streaming) database.
|
||||
*/
|
||||
#define HS_MODE_BLOCK 1
|
||||
|
||||
/**
|
||||
* Compiler mode flag: Alias for @ref HS_MODE_BLOCK.
|
||||
*/
|
||||
#define HS_MODE_NOSTREAM 1
|
||||
|
||||
/**
|
||||
* Compiler mode flag: Streaming database.
|
||||
*/
|
||||
#define HS_MODE_STREAM 2
|
||||
|
||||
/**
|
||||
* Compiler mode flag: Vectored scanning database.
|
||||
*/
|
||||
#define HS_MODE_VECTORED 4
|
||||
|
||||
/**
|
||||
* Compiler mode flag: use full precision to track start of match offsets in
|
||||
* stream state.
|
||||
*
|
||||
* This mode will use the most stream state per pattern, but will always return
|
||||
* an accurate start of match offset regardless of how far back in the past it
|
||||
* was found.
|
||||
*
|
||||
* One of the SOM_HORIZON modes must be selected to use the @ref
|
||||
* HS_FLAG_SOM_LEFTMOST expression flag.
|
||||
*/
|
||||
#define HS_MODE_SOM_HORIZON_LARGE (1U << 24)
|
||||
|
||||
/**
|
||||
* Compiler mode flag: use medium precision to track start of match offsets in
|
||||
* stream state.
|
||||
*
|
||||
* This mode will use less stream state than @ref HS_MODE_SOM_HORIZON_LARGE and
|
||||
* will limit start of match accuracy to offsets within 2^32 bytes of the
|
||||
* end of match offset reported.
|
||||
*
|
||||
* One of the SOM_HORIZON modes must be selected to use the @ref
|
||||
* HS_FLAG_SOM_LEFTMOST expression flag.
|
||||
*/
|
||||
#define HS_MODE_SOM_HORIZON_MEDIUM (1U << 25)
|
||||
|
||||
/**
|
||||
* Compiler mode flag: use limited precision to track start of match offsets in
|
||||
* stream state.
|
||||
*
|
||||
* This mode will use less stream state than @ref HS_MODE_SOM_HORIZON_LARGE and
|
||||
* will limit start of match accuracy to offsets within 2^16 bytes of the
|
||||
* end of match offset reported.
|
||||
*
|
||||
* One of the SOM_HORIZON modes must be selected to use the @ref
|
||||
* HS_FLAG_SOM_LEFTMOST expression flag.
|
||||
*/
|
||||
#define HS_MODE_SOM_HORIZON_SMALL (1U << 26)
|
||||
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* HS_COMPILE_H_ */
|
||||
78
src/hs_internal.h
Normal file
78
src/hs_internal.h
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Internal-use only definitions. Available to internal tools.
|
||||
*/
|
||||
|
||||
#ifndef HS_INTERNAL_H
|
||||
#define HS_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hs.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct Grey;
|
||||
|
||||
/** \brief Internal use only: takes a Grey argument so that we can use it in
|
||||
* tools. */
|
||||
hs_error_t hs_compile_multi_int(const char *const *expressions,
|
||||
const unsigned *flags, const unsigned *ids,
|
||||
const hs_expr_ext *const *ext,
|
||||
unsigned elements, unsigned mode,
|
||||
const hs_platform_info_t *platform,
|
||||
hs_database_t **db,
|
||||
hs_compile_error_t **comp_error, const Grey &g);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#define HS_MATCH_FLAG_ADJUSTED 1U
|
||||
|
||||
/** \brief Bitmask of all valid Hyperscan flags. */
|
||||
#define HS_FLAG_ALL ( HS_FLAG_CASELESS \
|
||||
| HS_FLAG_DOTALL \
|
||||
| HS_FLAG_MULTILINE \
|
||||
| HS_FLAG_UTF8 \
|
||||
| HS_FLAG_UCP \
|
||||
| HS_FLAG_PREFILTER \
|
||||
| HS_FLAG_SINGLEMATCH \
|
||||
| HS_FLAG_ALLOWEMPTY \
|
||||
| HS_FLAG_SOM_LEFTMOST)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif
|
||||
493
src/hs_runtime.h
Normal file
493
src/hs_runtime.h
Normal file
@@ -0,0 +1,493 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef HS_RUNTIME_H_
|
||||
#define HS_RUNTIME_H_
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @brief The Hyperscan runtime API definition.
|
||||
*
|
||||
* Hyperscan is a high speed regular expression engine.
|
||||
*
|
||||
* This header contains functions for using compiled Hyperscan databases for
|
||||
* scanning data at runtime.
|
||||
*/
|
||||
|
||||
#include "hs_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Definition of the stream identifier type.
|
||||
*/
|
||||
struct hs_stream;
|
||||
|
||||
/**
|
||||
* The stream identifier returned by @ref hs_open_stream().
|
||||
*/
|
||||
typedef struct hs_stream hs_stream_t;
|
||||
|
||||
struct hs_scratch;
|
||||
|
||||
/**
|
||||
* A Hyperscan scratch space.
|
||||
*/
|
||||
typedef struct hs_scratch hs_scratch_t;
|
||||
|
||||
/**
|
||||
* Definition of the match event callback function type.
|
||||
*
|
||||
* A callback function matching the defined type must be provided by the
|
||||
* application calling the @ref hs_scan(), @ref hs_scan_vector() or @ref
|
||||
* hs_scan_stream() functions (or other streaming calls which can produce
|
||||
* matches).
|
||||
*
|
||||
* This callback function will be invoked whenever a match is located in the
|
||||
* target data during the execution of a scan. The details of the match are
|
||||
* passed in as parameters to the callback function, and the callback function
|
||||
* should return a value indicating whether or not matching should continue on
|
||||
* the target data. If no callbacks are desired from a scan call, NULL may be
|
||||
* provided in order to suppress match production.
|
||||
*
|
||||
* This callback function should not attempt to call Hyperscan API functions on
|
||||
* the same stream nor should it attempt to reuse the scratch space allocated
|
||||
* for the API calls that caused it to be triggered. Making another call to the
|
||||
* Hyperscan library with completely independent parameters should work (for
|
||||
* example, scanning a different database in a new stream and with new scratch
|
||||
* space), but reusing data structures like stream state and/or scratch space
|
||||
* will produce undefined behavior.
|
||||
*
|
||||
* @param id
|
||||
* The ID number of the expression that matched. If the expression was a
|
||||
* single expression compiled with @ref hs_compile(), this value will be
|
||||
* zero.
|
||||
*
|
||||
* @param from
|
||||
* - If a start of match flag is enabled for the current pattern, this
|
||||
* argument will be set to the start of match for the pattern assuming
|
||||
* that that start of match value lies within the current 'start of match
|
||||
* horizon' chosen by one of the SOM_HORIZON mode flags.
|
||||
|
||||
* - If the start of match value lies outside this horizon (possible only
|
||||
* when the SOM_HORIZON value is not @ref HS_MODE_SOM_HORIZON_LARGE),
|
||||
* the @a from value will be set to @ref HS_OFFSET_PAST_HORIZON.
|
||||
|
||||
* - This argument will be set to zero if the Start of Match flag is not
|
||||
* enabled for the given pattern.
|
||||
*
|
||||
* @param to
|
||||
* The offset after the last byte that matches the expression.
|
||||
*
|
||||
* @param flags
|
||||
* This is provided for future use and is unused at present.
|
||||
*
|
||||
* @param context
|
||||
* The pointer supplied by the user to the @ref hs_scan(), @ref
|
||||
* hs_scan_vector() or @ref hs_scan_stream() function.
|
||||
*
|
||||
* @return
|
||||
* Non-zero if the matching should cease, else zero. If scanning is
|
||||
* performed in streaming mode and a non-zero value is returned, any
|
||||
* subsequent calls to @ref hs_scan_stream() for that stream will
|
||||
* immediately return with @ref HS_SCAN_TERMINATED.
|
||||
*/
|
||||
typedef int (*match_event_handler)(unsigned int id,
|
||||
unsigned long long from,
|
||||
unsigned long long to,
|
||||
unsigned int flags,
|
||||
void *context);
|
||||
|
||||
/**
|
||||
* Open and initialise a stream.
|
||||
*
|
||||
* @param db
|
||||
* A compiled pattern database.
|
||||
*
|
||||
* @param flags
|
||||
* Flags modifying the behaviour of the stream. This parameter is provided
|
||||
* for future use and is unused at present.
|
||||
*
|
||||
* @param stream
|
||||
* On success, a pointer to the generated @ref hs_stream_t will be
|
||||
* returned; NULL on failure.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_open_stream(const hs_database_t *db, unsigned int flags,
|
||||
hs_stream_t **stream);
|
||||
|
||||
/**
|
||||
* Write data to be scanned to the opened stream.
|
||||
*
|
||||
* This is the function call in which the actual pattern matching takes place
|
||||
* as data is written to the stream. Matches will be returned via the @ref
|
||||
* match_event_handler callback supplied.
|
||||
*
|
||||
* @param id
|
||||
* The stream ID (returned by @ref hs_open_stream()) to which the data
|
||||
* will be written.
|
||||
*
|
||||
* @param data
|
||||
* Pointer to the data to be scanned.
|
||||
*
|
||||
* @param length
|
||||
* The number of bytes to scan.
|
||||
*
|
||||
* @param flags
|
||||
* Flags modifying the behaviour of the stream. This parameter is provided
|
||||
* for future use and is unused at present.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch().
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
* no matches will be returned.
|
||||
*
|
||||
* @param ctxt
|
||||
* The user defined pointer which will be passed to the callback function
|
||||
* when a match occurs.
|
||||
*
|
||||
* @return
|
||||
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
|
||||
* match callback indicated that scanning should stop; other values on
|
||||
* error.
|
||||
*/
|
||||
hs_error_t hs_scan_stream(hs_stream_t *id, const char *data,
|
||||
unsigned int length, unsigned int flags,
|
||||
hs_scratch_t *scratch, match_event_handler onEvent,
|
||||
void *ctxt);
|
||||
|
||||
/**
|
||||
* Close a stream.
|
||||
*
|
||||
* This function must be called for any stream created with @ref
|
||||
* hs_open_stream(), even if scanning has been terminated by a non-zero return
|
||||
* from the match callback function.
|
||||
*
|
||||
* Note: This operation may result in matches being returned (via calls to the
|
||||
* match event callback) for expressions anchored to the end of the data stream
|
||||
* (for example, via the use of the `$` meta-character). If these matches are
|
||||
* not desired, NULL may be provided as the @ref match_event_handler callback.
|
||||
*
|
||||
* If NULL is provided as the @ref match_event_handler callback, it is
|
||||
* permissible to provide a NULL scratch.
|
||||
*
|
||||
* @param id
|
||||
* The stream ID returned by @ref hs_open_stream().
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
|
||||
* allowed to be NULL only if the @a onEvent callback is also NULL.
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
* no matches will be returned.
|
||||
*
|
||||
* @param ctxt
|
||||
* The user defined pointer which will be passed to the callback function
|
||||
* when a match occurs.
|
||||
*
|
||||
* @return
|
||||
* Returns @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
|
||||
match_event_handler onEvent, void *ctxt);
|
||||
|
||||
/**
|
||||
* Reset a stream to an initial state.
|
||||
*
|
||||
* Conceptually, this is equivalent to performing @ref hs_close_stream() on the
|
||||
* given stream, followed by a @ref hs_open_stream(). This new stream replaces
|
||||
* the original stream in memory, avoiding the overhead of freeing the old
|
||||
* stream and allocating the new one.
|
||||
*
|
||||
* Note: This operation may result in matches being returned (via calls to the
|
||||
* match event callback) for expressions anchored to the end of the original
|
||||
* data stream (for example, via the use of the `$` meta-character). If these
|
||||
* matches are not desired, NULL may be provided as the @ref match_event_handler
|
||||
* callback.
|
||||
*
|
||||
* Note: the stream will also be tied to the same database.
|
||||
*
|
||||
* @param id
|
||||
* The stream (as created by @ref hs_open_stream()) to be replaced.
|
||||
*
|
||||
* @param flags
|
||||
* Flags modifying the behaviour of the stream. This parameter is provided
|
||||
* for future use and is unused at present.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch().
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
* no matches will be returned.
|
||||
*
|
||||
* @param context
|
||||
* The user defined pointer which will be passed to the callback function
|
||||
* when a match occurs.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_reset_stream(hs_stream_t *id, unsigned int flags,
|
||||
hs_scratch_t *scratch, match_event_handler onEvent,
|
||||
void *context);
|
||||
|
||||
/**
|
||||
* Duplicate the given stream. The new stream will have the same state as the
|
||||
* original including the current stream offset.
|
||||
*
|
||||
* @param to_id
|
||||
* On success, a pointer to the new, copied @ref hs_stream_t will be
|
||||
* returned; NULL on failure.
|
||||
*
|
||||
* @param from_id
|
||||
* The stream (as created by @ref hs_open_stream()) to be copied.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_copy_stream(hs_stream_t **to_id, const hs_stream_t *from_id);
|
||||
|
||||
/**
|
||||
* Duplicate the given 'from' stream state onto the 'to' stream. The 'to' stream
|
||||
* will first be reset (reporting any EOD matches if a non-NULL @a onEvent
|
||||
* callback handler is provided).
|
||||
*
|
||||
* Note: the 'to' stream and the 'from' stream must be open against the same
|
||||
* database.
|
||||
*
|
||||
* @param to_id
|
||||
* The stream (as created by @ref hs_open_stream()) to be reset and then
|
||||
* overwritten with a copy of the state of the @a from_id stream.
|
||||
*
|
||||
* @param from_id
|
||||
* The stream (as created by @ref hs_open_stream()) to be copied.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch().
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
* no matches will be returned.
|
||||
*
|
||||
* @param context
|
||||
* The user defined pointer which will be passed to the callback function
|
||||
* when a match occurs.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_reset_and_copy_stream(hs_stream_t *to_id,
|
||||
const hs_stream_t *from_id,
|
||||
hs_scratch_t *scratch,
|
||||
match_event_handler onEvent,
|
||||
void *context);
|
||||
|
||||
/**
|
||||
* The block (non-streaming) regular expression scanner.
|
||||
*
|
||||
* This is the function call in which the actual pattern matching takes place
|
||||
* for block-mode pattern databases.
|
||||
*
|
||||
* @param db
|
||||
* A compiled pattern database.
|
||||
*
|
||||
* @param data
|
||||
* Pointer to the data to be scanned.
|
||||
*
|
||||
* @param length
|
||||
* The number of bytes to scan.
|
||||
*
|
||||
* @param flags
|
||||
* Flags modifying the behaviour of this function. This parameter is
|
||||
* provided for future use and is unused at present.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch() for this
|
||||
* database.
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
* no matches will be returned.
|
||||
*
|
||||
* @param context
|
||||
* The user defined pointer which will be passed to the callback function.
|
||||
*
|
||||
* @return
|
||||
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the
|
||||
* match callback indicated that scanning should stop; other values on
|
||||
* error.
|
||||
*/
|
||||
hs_error_t hs_scan(const hs_database_t *db, const char *data,
|
||||
unsigned int length, unsigned int flags,
|
||||
hs_scratch_t *scratch, match_event_handler onEvent,
|
||||
void *context);
|
||||
|
||||
/**
|
||||
* The vectored regular expression scanner.
|
||||
*
|
||||
* This is the function call in which the actual pattern matching takes place
|
||||
* for vectoring-mode pattern databases.
|
||||
*
|
||||
* @param db
|
||||
* A compiled pattern database.
|
||||
*
|
||||
* @param data
|
||||
* An array of pointers to the data blocks to be scanned.
|
||||
*
|
||||
* @param length
|
||||
* An array of lengths (in bytes) of each data block to scan.
|
||||
*
|
||||
* @param count
|
||||
* Number of data blocks to scan. This should correspond to the size of
|
||||
* the @a data and @a length arrays.
|
||||
*
|
||||
* @param flags
|
||||
* Flags modifying the behaviour of this function. This parameter is
|
||||
* provided for future use and is unused at present.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch() for
|
||||
* this database.
|
||||
*
|
||||
* @param onEvent
|
||||
* Pointer to a match event callback function. If a NULL pointer is given,
|
||||
* no matches will be returned.
|
||||
*
|
||||
* @param context
|
||||
* The user defined pointer which will be passed to the callback function.
|
||||
*
|
||||
* @return
|
||||
* Returns @ref HS_SUCCESS on success; @ref HS_SCAN_TERMINATED if the match
|
||||
* callback indicated that scanning should stop; other values on error.
|
||||
*/
|
||||
hs_error_t hs_scan_vector(const hs_database_t *db, const char *const *data,
|
||||
const unsigned int *length, unsigned int count,
|
||||
unsigned int flags, hs_scratch_t *scratch,
|
||||
match_event_handler onEvent, void *context);
|
||||
|
||||
/**
|
||||
* Allocate a "scratch" space for use by Hyperscan.
|
||||
*
|
||||
* This is required for runtime use, and one scratch space per thread, or
|
||||
* concurrent caller, is required. Any allocator callback set by @ref
|
||||
* hs_set_scratch_allocator() or @ref hs_set_allocator() will be used by this
|
||||
* function.
|
||||
*
|
||||
* @param db
|
||||
* The database, as produced by @ref hs_compile().
|
||||
*
|
||||
* @param scratch
|
||||
* On first allocation, a pointer to NULL should be provided so a new
|
||||
* scratch can be allocated. If a scratch block has been previously
|
||||
* allocated, then a pointer to it should be passed back in to see if it
|
||||
* is valid for this database block. If a new scratch block is required,
|
||||
* the original will be freed and the new one returned, otherwise the
|
||||
* previous scratch block will be returned. On success, the scratch block
|
||||
* will be suitable for use with the provided database in addition to any
|
||||
* databases that original scratch space was suitable for.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on successful allocation; @ref HS_NOMEM if the
|
||||
* allocation fails. Other errors may be returned if invalid parameters
|
||||
* are specified.
|
||||
*/
|
||||
hs_error_t hs_alloc_scratch(const hs_database_t *db, hs_scratch_t **scratch);
|
||||
|
||||
/**
|
||||
* Allocate a scratch space that is a clone of an existing scratch space.
|
||||
*
|
||||
* This is useful when multiple concurrent threads will be using the same set
|
||||
* of compiled databases, and another scratch space is required. Any allocator
|
||||
* callback set by @ref hs_set_scratch_allocator() or @ref hs_set_allocator()
|
||||
* will be used by this function.
|
||||
*
|
||||
* @param src
|
||||
* The existing @ref hs_scratch_t to be cloned.
|
||||
*
|
||||
* @param dest
|
||||
* A pointer to the new scratch space will be returned here.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success; @ref HS_NOMEM if the allocation fails.
|
||||
* Other errors may be returned if invalid parameters are specified.
|
||||
*/
|
||||
hs_error_t hs_clone_scratch(const hs_scratch_t *src, hs_scratch_t **dest);
|
||||
|
||||
/**
|
||||
* Provides the size of the given scratch space.
|
||||
*
|
||||
* @param scratch
|
||||
* A per-thread scratch space allocated by @ref hs_alloc_scratch() or @ref
|
||||
* hs_clone_scratch().
|
||||
*
|
||||
* @param scratch_size
|
||||
* On success, the size of the scratch space in bytes is placed in this
|
||||
* parameter.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_scratch_size(const hs_scratch_t *scratch, size_t *scratch_size);
|
||||
|
||||
/**
|
||||
* Free a scratch block previously allocated by @ref hs_alloc_scratch() or @ref
|
||||
* hs_clone_scratch().
|
||||
*
|
||||
* The free callback set by @ref hs_set_scratch_allocator() or @ref
|
||||
* hs_set_allocator() will be used by this function.
|
||||
*
|
||||
* @param scratch
|
||||
* The scratch block to be freed. NULL may also be safely provided.
|
||||
*
|
||||
* @return
|
||||
* @ref HS_SUCCESS on success, other values on failure.
|
||||
*/
|
||||
hs_error_t hs_free_scratch(hs_scratch_t *scratch);
|
||||
|
||||
/**
|
||||
* Callback 'from' return value, indicating that the start of this match was
|
||||
* too early to be tracked with the requested SOM_HORIZON precision.
|
||||
*/
|
||||
#define HS_OFFSET_PAST_HORIZON (~0ULL)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* HS_RUNTIME_H_ */
|
||||
36
src/hs_version.c
Normal file
36
src/hs_version.c
Normal file
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "hs_common.h"
|
||||
#include "hs_version.h"
|
||||
|
||||
HS_PUBLIC_API
|
||||
const char *hs_version(void) {
|
||||
return HS_VERSION_STRING;
|
||||
}
|
||||
40
src/hs_version.h.in
Normal file
40
src/hs_version.h.in
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef HS_VERSION_H_C6428FAF8E3713
|
||||
#define HS_VERSION_H_C6428FAF8E3713
|
||||
|
||||
/**
|
||||
* A version string to identify this release of Hyperscan.
|
||||
*/
|
||||
#define HS_VERSION_STRING "@HS_VERSION@ @BUILD_DATE@"
|
||||
|
||||
#define HS_VERSION_32BIT ((@HS_MAJOR_VERSION@ << 24) | (@HS_MINOR_VERSION@ << 16) | (@HS_PATCH_VERSION@ << 8) | 0)
|
||||
|
||||
#endif /* HS_VERSION_H_C6428FAF8E3713 */
|
||||
|
||||
240
src/hwlm/hwlm.c
Normal file
240
src/hwlm/hwlm.c
Normal file
@@ -0,0 +1,240 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: runtime.
|
||||
*/
|
||||
#include "hwlm.h"
|
||||
#include "hwlm_internal.h"
|
||||
#include "noodle_engine.h"
|
||||
#include "scratch.h"
|
||||
#include "ue2common.h"
|
||||
#include "fdr/fdr.h"
|
||||
#include "nfa/accel.h"
|
||||
#include "nfa/shufti.h"
|
||||
#include "nfa/vermicelli.h"
|
||||
#include <string.h>
|
||||
|
||||
#define MIN_ACCEL_LEN_BLOCK 16
|
||||
#define MIN_ACCEL_LEN_STREAM 16
|
||||
|
||||
static really_inline
|
||||
const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr,
|
||||
const u8 *end) {
|
||||
switch (aux->accel_type) {
|
||||
case ACCEL_VERM:
|
||||
DEBUG_PRINTF("single vermicelli for 0x%02hhx\n", aux->verm.c);
|
||||
return vermicelliExec(aux->verm.c, 0, ptr, end);
|
||||
case ACCEL_VERM_NOCASE:
|
||||
DEBUG_PRINTF("single vermicelli-nocase for 0x%02hhx\n", aux->verm.c);
|
||||
return vermicelliExec(aux->verm.c, 1, ptr, end);
|
||||
case ACCEL_DVERM:
|
||||
DEBUG_PRINTF("double vermicelli for 0x%02hhx%02hhx\n", aux->dverm.c1,
|
||||
aux->dverm.c2);
|
||||
return vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 0, ptr, end);
|
||||
case ACCEL_DVERM_NOCASE:
|
||||
DEBUG_PRINTF("double vermicelli-nocase for 0x%02hhx%02hhx\n",
|
||||
aux->dverm.c1, aux->dverm.c2);
|
||||
return vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 1, ptr, end);
|
||||
case ACCEL_SHUFTI:
|
||||
DEBUG_PRINTF("single shufti\n");
|
||||
return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);
|
||||
default:
|
||||
/* no acceleration, fall through and return current ptr */
|
||||
return ptr;
|
||||
}
|
||||
}
|
||||
|
||||
static really_inline
|
||||
void do_accel_block(const union AccelAux *aux, const u8 *buf, size_t len,
|
||||
size_t *start) {
|
||||
if (len - *start < MIN_ACCEL_LEN_BLOCK) {
|
||||
return;
|
||||
}
|
||||
|
||||
const u8 *ptr = buf + *start;
|
||||
const u8 *end = buf + len;
|
||||
const u8 offset = aux->generic.offset;
|
||||
ptr = run_hwlm_accel(aux, ptr, end);
|
||||
|
||||
if (offset) {
|
||||
ptr -= offset;
|
||||
if (ptr < buf) {
|
||||
ptr = buf;
|
||||
}
|
||||
}
|
||||
assert(ptr >= buf);
|
||||
*start = ptr - buf;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
int inaccurate_accel(u8 type) {
|
||||
/* accels which don't always catch up to the boundary
|
||||
* DSHUFTI is also inaccurate but it is not used by the hamsters */
|
||||
return type == ACCEL_DVERM_NOCASE || type == ACCEL_DVERM;
|
||||
}
|
||||
|
||||
static never_inline
|
||||
void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen,
|
||||
const u8 *buf, size_t len, size_t *start) {
|
||||
if (aux->accel_type == ACCEL_NONE || len - *start < MIN_ACCEL_LEN_STREAM) {
|
||||
return;
|
||||
}
|
||||
|
||||
const u8 offset = aux->generic.offset;
|
||||
|
||||
DEBUG_PRINTF("using accel %hhu offset %hhu\n", aux->accel_type, offset);
|
||||
|
||||
// Scan history buffer, but only if the start offset (which always refers to
|
||||
// buf) is zero.
|
||||
|
||||
if (!*start && hlen) {
|
||||
const u8 *ptr1 = hbuf;
|
||||
const u8 *end1 = hbuf + hlen;
|
||||
if (hlen >= 16) {
|
||||
ptr1 = run_hwlm_accel(aux, ptr1, end1);
|
||||
}
|
||||
|
||||
if ((hlen <= 16 || inaccurate_accel(aux->accel_type))
|
||||
&& end1 != ptr1 && end1 - ptr1 <= 16) {
|
||||
DEBUG_PRINTF("already scanned %zu/%zu\n", ptr1 - hbuf, hlen);
|
||||
/* see if we can finish off the history buffer completely */
|
||||
u8 ALIGN_DIRECTIVE temp[17];
|
||||
ptrdiff_t tlen = end1 - ptr1;
|
||||
memcpy(temp, ptr1, tlen);
|
||||
memset(temp + tlen, 0, 17 - tlen);
|
||||
if (len) { /* for dverm */
|
||||
temp[end1 - ptr1] = *buf;
|
||||
}
|
||||
|
||||
const u8 *tempp = run_hwlm_accel(aux, temp, temp + 17);
|
||||
|
||||
if (tempp - temp >= tlen) {
|
||||
ptr1 = end1;
|
||||
}
|
||||
DEBUG_PRINTF("got %zu\n", tempp - temp);
|
||||
}
|
||||
|
||||
if (ptr1 != end1) {
|
||||
DEBUG_PRINTF("bailing in history\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("scanning main buffer, start=%zu, len=%zu\n", *start, len);
|
||||
|
||||
const u8 *ptr2 = buf + *start;
|
||||
const u8 *end2 = buf + len;
|
||||
|
||||
const u8 *found = run_hwlm_accel(aux, ptr2, end2);
|
||||
|
||||
if (found >= ptr2 + offset) {
|
||||
size_t delta = found - offset - ptr2;
|
||||
DEBUG_PRINTF("got %zu/%zu in 2nd buffer\n", delta, len);
|
||||
*start += delta;
|
||||
} else if (hlen) {
|
||||
UNUSED size_t remaining = offset + ptr2 - found;
|
||||
DEBUG_PRINTF("got %zu/%zu remaining in 1st buffer\n", remaining, hlen);
|
||||
}
|
||||
}
|
||||
|
||||
hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback cb, void *ctxt,
|
||||
hwlm_group_t groups) {
|
||||
DEBUG_PRINTF("buf len=%zu, start=%zu, groups=%llx\n", len, start, groups);
|
||||
if (!groups) {
|
||||
DEBUG_PRINTF("groups all off\n");
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
assert(start < len);
|
||||
|
||||
if (t->type == HWLM_ENGINE_NOOD) {
|
||||
DEBUG_PRINTF("calling noodExec\n");
|
||||
return noodExec(HWLM_C_DATA(t), buf + start, len - start, start, cb,
|
||||
ctxt);
|
||||
} else {
|
||||
assert(t->type == HWLM_ENGINE_FDR);
|
||||
const union AccelAux *aa = &t->accel0;
|
||||
if ((groups & ~t->accel1_groups) == 0) {
|
||||
DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
|
||||
aa = &t->accel1;
|
||||
}
|
||||
do_accel_block(aa, buf, len, &start);
|
||||
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups,
|
||||
start);
|
||||
return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, ctxt, groups);
|
||||
}
|
||||
}
|
||||
|
||||
hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch,
|
||||
size_t len, size_t start, HWLMCallback cb,
|
||||
void *ctxt, hwlm_group_t groups,
|
||||
u8 *stream_state) {
|
||||
const u8 *hbuf = scratch->core_info.hbuf;
|
||||
const size_t hlen = scratch->core_info.hlen;
|
||||
const u8 *buf = scratch->core_info.buf;
|
||||
|
||||
DEBUG_PRINTF("hbuf len=%zu, buf len=%zu, start=%zu, groups=%llx\n", hlen,
|
||||
len, start, groups);
|
||||
|
||||
if (!groups) {
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
assert(start < len);
|
||||
|
||||
if (t->type == HWLM_ENGINE_NOOD) {
|
||||
DEBUG_PRINTF("calling noodExec\n");
|
||||
// If we've been handed a start offset, we can use a block mode scan at
|
||||
// that offset.
|
||||
if (start) {
|
||||
return noodExec(HWLM_C_DATA(t), buf + start, len - start, start,
|
||||
cb, ctxt);
|
||||
} else {
|
||||
return noodExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, cb,
|
||||
ctxt, scratch->fdr_temp_buf,
|
||||
FDR_TEMP_BUF_SIZE);
|
||||
}
|
||||
} else {
|
||||
// t->type == HWLM_ENGINE_FDR
|
||||
const union AccelAux *aa = &t->accel0;
|
||||
if ((groups & ~t->accel1_groups) == 0) {
|
||||
DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
|
||||
aa = &t->accel1;
|
||||
}
|
||||
// if no active stream state, use acceleration
|
||||
if (!fdrStreamStateActive(HWLM_C_DATA(t), stream_state)) {
|
||||
do_accel_streaming(aa, hbuf, hlen, buf, len, &start);
|
||||
}
|
||||
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups,
|
||||
start);
|
||||
return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len,
|
||||
start, cb, ctxt, groups, stream_state);
|
||||
}
|
||||
}
|
||||
142
src/hwlm/hwlm.h
Normal file
142
src/hwlm/hwlm.h
Normal file
@@ -0,0 +1,142 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: runtime API.
|
||||
*/
|
||||
|
||||
#ifndef HWLM_H
|
||||
#define HWLM_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
/** \brief Error return type for exec functions. */
|
||||
typedef int hwlm_error_t;
|
||||
|
||||
/** \brief Type representing a set of groups as a bitmap. */
|
||||
typedef u64a hwlm_group_t;
|
||||
|
||||
/** \brief HWLM callback return type. */
|
||||
typedef hwlm_group_t hwlmcb_rv_t;
|
||||
|
||||
/** \brief Value representing all possible literal groups. */
|
||||
#define HWLM_ALL_GROUPS ((hwlm_group_t)~0ULL)
|
||||
|
||||
/** \brief Callback return value indicating that we should continue matching. */
|
||||
#define HWLM_CONTINUE_MATCHING HWLM_ALL_GROUPS
|
||||
|
||||
/** \brief Callback return value indicating that we should halt matching. */
|
||||
#define HWLM_TERMINATE_MATCHING 0
|
||||
|
||||
/** \brief Matching finished without being terminated by the user. */
|
||||
#define HWLM_SUCCESS 0
|
||||
|
||||
/** \brief The user terminated matching by returning HWLM_TERMINATE_MATCHING
|
||||
* from the match callback. */
|
||||
#define HWLM_TERMINATED 1
|
||||
|
||||
/** \brief An error occurred during matching.
|
||||
*
|
||||
* This should only be used if an unsupported engine was called (like one
|
||||
* designed for a different architecture). */
|
||||
#define HWLM_ERROR_UNKNOWN 2
|
||||
|
||||
struct hs_scratch;
|
||||
struct HWLM;
|
||||
|
||||
/** \brief The type for an HWLM callback.
|
||||
*
|
||||
* This callback receives a start-of-match offset, an end-of-match offset, the
|
||||
* ID of the match and the context pointer that was passed into \ref
|
||||
* hwlmExec or \ref hwlmExecStreaming.
|
||||
*
|
||||
* A callback return of \ref HWLM_TERMINATE_MATCHING will stop matching.
|
||||
*
|
||||
* A callback return of \ref HWLM_CONTINUE_MATCHING continues matching.
|
||||
*
|
||||
* An arbitrary group mask may be given as the return value. This will be taken
|
||||
* as a hint by the underlying engine that only literals with groups
|
||||
* overlapping the provided mask need to be reported.
|
||||
*
|
||||
* The underlying engine may choose not to report a match if there is no group
|
||||
* belonging to the literal which was active at the time when the end match location
|
||||
* was first reached.
|
||||
*/
|
||||
typedef hwlmcb_rv_t (*HWLMCallback)(size_t start, size_t end, u32 id,
|
||||
void *context);
|
||||
|
||||
/** \brief Match strings in table.
|
||||
*
|
||||
* If a match occurs, the callback function given will be called with the index
|
||||
* of the last character in the string and the \p context (passed through
|
||||
* without interpretation).
|
||||
*
|
||||
* Returns \ref HWLM_TERMINATED if scanning is cancelled due to the callback
|
||||
* returning \ref HWLM_TERMINATE_MATCHING.
|
||||
*
|
||||
* \p start is the first offset at which a match may start.
|
||||
*
|
||||
* The underlying engine may choose not to report any match which starts before
|
||||
* the first possible match of a literal which is in the initial group mask.
|
||||
*/
|
||||
hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len,
|
||||
size_t start, HWLMCallback callback, void *context,
|
||||
hwlm_group_t groups);
|
||||
|
||||
/** \brief As for \ref hwlmExec, but a streaming case across two buffers.
|
||||
*
|
||||
* \p scratch is used to access fdr_temp_buf and to access the history buffer,
|
||||
* history length and the main buffer.
|
||||
*
|
||||
* \p len is the length of the main buffer to be scanned.
|
||||
*
|
||||
* \p start is an advisory hint representing the first offset at which a match
|
||||
* may start. Some underlying literal matchers may not respect it.
|
||||
*
|
||||
* Two buffers/lengths are provided. Matches that occur entirely within
|
||||
* the history buffer will not be reported by this function. The offsets
|
||||
* reported for the main buffer are relative to the start of that buffer (a
|
||||
* match at byte 10 of the main buffer is reported as 10). Matches that start
|
||||
* in the history buffer will have starts reported with 'negative' values.
|
||||
*/
|
||||
hwlm_error_t hwlmExecStreaming(const struct HWLM *tab,
|
||||
struct hs_scratch *scratch, size_t len,
|
||||
size_t start, HWLMCallback callback,
|
||||
void *context, hwlm_group_t groups,
|
||||
u8 *stream_state);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif
|
||||
635
src/hwlm/hwlm_build.cpp
Normal file
635
src/hwlm/hwlm_build.cpp
Normal file
@@ -0,0 +1,635 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: build code.
|
||||
*/
|
||||
#include "grey.h"
|
||||
#include "hwlm.h"
|
||||
#include "hwlm_build.h"
|
||||
#include "hwlm_internal.h"
|
||||
#include "noodle_engine.h"
|
||||
#include "noodle_build.h"
|
||||
#include "ue2common.h"
|
||||
#include "fdr/fdr_compile.h"
|
||||
#include "fdr/fdr.h"
|
||||
#include "nfa/shufticompile.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/compile_error.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/target_info.h"
|
||||
#include "util/ue2string.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static const unsigned int MAX_ACCEL_OFFSET = 16;
|
||||
static const unsigned int MAX_SHUFTI_WIDTH = 240;
|
||||
|
||||
static
|
||||
bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
||||
const hwlmLiteral &first = *lits.front();
|
||||
|
||||
struct candidate {
|
||||
candidate(void)
|
||||
: c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {}
|
||||
candidate(const hwlmLiteral &base, u32 offset)
|
||||
: c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0),
|
||||
b5insens(false), valid(true) {}
|
||||
char c1;
|
||||
char c2;
|
||||
u32 max_offset;
|
||||
bool b5insens;
|
||||
bool valid;
|
||||
|
||||
bool operator>(const candidate &other) const {
|
||||
if (!valid) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!other.valid) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (other.cdiffers() && !cdiffers()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!other.cdiffers() && cdiffers()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!other.b5insens && b5insens) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (other.b5insens && !b5insens) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (max_offset > other.max_offset) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool cdiffers(void) const {
|
||||
if (!b5insens) {
|
||||
return c1 != c2;
|
||||
}
|
||||
return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR);
|
||||
}
|
||||
};
|
||||
|
||||
candidate best;
|
||||
|
||||
for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) {
|
||||
candidate curr(first, i);
|
||||
|
||||
/* check to see if this pair appears in each string */
|
||||
for (const auto &lit_ptr : lits) {
|
||||
const hwlmLiteral &lit = *lit_ptr;
|
||||
if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) {
|
||||
curr.b5insens = true; /* no choice but to be case insensitive */
|
||||
}
|
||||
|
||||
bool found = false;
|
||||
bool found_nc = false;
|
||||
for (u32 j = 0;
|
||||
!found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) {
|
||||
found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
|
||||
found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
|
||||
&& (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
|
||||
|
||||
if (curr.b5insens) {
|
||||
found = found_nc;
|
||||
}
|
||||
}
|
||||
|
||||
if (!curr.b5insens && !found && found_nc) {
|
||||
curr.b5insens = true;
|
||||
found = true;
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
goto next_candidate;
|
||||
}
|
||||
}
|
||||
|
||||
/* check to find the max offset where this appears */
|
||||
for (const auto &lit_ptr : lits) {
|
||||
const hwlmLiteral &lit = *lit_ptr;
|
||||
for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1;
|
||||
j++) {
|
||||
bool found = false;
|
||||
if (curr.b5insens) {
|
||||
found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
|
||||
&& (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
|
||||
} else {
|
||||
found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
|
||||
}
|
||||
|
||||
if (found) {
|
||||
curr.max_offset = MAX(curr.max_offset, j);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (curr > best) {
|
||||
best = curr;
|
||||
}
|
||||
|
||||
next_candidate:;
|
||||
}
|
||||
|
||||
if (!best.valid) {
|
||||
return false;
|
||||
}
|
||||
|
||||
aux->dverm.offset = verify_u8(best.max_offset);
|
||||
|
||||
if (!best.b5insens) {
|
||||
aux->dverm.accel_type = ACCEL_DVERM;
|
||||
aux->dverm.c1 = best.c1;
|
||||
aux->dverm.c2 = best.c2;
|
||||
DEBUG_PRINTF("built dverm for %02hhx%02hhx\n",
|
||||
aux->dverm.c1, aux->dverm.c2);
|
||||
} else {
|
||||
aux->dverm.accel_type = ACCEL_DVERM_NOCASE;
|
||||
aux->dverm.c1 = best.c1 & CASE_CLEAR;
|
||||
aux->dverm.c2 = best.c2 & CASE_CLEAR;
|
||||
DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n",
|
||||
aux->dverm.c1, aux->dverm.c2);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool findSVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
|
||||
const hwlmLiteral &first = *lits.front();
|
||||
|
||||
struct candidate {
|
||||
candidate(void)
|
||||
: c(0), max_offset(0), b5insens(false), valid(false) {}
|
||||
candidate(const hwlmLiteral &base, u32 offset)
|
||||
: c(base.s[offset]), max_offset(0),
|
||||
b5insens(false), valid(true) {}
|
||||
char c;
|
||||
u32 max_offset;
|
||||
bool b5insens;
|
||||
bool valid;
|
||||
|
||||
bool operator>(const candidate &other) const {
|
||||
if (!valid) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!other.valid) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!other.b5insens && b5insens) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (other.b5insens && !b5insens) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (max_offset > other.max_offset) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
candidate best;
|
||||
|
||||
for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()); i++) {
|
||||
candidate curr(first, i);
|
||||
|
||||
/* check to see if this pair appears in each string */
|
||||
for (const auto &lit_ptr : lits) {
|
||||
const hwlmLiteral &lit = *lit_ptr;
|
||||
if (lit.nocase && ourisalpha(curr.c)) {
|
||||
curr.b5insens = true; /* no choice but to be case insensitive */
|
||||
}
|
||||
|
||||
bool found = false;
|
||||
bool found_nc = false;
|
||||
for (u32 j = 0;
|
||||
!found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) {
|
||||
found |= curr.c == lit.s[j];
|
||||
found_nc |= (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR);
|
||||
|
||||
if (curr.b5insens) {
|
||||
found = found_nc;
|
||||
}
|
||||
}
|
||||
|
||||
if (!curr.b5insens && !found && found_nc) {
|
||||
curr.b5insens = true;
|
||||
found = true;
|
||||
}
|
||||
|
||||
if (!found) {
|
||||
goto next_candidate;
|
||||
}
|
||||
}
|
||||
|
||||
/* check to find the max offset where this appears */
|
||||
for (const auto &lit_ptr : lits) {
|
||||
const hwlmLiteral &lit = *lit_ptr;
|
||||
for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) {
|
||||
bool found = false;
|
||||
if (curr.b5insens) {
|
||||
found = (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR);
|
||||
} else {
|
||||
found = curr.c == lit.s[j];
|
||||
}
|
||||
|
||||
if (found) {
|
||||
curr.max_offset = MAX(curr.max_offset, j);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (curr > best) {
|
||||
best = curr;
|
||||
}
|
||||
|
||||
next_candidate:;
|
||||
}
|
||||
|
||||
if (!best.valid) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!best.b5insens) {
|
||||
aux->verm.accel_type = ACCEL_VERM;
|
||||
aux->verm.c = best.c;
|
||||
DEBUG_PRINTF("built verm for %02hhx\n", aux->verm.c);
|
||||
} else {
|
||||
aux->verm.accel_type = ACCEL_VERM_NOCASE;
|
||||
aux->verm.c = best.c & CASE_CLEAR;
|
||||
DEBUG_PRINTF("built verm nc for %02hhx\n", aux->verm.c);
|
||||
}
|
||||
aux->verm.offset = verify_u8(best.max_offset);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
void filterLits(const vector<hwlmLiteral> &lits, hwlm_group_t expected_groups,
|
||||
vector<const hwlmLiteral *> *filtered_lits, u32 *min_len) {
|
||||
*min_len = MAX_ACCEL_OFFSET;
|
||||
|
||||
for (const auto &lit : lits) {
|
||||
if (!(lit.groups & expected_groups)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const size_t lit_len = lit.s.length();
|
||||
if (lit_len < *min_len) {
|
||||
*min_len = verify_u32(lit_len);
|
||||
}
|
||||
|
||||
filtered_lits->push_back(&lit);
|
||||
|
||||
#ifdef DEBUG
|
||||
DEBUG_PRINTF("lit:");
|
||||
for (u32 i = 0; i < lit.s.length(); i++) {
|
||||
printf("%02hhx", lit.s[i]);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
|
||||
hwlm_group_t expected_groups, AccelAux *aux) {
|
||||
DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups);
|
||||
u32 min_len = MAX_ACCEL_OFFSET;
|
||||
vector<const hwlmLiteral *> filtered_lits;
|
||||
|
||||
filterLits(lits, expected_groups, &filtered_lits, &min_len);
|
||||
if (filtered_lits.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (findDVerm(filtered_lits, aux)
|
||||
|| findSVerm(filtered_lits, aux)) {
|
||||
return;
|
||||
}
|
||||
|
||||
vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach());
|
||||
for (const auto &lit : lits) {
|
||||
if (!(lit.groups & expected_groups)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < MAX_ACCEL_OFFSET && i < lit.s.length(); i++) {
|
||||
unsigned char c = lit.s[i];
|
||||
if (lit.nocase) {
|
||||
DEBUG_PRINTF("adding %02hhx to %u\n", mytoupper(c), i);
|
||||
DEBUG_PRINTF("adding %02hhx to %u\n", mytolower(c), i);
|
||||
reach[i].set(mytoupper(c));
|
||||
reach[i].set(mytolower(c));
|
||||
} else {
|
||||
DEBUG_PRINTF("adding %02hhx to %u\n", c, i);
|
||||
reach[i].set(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
u32 min_count = ~0U;
|
||||
u32 min_offset = ~0U;
|
||||
for (u32 i = 0; i < min_len; i++) {
|
||||
size_t count = reach[i].count();
|
||||
DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i,
|
||||
describeClass(reach[i]).c_str(), count);
|
||||
if (count < min_count) {
|
||||
min_count = (u32)count;
|
||||
min_offset = i;
|
||||
}
|
||||
}
|
||||
assert(min_offset <= min_len);
|
||||
|
||||
if (min_count > MAX_SHUFTI_WIDTH) {
|
||||
DEBUG_PRINTF("min shufti with %u chars is too wide\n", min_count);
|
||||
return;
|
||||
}
|
||||
|
||||
const CharReach &cr = reach[min_offset];
|
||||
if (shuftiBuildMasks(cr, &aux->shufti.lo, &aux->shufti.hi) != -1) {
|
||||
DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n",
|
||||
describeClass(cr).c_str(), cr.count(), min_offset);
|
||||
aux->shufti.accel_type = ACCEL_SHUFTI;
|
||||
aux->shufti.offset = verify_u8(min_offset);
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("fail\n");
|
||||
}
|
||||
|
||||
static
|
||||
void buildForwardAccel(HWLM *h, const vector<hwlmLiteral> &lits,
|
||||
hwlm_group_t expected_groups) {
|
||||
findForwardAccelScheme(lits, expected_groups, &h->accel1);
|
||||
findForwardAccelScheme(lits, HWLM_ALL_GROUPS, &h->accel0);
|
||||
|
||||
h->accel1_groups = expected_groups;
|
||||
}
|
||||
|
||||
static
|
||||
void dumpLits(UNUSED const vector<hwlmLiteral> &lits) {
|
||||
#ifdef DEBUG
|
||||
DEBUG_PRINTF("building lit table for:\n");
|
||||
for (const auto &lit : lits) {
|
||||
printf("\t%u:%016llx %s%s\n", lit.id, lit.groups,
|
||||
escapeString(lit.s).c_str(), lit.nocase ? " (nc)" : "");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
// Called by an assertion.
|
||||
static
|
||||
bool everyoneHasGroups(const vector<hwlmLiteral> &lits) {
|
||||
for (const auto &lit : lits) {
|
||||
if (!lit.groups) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
static
|
||||
bool isNoodleable(const vector<hwlmLiteral> &lits,
|
||||
const hwlmStreamingControl *stream_control,
|
||||
const CompileContext &cc) {
|
||||
if (!cc.grey.allowNoodle) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (lits.size() != 1) {
|
||||
DEBUG_PRINTF("too many literals for noodle\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (stream_control) { // nullptr if in block mode
|
||||
if (lits.front().s.length() + 1 > stream_control->history_max) {
|
||||
DEBUG_PRINTF("length of %zu too long for history max %zu\n",
|
||||
lits.front().s.length(),
|
||||
stream_control->history_max);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!lits.front().msk.empty()) {
|
||||
DEBUG_PRINTF("noodle can't handle supplementary masks\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Build an HWLM structure for lits: a noodle engine when isNoodleable()
 * accepts the set, an FDR engine otherwise.
 *
 * Throws ResourceLimitError when the literal count, a literal's length, the
 * total character count or the built engine's size exceeds the matching grey
 * box limit, and CompileError when a literal uses the reserved id 0xffffffff.
 * Returns nullptr when the engine builder produced no table. */
aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
                                   hwlmStreamingControl *stream_control,
                                   bool make_small, const CompileContext &cc,
                                   hwlm_group_t expected_groups) {
    assert(!lits.empty());
    dumpLits(lits);

    assert(!stream_control
           || stream_control->history_min <= stream_control->history_max);

    // Check that we haven't exceeded the maximum number of literals.
    if (lits.size() > cc.grey.limitLiteralCount) {
        throw ResourceLimitError();
    }

    // Safety and resource limit checks.
    u64a chars_seen = 0;
    for (const hwlmLiteral &lit : lits) {
        assert(!lit.s.empty());

        const size_t lit_len = lit.s.length();
        if (lit_len > cc.grey.limitLiteralLength) {
            throw ResourceLimitError();
        }
        chars_seen += lit_len;
        if (chars_seen > cc.grey.limitLiteralMatcherChars) {
            throw ResourceLimitError();
        }

        // We do not allow the all-ones ID, as we reserve that for internal use
        // within literal matchers.
        if (lit.id == 0xffffffffu) {
            assert(!"reserved id 0xffffffff used");
            throw CompileError("Internal error.");
        }
    }

    u8 engine_type = 0;
    size_t engine_bytes = 0;
    shared_ptr<void> engine;

    DEBUG_PRINTF("building table with %zu strings\n", lits.size());

    assert(everyoneHasGroups(lits));

    if (!isNoodleable(lits, stream_control, cc)) {
        DEBUG_PRINTF("building a new deal\n");
        engine_type = HWLM_ENGINE_FDR;
        auto fdr = fdrBuildTable(lits, make_small, cc.target_info, cc.grey,
                                 stream_control);
        if (fdr) {
            engine_bytes = fdrSize(fdr.get());
        }
        engine = move(fdr);
    } else {
        DEBUG_PRINTF("build noodle table\n");
        engine_type = HWLM_ENGINE_NOOD;
        const hwlmLiteral &lit = lits.front();
        auto noodle = noodBuildTable((const u8 *)lit.s.c_str(), lit.s.length(),
                                     lit.nocase, lit.id);
        if (noodle) {
            engine_bytes = noodSize(noodle.get());
        }
        if (stream_control) {
            // For now, a single literal still goes to noodle and asks
            // for a great big history
            stream_control->literal_history_required = lit.s.length() - 1;
            assert(stream_control->literal_history_required
                   <= stream_control->history_max);
            stream_control->literal_stream_state_required = 0;
        }
        engine = move(noodle);
    }

    if (!engine) {
        return nullptr;
    }

    assert(engine_bytes);
    if (engine_bytes > cc.grey.limitLiteralMatcherSize) {
        throw ResourceLimitError();
    }

    // The engine table is copied in directly behind the cacheline-rounded
    // HWLM header.
    auto hwlm =
        aligned_zmalloc_unique<HWLM>(ROUNDUP_CL(sizeof(HWLM)) + engine_bytes);

    hwlm->type = engine_type;
    memcpy(HWLM_DATA(hwlm.get()), engine.get(), engine_bytes);

    if (engine_type == HWLM_ENGINE_FDR && cc.grey.hamsterAccelForward) {
        buildForwardAccel(hwlm.get(), lits, expected_groups);
    }

    if (stream_control) {
        DEBUG_PRINTF("requires %zu (of max %zu) bytes of history\n",
                     stream_control->literal_history_required,
                     stream_control->history_max);
        assert(stream_control->literal_history_required
               <= stream_control->history_max);
    }

    return hwlm;
}
|
||||
|
||||
size_t hwlmSize(const HWLM *h) {
|
||||
size_t engSize = 0;
|
||||
|
||||
switch (h->type) {
|
||||
case HWLM_ENGINE_NOOD:
|
||||
engSize = noodSize((const noodTable *)HWLM_C_DATA(h));
|
||||
break;
|
||||
case HWLM_ENGINE_FDR:
|
||||
engSize = fdrSize((const FDR *)HWLM_C_DATA(h));
|
||||
break;
|
||||
}
|
||||
|
||||
if (!engSize) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return engSize + ROUNDUP_CL(sizeof(*h));
|
||||
}
|
||||
|
||||
size_t hwlmFloodProneSuffixLen(size_t numLiterals, const CompileContext &cc) {
|
||||
const size_t NO_LIMIT = ~(size_t)0;
|
||||
|
||||
// NOTE: this function contains a number of magic numbers which are
|
||||
// conservative estimates of flood-proneness based on internal details of
|
||||
// the various literal engines that fall under the HWLM aegis. If you
|
||||
// change those engines, you might need to change this function too.
|
||||
|
||||
DEBUG_PRINTF("%zu literals\n", numLiterals);
|
||||
|
||||
if (cc.grey.allowNoodle && numLiterals <= 1) {
|
||||
DEBUG_PRINTF("noodle\n");
|
||||
return NO_LIMIT;
|
||||
}
|
||||
|
||||
if (cc.grey.fdrAllowTeddy) {
|
||||
if (numLiterals <= 48) {
|
||||
DEBUG_PRINTF("teddy\n");
|
||||
return 3;
|
||||
}
|
||||
if (cc.target_info.has_avx2() && numLiterals <= 96) {
|
||||
DEBUG_PRINTF("avx2 teddy\n");
|
||||
return 3;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: we had thought we could push this value up to 9, but it seems that
|
||||
// hurts performance on floods in some FDR models. Super-conservative for
|
||||
// now.
|
||||
DEBUG_PRINTF("fdr\n");
|
||||
return 3;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
104
src/hwlm/hwlm_build.h
Normal file
104
src/hwlm/hwlm_build.h
Normal file
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: build API.
|
||||
*/
|
||||
|
||||
#ifndef HWLM_BUILD_H
|
||||
#define HWLM_BUILD_H
|
||||
|
||||
#include "hwlm.h"
|
||||
#include "hwlm_literal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
struct HWLM;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct CompileContext;
|
||||
struct Grey;
|
||||
struct target_t;
|
||||
|
||||
/** \brief Input/output parameters for building a literal matcher that will
 * run in streaming mode. */
struct hwlmStreamingControl {
    /** \brief IN: the most history the literal matcher may request. */
    size_t history_max;

    /** \brief IN: history already known to be in use before literal
     * analysis. */
    size_t history_min;

    /** \brief OUT: history the literal matcher needs in order to match all
     * literals correctly. */
    size_t literal_history_required;

    /** \brief OUT: stream state required by the literal matcher, in bytes.
     * Can be zero, and generally will be small (0-8 bytes). */
    size_t literal_stream_state_required;
};
|
||||
|
||||
/** \brief Build an \ref HWLM literal matcher runtime structure for a group of
|
||||
* literals.
|
||||
*
|
||||
* \param lits The group of literals.
|
||||
* \param stream_control Streaming control parameters. If the matcher will
|
||||
* operate in non-streaming (block) mode, this pointer should be NULL.
|
||||
* \param make_small Optimise matcher for small size.
|
||||
* \param cc Compile context.
|
||||
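* \param expected_groups Group mask selecting the literals used to build the primary forward acceleration scheme (accel1); defaults to \ref HWLM_ALL_GROUPS.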
|
||||
*
|
||||
* Build failures are generally a result of memory allocation failure. These
|
||||
* may result in a nullptr return value, or a std::bad_alloc exception being
|
||||
* thrown.
|
||||
*/
|
||||
aligned_unique_ptr<HWLM>
|
||||
hwlmBuild(const std::vector<hwlmLiteral> &lits,
|
||||
hwlmStreamingControl *stream_control, bool make_small,
|
||||
const CompileContext &cc,
|
||||
hwlm_group_t expected_groups = HWLM_ALL_GROUPS);
|
||||
|
||||
/**
|
||||
* Returns an estimate of the number of repeated characters on the end of a
|
||||
* literal that will make a literal set of size \a numLiterals suffer
|
||||
* performance degradation.
|
||||
*/
|
||||
size_t hwlmFloodProneSuffixLen(size_t numLiterals, const CompileContext &cc);
|
||||
|
||||
/** \brief Return the size in bytes of an HWLM structure. */
|
||||
size_t hwlmSize(const HWLM *h);
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // HWLM_BUILD_H
|
||||
70
src/hwlm/hwlm_dump.cpp
Normal file
70
src/hwlm/hwlm_dump.cpp
Normal file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: dump code.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "hwlm_dump.h"
|
||||
#include "hwlm_internal.h"
|
||||
#include "noodle_build.h"
|
||||
#include "ue2common.h"
|
||||
#include "fdr/fdr_dump.h"
|
||||
#include "nfa/accel_dump.h"
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error No dump support!
|
||||
#endif
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void hwlmPrintStats(const HWLM *h, FILE *f) {
|
||||
switch (h->type) {
|
||||
case HWLM_ENGINE_NOOD:
|
||||
noodPrintStats((const noodTable *)HWLM_C_DATA(h), f);
|
||||
break;
|
||||
case HWLM_ENGINE_FDR:
|
||||
fdrPrintStats((const FDR *)HWLM_C_DATA(h), f);
|
||||
break;
|
||||
default:
|
||||
fprintf(f, "<unknown hwlm subengine>\n");
|
||||
}
|
||||
|
||||
fprintf(f, "accel1_groups: %016llx\n", h->accel1_groups);
|
||||
|
||||
fprintf(f, "accel1:");
|
||||
dumpAccelInfo(f, h->accel1);
|
||||
fprintf(f, "accel0:");
|
||||
dumpAccelInfo(f, h->accel0);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
50
src/hwlm/hwlm_dump.h
Normal file
50
src/hwlm/hwlm_dump.h
Normal file
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: dump API.
|
||||
*/
|
||||
|
||||
#ifndef HWLM_DUMP_H
|
||||
#define HWLM_DUMP_H
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
struct HWLM;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Dump some information about the given HWLM structure. */
|
||||
void hwlmPrintStats(const HWLM *h, FILE *f);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
#endif
|
||||
62
src/hwlm/hwlm_internal.h
Normal file
62
src/hwlm/hwlm_internal.h
Normal file
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: data structures.
|
||||
*/
|
||||
|
||||
#ifndef HWLM_INTERNAL_H
|
||||
#define HWLM_INTERNAL_H
|
||||
|
||||
#include "hwlm.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfa/accel.h"
|
||||
|
||||
/** \brief Underlying engine is FDR. */
|
||||
#define HWLM_ENGINE_FDR 12
|
||||
|
||||
/** \brief Underlying engine is Noodle. */
|
||||
#define HWLM_ENGINE_NOOD 16
|
||||
|
||||
/** \brief Main Hamster Wheel Literal Matcher header. Followed by
|
||||
* engine-specific structure. */
|
||||
struct HWLM {
|
||||
u8 type; /**< HWLM_ENGINE_NOOD or HWLM_ENGINE_FDR */
|
||||
hwlm_group_t accel1_groups; /**< accelerable groups. */
|
||||
union AccelAux accel1; /**< used if group mask is subset of accel1_groups */
|
||||
union AccelAux accel0; /**< fallback accel scheme */
|
||||
};
|
||||
|
||||
/** \brief Const pointer to the engine stored after the cacheline-rounded
 * HWLM header. */
#define HWLM_C_DATA(p) \
    ((const void *)((const char *)(p) + ROUNDUP_CL(sizeof(struct HWLM))))

/** \brief Mutable pointer to the engine stored after the cacheline-rounded
 * HWLM header. */
#define HWLM_DATA(p) \
    ((void *)((char *)(p) + ROUNDUP_CL(sizeof(struct HWLM))))
|
||||
|
||||
#endif
|
||||
111
src/hwlm/hwlm_literal.cpp
Normal file
111
src/hwlm/hwlm_literal.cpp
Normal file
@@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: literal representation at build time.
|
||||
*/
|
||||
#include "hwlm_literal.h"
|
||||
#include "util/bitutils.h" // for CASE_BIT
|
||||
#include "util/compare.h" // for ourisalpha
|
||||
#include "util/ue2string.h" // for escapeString
|
||||
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
#include <boost/algorithm/cxx11/all_of.hpp>
|
||||
|
||||
using namespace std;
|
||||
using namespace boost::algorithm;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
#ifdef DEBUG
/** \brief Render a byte vector as a string of two-digit lowercase hex
 * values, for debug output. */
static UNUSED
std::string dumpMask(const vector<u8> &v) {
    ostringstream oss;
    // Fill character and base persist on the stream; the width does not and
    // is set again for every byte.
    oss << setfill('0') << hex;
    for (const u8 byte : v) {
        oss << setw(2) << (unsigned int)byte;
    }
    return oss.str();
}
#endif
|
||||
|
||||
bool maskIsConsistent(const std::string &s, bool nocase, const vector<u8> &msk,
|
||||
const vector<u8> &cmp) {
|
||||
string::const_reverse_iterator si = s.rbegin();
|
||||
vector<u8>::const_reverse_iterator mi = msk.rbegin(), ci = cmp.rbegin();
|
||||
|
||||
for (; si != s.rend() && mi != msk.rend(); ++si, ++mi, ++ci) {
|
||||
u8 c = *si, m = *mi, v = *ci;
|
||||
if (nocase && ourisalpha(c)) {
|
||||
m &= ~CASE_BIT;
|
||||
v &= ~CASE_BIT;
|
||||
}
|
||||
|
||||
assert(ci != cmp.rend());
|
||||
if ((c & m) != v) {
|
||||
DEBUG_PRINTF("c = %02hhx; *ci = %02hhx m =%02hhx\n", c, *ci, m);
|
||||
DEBUG_PRINTF("s = %s; dist = %zd\n", s.c_str(), si - s.rbegin());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** \brief Complete constructor, takes group information and msk/cmp.
|
||||
*
|
||||
* This constructor takes a msk/cmp pair. Both must be vectors of length <=
|
||||
* \ref HWLM_MASKLEN. */
|
||||
hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in,
|
||||
bool noruns_in, u32 id_in, hwlm_group_t groups_in,
|
||||
const vector<u8> &msk_in, const vector<u8> &cmp_in)
|
||||
: s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in),
|
||||
groups(groups_in), msk(msk_in), cmp(cmp_in) {
|
||||
assert(msk.size() <= HWLM_MASKLEN);
|
||||
assert(msk.size() == cmp.size());
|
||||
|
||||
DEBUG_PRINTF("literal '%s', msk=%s, cmp=%s\n",
|
||||
escapeString(s).c_str(), dumpMask(msk).c_str(),
|
||||
dumpMask(cmp).c_str());
|
||||
|
||||
// Mask and compare vectors MUST be the same size.
|
||||
assert(msk.size() == cmp.size());
|
||||
|
||||
// We must have been passed a msk/cmp that can be applied to s.
|
||||
assert(maskIsConsistent(s, nocase, msk, cmp));
|
||||
|
||||
// In the name of good hygiene, zap msk/cmp if msk is all zeroes.
|
||||
if (all_of_equal(msk.begin(), msk.end(), 0)) {
|
||||
msk.clear();
|
||||
cmp.clear();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
121
src/hwlm/hwlm_literal.h
Normal file
121
src/hwlm/hwlm_literal.h
Normal file
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Hamster Wheel Literal Matcher: literal representation at build time.
|
||||
*/
|
||||
|
||||
#ifndef HWLM_LITERAL_H
|
||||
#define HWLM_LITERAL_H
|
||||
|
||||
#include "hwlm.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
|
||||
#define HWLM_MASKLEN 8
|
||||
|
||||
/** \brief Class representing a literal, fed to \ref hwlmBuild. */
|
||||
struct hwlmLiteral {
|
||||
std::string s; //!< \brief The literal itself.
|
||||
|
||||
/** \brief The ID to pass to the callback if this literal matches.
|
||||
*
|
||||
* Note that the special value 0xFFFFFFFF is reserved for internal use and
|
||||
* should not be used. */
|
||||
u32 id;
|
||||
|
||||
bool nocase; //!< \brief True if literal is case-insensitive.
|
||||
|
||||
/** \brief Matches for runs of this literal can be quashed.
|
||||
*
|
||||
* Advisory flag meaning that there is no value in returning runs of
|
||||
* additional matches for a literal after the first one, so such matches
|
||||
* can be quashed by the literal matcher. */
|
||||
bool noruns;
|
||||
|
||||
/** \brief Set of groups that literal belongs to.
|
||||
*
|
||||
* Use \ref HWLM_ALL_GROUPS for a literal that could match regardless of
|
||||
* the groups that are switched on. */
|
||||
hwlm_group_t groups;
|
||||
|
||||
/** \brief Supplementary comparison mask.
|
||||
*
|
||||
* These two values add a supplementary comparison that is done over the
|
||||
* final 8 bytes of the string -- if v is those bytes, then the string must
|
||||
* match as well as (v & msk) == cmp.
|
||||
*
|
||||
* An empty msk is the safe way of not adding any comparison to the string
|
||||
* unnecessarily filling in msk may turn off optimizations.
|
||||
*
|
||||
* The msk/cmp mechanism must NOT place a value into the literal that
|
||||
* conflicts with the contents of the string, but can be allowed to add
|
||||
* additional power within the string -- for example, to allow some case
|
||||
* sensitivity within a case-insensitive string.
|
||||
|
||||
* Values are stored in memory order -- i.e. the last byte of the mask
|
||||
* corresponds to the last byte of the string. Both vectors must be the
|
||||
* same size, and must not exceed \ref HWLM_MASKLEN in length.
|
||||
*/
|
||||
std::vector<u8> msk;
|
||||
|
||||
/** \brief Supplementary comparison value.
|
||||
*
|
||||
* See documentation for \ref msk.
|
||||
*/
|
||||
std::vector<u8> cmp;
|
||||
|
||||
/** \brief Simple constructor: no group information, no msk/cmp. */
|
||||
hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in)
|
||||
: s(s_in), id(id_in), nocase(nocase_in), noruns(false),
|
||||
groups(HWLM_ALL_GROUPS), msk(0), cmp(0) {}
|
||||
|
||||
/** \brief Complete constructor, takes group information and msk/cmp.
|
||||
*
|
||||
* This constructor takes a msk/cmp pair. Both must be vectors of length <=
|
||||
* \ref HWLM_MASKLEN. */
|
||||
hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in,
|
||||
u32 id_in, hwlm_group_t groups_in,
|
||||
const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in);
|
||||
};
|
||||
|
||||
/**
 * \brief Consistency test for a msk/cmp pair against a literal.
 *
 * \param s      The literal string.
 * \param nocase Whether the literal is case-insensitive.
 * \param msk    Supplementary comparison mask (see hwlmLiteral::msk).
 * \param cmp    Supplementary comparison value (see hwlmLiteral::cmp).
 * \return false if the given msk/cmp test can never match the literal
 *         string \p s (presumably true otherwise; the definition is not in
 *         this header).
 */
|
||||
bool maskIsConsistent(const std::string &s, bool nocase,
|
||||
const std::vector<u8> &msk, const std::vector<u8> &cmp);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // HWLM_LITERAL_H
|
||||
110
src/hwlm/noodle_build.cpp
Normal file
110
src/hwlm/noodle_build.cpp
Normal file
@@ -0,0 +1,110 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Noodle literal matcher: build code.
|
||||
*/
|
||||
#include <cstring> // for memcpy
|
||||
|
||||
#include "noodle_build.h"
|
||||
#include "noodle_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
size_t findNoodFragOffset(const u8 *lit, size_t len, bool nocase) {
|
||||
size_t offset = 0;
|
||||
for (size_t i = 0; i + 1 < len; i++) {
|
||||
int diff = 0;
|
||||
const char c = lit[i];
|
||||
const char d = lit[i + 1];
|
||||
if (nocase && ourisalpha(c)) {
|
||||
diff = (mytoupper(c) != mytoupper(d));
|
||||
} else {
|
||||
diff = (c != d);
|
||||
}
|
||||
offset = i;
|
||||
if (diff) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return offset;
|
||||
}
|
||||
|
||||
/** \brief Construct a Noodle matcher for the given literal. */
|
||||
aligned_unique_ptr<noodTable> noodBuildTable(const u8 *lit, size_t len,
                                             bool nocase, u32 id) {
    // The table is the noodTable header followed by the literal bytes.
    const size_t table_bytes = sizeof(noodTable) + len;
    aligned_unique_ptr<noodTable> table =
        aligned_zmalloc_unique<noodTable>(table_bytes);
    assert(table);

    // Offset of the fragment the runtime scans for.
    const size_t frag_offset = findNoodFragOffset(lit, len, nocase);

    table->id = id;
    table->len = verify_u32(len);
    table->key_offset = verify_u32(frag_offset);
    table->nocase = nocase ? 1 : 0;
    memcpy(table->str, lit, len);

    return table;
}
|
||||
|
||||
size_t noodSize(const noodTable *n) {
|
||||
assert(n); // shouldn't call with null
|
||||
return sizeof(*n) + n->len;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
#include <cctype>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void noodPrintStats(const noodTable *n, FILE *f) {
|
||||
fprintf(f, "Noodle table\n");
|
||||
fprintf(f, "Len: %u Key Offset: %u\n", n->len, n->key_offset);
|
||||
fprintf(f, "String: ");
|
||||
for (u32 i = 0; i < n->len; i++) {
|
||||
if (isgraph(n->str[i]) && n->str[i] != '\\') {
|
||||
fprintf(f, "%c", n->str[i]);
|
||||
} else {
|
||||
fprintf(f, "\\x%02hhx", n->str[i]);
|
||||
}
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
64
src/hwlm/noodle_build.h
Normal file
64
src/hwlm/noodle_build.h
Normal file
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Noodle literal matcher: build code.
|
||||
*/
|
||||
|
||||
#ifndef NOODLE_BUILD_H_048A1A6D585A9A
|
||||
#define NOODLE_BUILD_H_048A1A6D585A9A
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
|
||||
struct noodTable;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Construct a Noodle matcher for the given literal.
 *
 * \param lit    Bytes of the literal; \p len bytes are copied into the table.
 * \param len    Length of the literal in bytes.
 * \param nocase True if the literal is to be matched case-insensitively.
 * \param id     Identifier stored in the table.
 * \return Owning pointer to the newly built table.
 */
|
||||
ue2::aligned_unique_ptr<noodTable> noodBuildTable(const u8 *lit, size_t len,
|
||||
bool nocase, u32 id);
|
||||
|
||||
/** \brief Size in bytes of a Noodle table: the noodTable header plus the
 * literal bytes that follow it. \p n must not be null. */
size_t noodSize(const noodTable *n);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#ifdef DUMP_SUPPORT
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief Write a human-readable description of the table (length, key
 * offset and the literal itself) to \p f. Only declared when DUMP_SUPPORT is
 * defined. */
void noodPrintStats(const noodTable *n, FILE *f);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // DUMP_SUPPORT
|
||||
|
||||
#endif /* NOODLE_BUILD_H_048A1A6D585A9A */
|
||||
|
||||
364
src/hwlm/noodle_engine.c
Normal file
364
src/hwlm/noodle_engine.c
Normal file
@@ -0,0 +1,364 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Noodle literal matcher: runtime.
|
||||
*/
|
||||
#include "hwlm.h"
|
||||
#include "noodle_engine.h"
|
||||
#include "noodle_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/bitutils.h"
|
||||
#include "util/compare.h"
|
||||
#include "util/masked_move.h"
|
||||
#include "util/simd_utils.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
|
||||
/** \brief Noodle runtime context. */
|
||||
struct cb_info {
|
||||
HWLMCallback cb; //!< callback function called on match
|
||||
u32 id; //!< ID to pass to callback on match
|
||||
void *ctx; //!< caller-supplied context to pass to callback
|
||||
size_t offsetAdj; //!< used in streaming mode
|
||||
};
|
||||
|
||||
/* Return HWLM_TERMINATED from the enclosing function if (x) is
 * HWLM_TERMINATED. Wrapped in do/while(0) so that the expansion plus the
 * caller's trailing semicolon forms exactly one statement, which keeps the
 * macro safe inside an unbraced if/else. */
#define RETURN_IF_TERMINATED(x)                                                \
    do {                                                                       \
        if ((x) == HWLM_TERMINATED) {                                          \
            return HWLM_TERMINATED;                                            \
        }                                                                      \
    } while (0)
|
||||
|
||||
/* Report every candidate recorded in the bitmask z (single-character scan).
 *
 * Expects the following names in the expanding scope: z (u32 bitmask, one bit
 * per byte of the chunk at d; consumed), d (start of the chunk), and buf,
 * len, key, noCase, cbi as passed to final(). Returns HWLM_TERMINATED from
 * the enclosing function as soon as final() reports termination.
 *
 * rv carries final()'s return value, so it is declared with final()'s return
 * type, hwlm_error_t. */
#define SINGLE_ZSCAN()                                                         \
    do {                                                                       \
        while (unlikely(z)) {                                                  \
            u32 pos = findAndClearLSB_32(&z);                                  \
            size_t matchPos = d - buf + pos;                                   \
            hwlm_error_t rv = final(buf, len, key, 1, 0, 0, noCase, cbi,       \
                                    matchPos);                                 \
            RETURN_IF_TERMINATED(rv);                                          \
        }                                                                      \
    } while (0)
|
||||
|
||||
/* Report every candidate recorded in the bitmask z (two-character scan).
 *
 * A set bit marks the SECOND byte of a matching pair, hence the "- 1" when
 * the position of the pair is computed. Expects the following names in the
 * expanding scope: z (u32 bitmask, consumed), d (start of the chunk), and
 * buf, len, key, keyLen, keyOffset, noCase, cbi as passed to final(), which
 * verifies the whole literal. Returns HWLM_TERMINATED from the enclosing
 * function as soon as final() reports termination.
 *
 * rv carries final()'s return value, so it is declared with final()'s return
 * type, hwlm_error_t. */
#define DOUBLE_ZSCAN()                                                         \
    do {                                                                       \
        while (unlikely(z)) {                                                  \
            u32 pos = findAndClearLSB_32(&z);                                  \
            size_t matchPos = d - buf + pos - 1;                               \
            hwlm_error_t rv = final(buf, len, key, keyLen, keyOffset, 1,       \
                                    noCase, cbi, matchPos);                    \
            RETURN_IF_TERMINATED(rv);                                          \
        }                                                                      \
    } while (0)
|
||||
|
||||
static really_inline
|
||||
u8 caseClear8(u8 x, bool noCase) {
|
||||
return (u8)(noCase ? (x & (u8)0xdf) : x);
|
||||
}
|
||||
|
||||
// Make sure the rest of the string is there. The single character scanner
|
||||
// is used only for single chars with case insensitivity used correctly,
|
||||
// so it can go straight to the callback if we get this far.
|
||||
static really_inline
|
||||
hwlm_error_t final(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
|
||||
size_t keyOffset, bool is_double, bool noCase,
|
||||
const struct cb_info *cbi, size_t pos) {
|
||||
pos -= keyOffset;
|
||||
if (is_double) {
|
||||
if (pos + keyLen > len) {
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
if (cmpForward(buf + pos, key, keyLen, noCase)) { // ret 1 on mismatch
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
}
|
||||
pos += cbi->offsetAdj;
|
||||
DEBUG_PRINTF("match @ %zu->%zu\n", pos, (pos + keyLen - 1));
|
||||
hwlmcb_rv_t rv = cbi->cb(pos, (pos + keyLen - 1), cbi->id, cbi->ctx);
|
||||
if (rv == HWLM_TERMINATE_MATCHING) {
|
||||
return HWLM_TERMINATED;
|
||||
}
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#define CHUNKSIZE 32
|
||||
#define MASK_TYPE m256
|
||||
#include "noodle_engine_avx2.c"
|
||||
#else
|
||||
#define CHUNKSIZE 16
|
||||
#define MASK_TYPE m128
|
||||
#include "noodle_engine_sse.c"
|
||||
#endif
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleMain(const u8 *buf, size_t len, const u8 *key,
|
||||
bool noCase, const struct cb_info *cbi) {
|
||||
hwlm_error_t rv;
|
||||
size_t end = len;
|
||||
|
||||
const MASK_TYPE mask1 = getMask(key[0], noCase);
|
||||
const MASK_TYPE caseMask = getCaseMask();
|
||||
|
||||
if (len < CHUNKSIZE) {
|
||||
rv = scanSingleShort(buf, len, key, noCase, caseMask, mask1, cbi, 0, len);
|
||||
return rv;
|
||||
}
|
||||
|
||||
if (len == CHUNKSIZE) {
|
||||
rv = scanSingleUnaligned(buf, len, 0, key, noCase, caseMask, mask1, cbi,
|
||||
0, len);
|
||||
return rv;
|
||||
}
|
||||
|
||||
uintptr_t data = (uintptr_t)buf;
|
||||
uintptr_t s2Start = ROUNDUP_N(data, CHUNKSIZE) - data;
|
||||
uintptr_t last = data + end;
|
||||
uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
|
||||
uintptr_t s3Start = len - CHUNKSIZE;
|
||||
|
||||
if (s2Start) {
|
||||
// first scan out to the fast scan starting point
|
||||
DEBUG_PRINTF("stage 1: -> %zu\n", s2Start);
|
||||
rv = scanSingleUnaligned(buf, len, 0, key, noCase, caseMask, mask1, cbi,
|
||||
0, s2Start);
|
||||
RETURN_IF_TERMINATED(rv);
|
||||
}
|
||||
|
||||
if (likely(s2Start != s2End)) {
|
||||
// scan as far as we can, bounded by the last point this key can
|
||||
// possibly match
|
||||
DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s2End);
|
||||
rv = scanSingleFast(buf, len, key, noCase, caseMask, mask1, cbi,
|
||||
s2Start, s2End);
|
||||
RETURN_IF_TERMINATED(rv);
|
||||
}
|
||||
|
||||
// if we are done bail out
|
||||
if (s2End == end) {
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("stage 3: %zu -> %zu\n", s2End, end);
|
||||
rv = scanSingleUnaligned(buf, len, s3Start, key, noCase, caseMask, mask1,
|
||||
cbi, s2End, end);
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleMain(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset, bool noCase,
|
||||
const struct cb_info *cbi) {
|
||||
hwlm_error_t rv;
|
||||
// we stop scanning for the key-fragment when the rest of the key can't
|
||||
// possibly fit in the remaining buffer
|
||||
size_t end = len - keyLen + keyOffset + 2;
|
||||
|
||||
const MASK_TYPE caseMask = getCaseMask();
|
||||
const MASK_TYPE mask1 = getMask(key[keyOffset + 0], noCase);
|
||||
const MASK_TYPE mask2 = getMask(key[keyOffset + 1], noCase);
|
||||
|
||||
if (end - keyOffset < CHUNKSIZE) {
|
||||
rv = scanDoubleShort(buf, len, key, keyLen, keyOffset, noCase, caseMask,
|
||||
mask1, mask2, cbi, keyOffset, end);
|
||||
return rv;
|
||||
}
|
||||
if (end - keyOffset == CHUNKSIZE) {
|
||||
rv = scanDoubleUnaligned(buf, len, keyOffset, key, keyLen, keyOffset,
|
||||
noCase, caseMask, mask1, mask2, cbi, keyOffset,
|
||||
end);
|
||||
return rv;
|
||||
}
|
||||
|
||||
uintptr_t data = (uintptr_t)buf;
|
||||
uintptr_t s2Start = ROUNDUP_N(data + keyOffset, CHUNKSIZE) - data;
|
||||
uintptr_t s1End = s2Start + 1;
|
||||
uintptr_t last = data + end;
|
||||
uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data;
|
||||
uintptr_t s3Start = end - CHUNKSIZE;
|
||||
uintptr_t off = keyOffset;
|
||||
|
||||
if (s2Start != keyOffset) {
|
||||
// first scan out to the fast scan starting point plus one char past to
|
||||
// catch the key on the overlap
|
||||
DEBUG_PRINTF("stage 1: -> %zu\n", s2Start);
|
||||
rv = scanDoubleUnaligned(buf, len, keyOffset, key, keyLen, keyOffset,
|
||||
noCase, caseMask, mask1, mask2, cbi, off,
|
||||
s1End);
|
||||
RETURN_IF_TERMINATED(rv);
|
||||
}
|
||||
off = s1End;
|
||||
|
||||
if (s2Start >= end) {
|
||||
DEBUG_PRINTF("s2 == mL %zu\n", end);
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
if (likely(s2Start != s2End)) {
|
||||
// scan as far as we can, bounded by the last point this key can
|
||||
// possibly match
|
||||
DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s3Start);
|
||||
rv = scanDoubleFast(buf, len, key, keyLen, keyOffset, noCase, caseMask,
|
||||
mask1, mask2, cbi, s2Start, s2End);
|
||||
RETURN_IF_TERMINATED(rv);
|
||||
off = s2End;
|
||||
}
|
||||
|
||||
// if there isn't enough data left to match the key, bail out
|
||||
if (s2End == end) {
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("stage 3: %zu -> %zu\n", s3Start, end);
|
||||
rv = scanDoubleUnaligned(buf, len, s3Start, key, keyLen, keyOffset, noCase,
|
||||
caseMask, mask1, mask2, cbi, off, end);
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleNoCase(const u8 *buf, size_t len, const u8 *key,
|
||||
const struct cb_info *cbi) {
|
||||
return scanSingleMain(buf, len, key, 1, cbi);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleCase(const u8 *buf, size_t len, const u8 *key,
|
||||
const struct cb_info *cbi) {
|
||||
return scanSingleMain(buf, len, key, 0, cbi);
|
||||
}
|
||||
|
||||
// Single-character specialisation, used when keyLen = 1
|
||||
static really_inline
|
||||
hwlm_error_t scanSingle(const u8 *buf, size_t len, const u8 *key, bool noCase,
|
||||
const struct cb_info *cbi) {
|
||||
if (!ourisalpha(key[0])) {
|
||||
noCase = 0; // force noCase off if we don't have an alphabetic char
|
||||
}
|
||||
|
||||
// kinda ugly, but this forces constant propagation
|
||||
if (noCase) {
|
||||
return scanSingleNoCase(buf, len, key, cbi);
|
||||
} else {
|
||||
return scanSingleCase(buf, len, key, cbi);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleNoCase(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset,
|
||||
const struct cb_info *cbi) {
|
||||
return scanDoubleMain(buf, len, key, keyLen, keyOffset, 1, cbi);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleCase(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset,
|
||||
const struct cb_info *cbi) {
|
||||
return scanDoubleMain(buf, len, key, keyLen, keyOffset, 0, cbi);
|
||||
}
|
||||
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDouble(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
|
||||
size_t keyOffset, bool noCase,
|
||||
const struct cb_info *cbi) {
|
||||
// kinda ugly, but this forces constant propagation
|
||||
if (noCase) {
|
||||
return scanDoubleNoCase(buf, len, key, keyLen, keyOffset, cbi);
|
||||
} else {
|
||||
return scanDoubleCase(buf, len, key, keyLen, keyOffset, cbi);
|
||||
}
|
||||
}
|
||||
|
||||
// main entry point for the scan code
|
||||
static really_inline
|
||||
hwlm_error_t scan(const u8 *buf, size_t len, const u8 *key, size_t keyLen,
|
||||
size_t keyOffset, bool noCase, const struct cb_info *cbi) {
|
||||
if (len < keyLen) {
|
||||
// can't find string of length keyLen in a shorter buffer
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
if (keyLen == 1) {
|
||||
assert(keyOffset == 0);
|
||||
return scanSingle(buf, len, key, noCase, cbi);
|
||||
} else {
|
||||
return scanDouble(buf, len, key, keyLen, keyOffset, noCase, cbi);
|
||||
}
|
||||
}
|
||||
|
||||
/** \brief Block-mode scanner. */
|
||||
hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len,
                      size_t offset_adj, HWLMCallback cb, void *ctxt) {
    assert(n && buf);

    // offset_adj is added to every match position reported to cb.
    struct cb_info cbi = { cb, n->id, ctxt, offset_adj };
    // n->str is a counted string of n->len bytes, so print it with an explicit
    // precision ("%.*s") rather than a field width ("%*s"), which would keep
    // reading until a NUL byte. The precision argument must be an int.
    DEBUG_PRINTF("nood scan of %zu bytes for %.*s\n", len, (int)n->len,
                 (const char *)n->str);
    return scan(buf, len, n->str, n->len, n->key_offset, n->nocase, &cbi);
}
|
||||
|
||||
/** \brief Streaming-mode scanner. */
|
||||
hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
|
||||
size_t hlen, const u8 *buf, size_t len,
|
||||
HWLMCallback cb, void *ctxt, u8 *temp_buf,
|
||||
UNUSED size_t temp_buffer_size) {
|
||||
assert(n);
|
||||
|
||||
struct cb_info cbi = {cb, n->id, ctxt, 0};
|
||||
hwlm_error_t rv;
|
||||
|
||||
if (hlen) {
|
||||
assert(hbuf);
|
||||
|
||||
size_t tl1 = MIN(n->len - 1, hlen);
|
||||
size_t tl2 = MIN(n->len - 1, len);
|
||||
size_t temp_len = tl1 + tl2;
|
||||
assert(temp_len < temp_buffer_size);
|
||||
memcpy(temp_buf, hbuf + hlen - tl1, tl1);
|
||||
memcpy(temp_buf + tl1, buf, tl2);
|
||||
|
||||
cbi.offsetAdj = -tl1;
|
||||
rv = scan(temp_buf, temp_len, n->str, n->len, n->key_offset, n->nocase,
|
||||
&cbi);
|
||||
if (rv == HWLM_TERMINATED) {
|
||||
return HWLM_TERMINATED;
|
||||
}
|
||||
}
|
||||
|
||||
assert(buf);
|
||||
|
||||
cbi.offsetAdj = 0;
|
||||
return scan(buf, len, n->str, n->len, n->key_offset, n->nocase, &cbi);
|
||||
}
|
||||
59
src/hwlm/noodle_engine.h
Normal file
59
src/hwlm/noodle_engine.h
Normal file
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Noodle literal matcher: runtime API.
|
||||
*/
|
||||
|
||||
#ifndef NOODLE_ENGINE_H
|
||||
#define NOODLE_ENGINE_H
|
||||
|
||||
#include "hwlm.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
struct noodTable;
|
||||
|
||||
/** \brief Block-mode scanner.
 *
 * Scans the \p len bytes at \p buf for the literal held in \p n and calls
 * \p cb (with \p ctxt) for each match.
 *
 * \param offset_adj Added to the match positions passed to \p cb.
 * \return HWLM_TERMINATED if the callback asked for matching to stop.
 */
|
||||
hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len,
|
||||
size_t offset_adj, HWLMCallback cb, void *ctxt);
|
||||
|
||||
/** \brief Streaming-mode scanner.
 *
 * If \p hlen is non-zero, the tail of the history (\p hbuf, \p hlen) and the
 * head of the new data are first copied into \p temp_buf and scanned, so that
 * matches straddling the boundary are found; then \p buf itself is scanned.
 * Match positions are reported relative to the start of \p buf.
 *
 * \param temp_buf         Scratch space for the stitched boundary region.
 * \param temp_buffer_size Size of \p temp_buf; only checked by an assertion.
 * \return HWLM_TERMINATED if the callback asked for matching to stop.
 */
|
||||
hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf,
|
||||
size_t hlen, const u8 *buf, size_t len,
|
||||
HWLMCallback cb, void *ctxt, u8 *temp_buf,
|
||||
size_t temp_buffer_size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif
|
||||
234
src/hwlm/noodle_engine_avx2.c
Normal file
234
src/hwlm/noodle_engine_avx2.c
Normal file
@@ -0,0 +1,234 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* noodle scan parts for AVX */
|
||||
|
||||
/* Build the comparison vector for key byte c: the byte (case-cleared when
 * noCase is set) handed to set32x8. */
static really_inline m256 getMask(u8 c, bool noCase) {
    return set32x8(caseClear8(c, noCase));
}
|
||||
|
||||
/* Vector built from 0xdf, the same mask caseClear8 applies to a single byte;
 * the scanners AND it with loaded data when matching case-insensitively. */
static really_inline m256 getCaseMask(void) {
    const m256 caseFold = set32x8(0xdf);
    return caseFold;
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset,
|
||||
const u8 *key, bool noCase, m256 caseMask,
|
||||
m256 mask1, const struct cb_info *cbi,
|
||||
size_t start, size_t end) {
|
||||
const u8 *d = buf + offset;
|
||||
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
|
||||
const size_t l = end - start;
|
||||
|
||||
m256 v = loadu256(d);
|
||||
|
||||
if (noCase) {
|
||||
v = and256(v, caseMask);
|
||||
}
|
||||
|
||||
u32 z = movemask256(eq256(mask1, v));
|
||||
|
||||
u32 buf_off = start - offset;
|
||||
u32 mask = (u32)((u64a)(1ULL << l) - 1) << buf_off;
|
||||
DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
|
||||
|
||||
z &= mask;
|
||||
|
||||
SINGLE_ZSCAN();
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset,
|
||||
const u8 *key, size_t keyLen, size_t keyOffset,
|
||||
bool noCase, m256 caseMask, m256 mask1,
|
||||
m256 mask2, const struct cb_info *cbi,
|
||||
size_t start, size_t end) {
|
||||
const u8 *d = buf + offset;
|
||||
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
|
||||
size_t l = end - start;
|
||||
|
||||
m256 v = loadu256(d);
|
||||
|
||||
if (noCase) {
|
||||
v = and256(v, caseMask);
|
||||
}
|
||||
|
||||
u32 z0 = movemask256(eq256(mask1, v));
|
||||
u32 z1 = movemask256(eq256(mask2, v));
|
||||
u32 z = (z0 << 1) & z1;
|
||||
|
||||
// mask out where we can't match
|
||||
u32 buf_off = start - offset;
|
||||
u32 mask = (u32)((u64a)(1ULL << l) - 1) << buf_off;
|
||||
DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
|
||||
z &= mask;
|
||||
|
||||
DOUBLE_ZSCAN();
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
// The short scan routine. It is used both to scan data up to an
|
||||
// alignment boundary if needed and to finish off data that the aligned scan
|
||||
// function can't handle (due to small/unaligned chunk at end)
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
|
||||
bool noCase, m256 caseMask, m256 mask1,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start;
|
||||
size_t l = end - start;
|
||||
DEBUG_PRINTF("l %zu\n", l);
|
||||
assert(l <= 32);
|
||||
if (!l) {
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
m256 v;
|
||||
|
||||
if (l < 4) {
|
||||
u8 *vp = (u8*)&v;
|
||||
switch (l) {
|
||||
case 3: vp[2] = d[2];
|
||||
case 2: vp[1] = d[1];
|
||||
case 1: vp[0] = d[0];
|
||||
}
|
||||
} else {
|
||||
v = masked_move256_len(d, l);
|
||||
}
|
||||
|
||||
if (noCase) {
|
||||
v = and256(v, caseMask);
|
||||
}
|
||||
|
||||
// mask out where we can't match
|
||||
u32 mask = (0xFFFFFFFF >> (32 - l));
|
||||
|
||||
u32 z = mask & movemask256(eq256(mask1, v));
|
||||
|
||||
SINGLE_ZSCAN();
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset, bool noCase,
|
||||
m256 caseMask, m256 mask1, m256 mask2,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start;
|
||||
size_t l = end - start;
|
||||
if (!l) {
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
assert(l <= 32);
|
||||
m256 v;
|
||||
|
||||
DEBUG_PRINTF("d %zu\n", d - buf);
|
||||
if (l < 4) {
|
||||
u8 *vp = (u8*)&v;
|
||||
switch (l) {
|
||||
case 3: vp[2] = d[2];
|
||||
case 2: vp[1] = d[1];
|
||||
case 1: vp[0] = d[0];
|
||||
}
|
||||
} else {
|
||||
v = masked_move256_len(d, l);
|
||||
}
|
||||
if (noCase) {
|
||||
v = and256(v, caseMask);
|
||||
}
|
||||
|
||||
u32 z0 = movemask256(eq256(mask1, v));
|
||||
u32 z1 = movemask256(eq256(mask2, v));
|
||||
u32 z = (z0 << 1) & z1;
|
||||
|
||||
// mask out where we can't match
|
||||
u32 mask = (0xFFFFFFFF >> (32 - l));
|
||||
z &= mask;
|
||||
|
||||
DOUBLE_ZSCAN();
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key,
|
||||
bool noCase, m256 caseMask, m256 mask1,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start, *e = buf + end;
|
||||
assert(d < e);
|
||||
|
||||
for (; d < e; d += 32) {
|
||||
m256 v = noCase ? and256(load256(d), caseMask) : load256(d);
|
||||
|
||||
u32 z = movemask256(eq256(mask1, v));
|
||||
|
||||
// On large packet buffers, this prefetch appears to get us about 2%.
|
||||
__builtin_prefetch(d + 128);
|
||||
|
||||
SINGLE_ZSCAN();
|
||||
}
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset, bool noCase,
|
||||
m256 caseMask, m256 mask1, m256 mask2,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start, *e = buf + end;
|
||||
DEBUG_PRINTF("start %zu end %zu \n", start, end);
|
||||
assert(d < e);
|
||||
u8 lastz0 = 0;
|
||||
|
||||
for (; d < e; d += 32) {
|
||||
m256 v = noCase ? and256(load256(d), caseMask) : load256(d);
|
||||
|
||||
// we have to pull the masks out of the AVX registers because we can't
|
||||
// byte shift between the lanes
|
||||
u32 z0 = movemask256(eq256(mask1, v));
|
||||
u32 z1 = movemask256(eq256(mask2, v));
|
||||
u32 z = (lastz0 | (z0 << 1)) & z1;
|
||||
lastz0 = (z0 & 0x80000000) >> 31;
|
||||
|
||||
// On large packet buffers, this prefetch appears to get us about 2%.
|
||||
__builtin_prefetch(d + 128);
|
||||
|
||||
DOUBLE_ZSCAN();
|
||||
|
||||
}
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
202
src/hwlm/noodle_engine_sse.c
Normal file
202
src/hwlm/noodle_engine_sse.c
Normal file
@@ -0,0 +1,202 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* noodle scan parts for SSE */
|
||||
|
||||
/* Pass c through caseClear8() with the noCase flag and replicate the result
 * into all 16 byte lanes of an m128. */
static really_inline m128 getMask(u8 c, bool noCase) {
    return set16x8(caseClear8(c, noCase));
}
|
||||
|
||||
/* Mask ANDed onto input bytes for caseless scans: 0xdf in every lane, i.e.
 * every bit kept except 0x20. */
static really_inline m128 getCaseMask(void) {
    const u8 keepAllBut0x20 = 0xdf;
    return set16x8(keepAllBut0x20);
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleShort(const u8 *buf, size_t len, const u8 *key,
|
||||
bool noCase, m128 caseMask, m128 mask1,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start;
|
||||
size_t l = end - start;
|
||||
DEBUG_PRINTF("l %zu\n", l);
|
||||
assert(l <= 16);
|
||||
if (!l) {
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
m128 v = zeroes128();
|
||||
// we don't have a clever way of doing this move yet
|
||||
memcpy(&v, d, l);
|
||||
if (noCase) {
|
||||
v = and128(v, caseMask);
|
||||
}
|
||||
|
||||
// mask out where we can't match
|
||||
u32 mask = (0xFFFF >> (16 - l));
|
||||
|
||||
u32 z = mask & movemask128(eq128(mask1, v));
|
||||
|
||||
SINGLE_ZSCAN();
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleUnaligned(const u8 *buf, size_t len, size_t offset,
|
||||
const u8 *key, bool noCase, m128 caseMask,
|
||||
m128 mask1, const struct cb_info *cbi,
|
||||
size_t start, size_t end) {
|
||||
const u8 *d = buf + offset;
|
||||
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
|
||||
const size_t l = end - start;
|
||||
|
||||
m128 v = loadu128(d);
|
||||
|
||||
if (noCase) {
|
||||
v = and128(v, caseMask);
|
||||
}
|
||||
|
||||
u32 buf_off = start - offset;
|
||||
u32 mask = ((1 << l) - 1) << buf_off;
|
||||
|
||||
u32 z = mask & movemask128(eq128(mask1, v));
|
||||
|
||||
DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
|
||||
|
||||
z &= mask;
|
||||
|
||||
SINGLE_ZSCAN();
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleShort(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset, bool noCase,
|
||||
m128 caseMask, m128 mask1, m128 mask2,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start;
|
||||
size_t l = end - start;
|
||||
if (!l) {
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
assert(l <= 32);
|
||||
|
||||
DEBUG_PRINTF("d %zu\n", d - buf);
|
||||
m128 v = zeroes128();
|
||||
memcpy(&v, d, l);
|
||||
if (noCase) {
|
||||
v = and128(v, caseMask);
|
||||
}
|
||||
|
||||
u32 z = movemask128(and128(shiftLeft8Bits(eq128(mask1, v)), eq128(mask2, v)));
|
||||
|
||||
// mask out where we can't match
|
||||
u32 mask = (0xFFFF >> (16 - l));
|
||||
z &= mask;
|
||||
|
||||
DOUBLE_ZSCAN();
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleUnaligned(const u8 *buf, size_t len, size_t offset,
|
||||
const u8 *key, size_t keyLen, size_t keyOffset,
|
||||
bool noCase, m128 caseMask, m128 mask1,
|
||||
m128 mask2, const struct cb_info *cbi,
|
||||
size_t start, size_t end) {
|
||||
const u8 *d = buf + offset;
|
||||
DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
|
||||
size_t l = end - start;
|
||||
|
||||
m128 v = loadu128(d);
|
||||
|
||||
if (noCase) {
|
||||
v = and128(v, caseMask);
|
||||
}
|
||||
|
||||
u32 z = movemask128(and128(shiftLeft8Bits(eq128(mask1, v)), eq128(mask2, v)));
|
||||
|
||||
// mask out where we can't match
|
||||
u32 buf_off = start - offset;
|
||||
u32 mask = ((1 << l) - 1) << buf_off;
|
||||
DEBUG_PRINTF("mask 0x%08x z 0x%08x\n", mask, z);
|
||||
z &= mask;
|
||||
|
||||
DOUBLE_ZSCAN();
|
||||
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanSingleFast(const u8 *buf, size_t len, const u8 *key,
|
||||
bool noCase, m128 caseMask, m128 mask1,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start, *e = buf + end;
|
||||
assert(d < e);
|
||||
|
||||
for (; d < e; d += 16) {
|
||||
m128 v = noCase ? and128(load128(d), caseMask) : load128(d);
|
||||
|
||||
u32 z = movemask128(eq128(mask1, v));
|
||||
|
||||
// On large packet buffers, this prefetch appears to get us about 2%.
|
||||
__builtin_prefetch(d + 128);
|
||||
|
||||
SINGLE_ZSCAN();
|
||||
}
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
|
||||
static really_inline
|
||||
hwlm_error_t scanDoubleFast(const u8 *buf, size_t len, const u8 *key,
|
||||
size_t keyLen, size_t keyOffset, bool noCase,
|
||||
m128 caseMask, m128 mask1, m128 mask2,
|
||||
const struct cb_info *cbi, size_t start,
|
||||
size_t end) {
|
||||
const u8 *d = buf + start, *e = buf + end;
|
||||
assert(d < e);
|
||||
m128 lastz1 = zeroes128();
|
||||
|
||||
for (; d < e; d += 16) {
|
||||
m128 v = noCase ? and128(load128(d), caseMask) : load128(d);
|
||||
m128 z1 = eq128(mask1, v);
|
||||
m128 z2 = eq128(mask2, v);
|
||||
u32 z = movemask128(and128(or128(lastz1, shiftLeft8Bits(z1)), z2));
|
||||
lastz1 = _mm_srli_si128(z1, 15);
|
||||
|
||||
// On large packet buffers, this prefetch appears to get us about 2%.
|
||||
__builtin_prefetch(d + 128);
|
||||
DEBUG_PRINTF("z 0x%08x\n", z);
|
||||
DOUBLE_ZSCAN();
|
||||
}
|
||||
return HWLM_SUCCESS;
|
||||
}
|
||||
47
src/hwlm/noodle_internal.h
Normal file
47
src/hwlm/noodle_internal.h
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Data structures for Noodle literal matcher engine.
|
||||
*/
|
||||
|
||||
#ifndef NOODLE_INTERNAL_H_25D751C42E34A6
|
||||
#define NOODLE_INTERNAL_H_25D751C42E34A6
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
struct noodTable {
|
||||
u32 id;
|
||||
u32 len;
|
||||
u32 key_offset;
|
||||
u8 nocase;
|
||||
u8 str[];
|
||||
};
|
||||
|
||||
#endif /* NOODLE_INTERNAL_H_25D751C42E34A6 */
|
||||
|
||||
131
src/nfa/accel.c
Normal file
131
src/nfa/accel.c
Normal file
@@ -0,0 +1,131 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "accel.h"
|
||||
#include "shufti.h"
|
||||
#include "truffle.h"
|
||||
#include "vermicelli.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
|
||||
assert(ISALIGNED_N(accel, alignof(union AccelAux)));
|
||||
const u8 *rv;
|
||||
|
||||
switch (accel->accel_type) {
|
||||
case ACCEL_NONE:
|
||||
DEBUG_PRINTF("accel none %p %p\n", c, c_end);
|
||||
return c;
|
||||
|
||||
case ACCEL_VERM:
|
||||
DEBUG_PRINTF("accel verm %p %p\n", c, c_end);
|
||||
if (c + 15 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
rv = vermicelliExec(accel->verm.c, 0, c, c_end);
|
||||
break;
|
||||
|
||||
case ACCEL_VERM_NOCASE:
|
||||
DEBUG_PRINTF("accel verm nc %p %p\n", c, c_end);
|
||||
if (c + 15 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
rv = vermicelliExec(accel->verm.c, 1, c, c_end);
|
||||
break;
|
||||
|
||||
case ACCEL_DVERM:
|
||||
DEBUG_PRINTF("accel dverm %p %p\n", c, c_end);
|
||||
if (c + 16 + 1 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
/* need to stop one early to get an accurate end state */
|
||||
rv = vermicelliDoubleExec(accel->dverm.c1, accel->dverm.c2, 0, c,
|
||||
c_end - 1);
|
||||
break;
|
||||
|
||||
case ACCEL_DVERM_NOCASE:
|
||||
DEBUG_PRINTF("accel dverm nc %p %p\n", c, c_end);
|
||||
if (c + 16 + 1 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
/* need to stop one early to get an accurate end state */
|
||||
rv = vermicelliDoubleExec(accel->dverm.c1, accel->dverm.c2, 1, c,
|
||||
c_end - 1);
|
||||
break;
|
||||
|
||||
case ACCEL_SHUFTI:
|
||||
DEBUG_PRINTF("accel shufti %p %p\n", c, c_end);
|
||||
if (c + 15 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
rv = shuftiExec(accel->shufti.lo, accel->shufti.hi, c, c_end);
|
||||
break;
|
||||
|
||||
case ACCEL_TRUFFLE:
|
||||
DEBUG_PRINTF("accel Truffle %p %p\n", c, c_end);
|
||||
if (c + 15 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
rv = truffleExec(accel->truffle.mask1, accel->truffle.mask2, c, c_end);
|
||||
break;
|
||||
|
||||
case ACCEL_DSHUFTI:
|
||||
DEBUG_PRINTF("accel dshufti %p %p\n", c, c_end);
|
||||
if (c + 15 + 1 >= c_end) {
|
||||
return c;
|
||||
}
|
||||
|
||||
/* need to stop one early to get an accurate end state */
|
||||
rv = shuftiDoubleExec(accel->dshufti.lo1,
|
||||
accel->dshufti.hi1,
|
||||
accel->dshufti.lo2,
|
||||
accel->dshufti.hi2, c, c_end - 1);
|
||||
break;
|
||||
|
||||
case ACCEL_RED_TAPE:
|
||||
DEBUG_PRINTF("accel red tape %p %p\n", c, c_end);
|
||||
rv = c_end;
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(!"not here");
|
||||
return c;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("adjusting for offset %u\n", accel->generic.offset);
|
||||
/* adjust offset to take into account the offset */
|
||||
rv = MAX(c + accel->generic.offset, rv);
|
||||
rv -= accel->generic.offset;
|
||||
|
||||
return rv;
|
||||
}
|
||||
112
src/nfa/accel.h
Normal file
112
src/nfa/accel.h
Normal file
@@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Acceleration: data structures and common definitions.
|
||||
*/
|
||||
|
||||
#ifndef ACCEL_H
|
||||
#define ACCEL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
/* run time defs */
|
||||
#define BAD_ACCEL_DIST 4
|
||||
#define SMALL_ACCEL_PENALTY 8
|
||||
#define BIG_ACCEL_PENALTY 32
|
||||
|
||||
/// Minimum length of the scan buffer for us to attempt acceleration.
|
||||
#define ACCEL_MIN_LEN 16
|
||||
|
||||
/** \brief Identifiers for the acceleration schemes; stored in the accel_type
 * byte of union AccelAux. Enumerator order (and thus value) is unchanged. */
enum AccelType {
    ACCEL_NONE,

    /* vermicelli: single byte / byte pair, caseful and caseless */
    ACCEL_VERM,
    ACCEL_VERM_NOCASE,
    ACCEL_DVERM,
    ACCEL_DVERM_NOCASE,

    /* R-prefixed counterparts of the vermicelli schemes */
    ACCEL_RVERM,
    ACCEL_RVERM_NOCASE,
    ACCEL_RDVERM,
    ACCEL_RDVERM_NOCASE,

    /* R-prefixed EOD schemes */
    ACCEL_REOD,
    ACCEL_REOD_NOCASE,
    ACCEL_RDEOD,
    ACCEL_RDEOD_NOCASE,

    /* mask-based schemes */
    ACCEL_SHUFTI,
    ACCEL_DSHUFTI,
    ACCEL_TRUFFLE,

    ACCEL_RED_TAPE
};
|
||||
|
||||
/** \brief Structure for accel framework. */
|
||||
union AccelAux {
|
||||
u8 accel_type;
|
||||
struct {
|
||||
u8 accel_type;
|
||||
u8 offset;
|
||||
} generic;
|
||||
struct {
|
||||
u8 accel_type;
|
||||
u8 offset;
|
||||
u8 c; // uppercase if nocase
|
||||
} verm;
|
||||
struct {
|
||||
u8 accel_type;
|
||||
u8 offset;
|
||||
u8 c1; // uppercase if nocase
|
||||
u8 c2; // uppercase if nocase
|
||||
} dverm;
|
||||
struct {
|
||||
u8 accel_type;
|
||||
u8 offset;
|
||||
m128 lo;
|
||||
m128 hi;
|
||||
} shufti;
|
||||
struct {
|
||||
u8 accel_type;
|
||||
u8 offset;
|
||||
m128 lo1;
|
||||
m128 hi1;
|
||||
m128 lo2;
|
||||
m128 hi2;
|
||||
} dshufti;
|
||||
struct {
|
||||
u8 accel_type;
|
||||
u8 offset;
|
||||
m128 mask1;
|
||||
m128 mask2;
|
||||
} truffle;
|
||||
};
|
||||
|
||||
/**
 * Runs the specified acceleration scheme between c and c_end and returns a
 * pointer such that the acceleration scheme does not match at any position
 * before it.
 */
|
||||
const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end);
|
||||
|
||||
#endif
|
||||
152
src/nfa/accel_dump.cpp
Normal file
152
src/nfa/accel_dump.cpp
Normal file
@@ -0,0 +1,152 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Acceleration: dump code.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "accel.h"
|
||||
#include "accel_dump.h"
|
||||
#include "shufticompile.h"
|
||||
#include "trufflecompile.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/dump_mask.h"
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error No dump support!
|
||||
#endif
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
const char *accelName(u8 accel_type) {
|
||||
switch (accel_type) {
|
||||
case ACCEL_NONE:
|
||||
return "none";
|
||||
case ACCEL_VERM:
|
||||
return "vermicelli";
|
||||
case ACCEL_VERM_NOCASE:
|
||||
return "vermicelli nocase";
|
||||
case ACCEL_DVERM:
|
||||
return "double-vermicelli";
|
||||
case ACCEL_DVERM_NOCASE:
|
||||
return "double-vermicelli nocase";
|
||||
case ACCEL_RVERM:
|
||||
return "reverse vermicelli";
|
||||
case ACCEL_RVERM_NOCASE:
|
||||
return "reverse vermicelli nocase";
|
||||
case ACCEL_RDVERM:
|
||||
return "reverse double-vermicelli";
|
||||
case ACCEL_RDVERM_NOCASE:
|
||||
return "reverse double-vermicelli nocase";
|
||||
case ACCEL_REOD:
|
||||
return "reverse eod";
|
||||
case ACCEL_REOD_NOCASE:
|
||||
return "reverse eod nocase";
|
||||
case ACCEL_RDEOD:
|
||||
return "reverse double-eod";
|
||||
case ACCEL_RDEOD_NOCASE:
|
||||
return "reverse double-eod nocase";
|
||||
case ACCEL_SHUFTI:
|
||||
return "shufti";
|
||||
case ACCEL_DSHUFTI:
|
||||
return "double-shufti";
|
||||
case ACCEL_TRUFFLE:
|
||||
return "truffle";
|
||||
case ACCEL_RED_TAPE:
|
||||
return "red tape";
|
||||
default:
|
||||
return "unknown!";
|
||||
}
|
||||
}
|
||||
|
||||
void dumpAccelInfo(FILE *f, const AccelAux &accel) {
|
||||
fprintf(f, " %s", accelName(accel.accel_type));
|
||||
if (accel.generic.offset) {
|
||||
fprintf(f, "+%hhu", accel.generic.offset);
|
||||
}
|
||||
|
||||
switch (accel.accel_type) {
|
||||
case ACCEL_VERM:
|
||||
case ACCEL_VERM_NOCASE:
|
||||
case ACCEL_RVERM:
|
||||
case ACCEL_RVERM_NOCASE:
|
||||
fprintf(f, " [\\x%02hhx]\n", accel.verm.c);
|
||||
break;
|
||||
case ACCEL_DVERM:
|
||||
case ACCEL_DVERM_NOCASE:
|
||||
case ACCEL_RDVERM:
|
||||
case ACCEL_RDVERM_NOCASE:
|
||||
fprintf(f, " [\\x%02hhx\\x%02hhx]\n", accel.dverm.c1, accel.dverm.c2);
|
||||
break;
|
||||
case ACCEL_SHUFTI: {
|
||||
fprintf(f, "\n");
|
||||
fprintf(f, "lo %s\n",
|
||||
dumpMask((const u8 *)&accel.shufti.lo, 128).c_str());
|
||||
fprintf(f, "hi %s\n",
|
||||
dumpMask((const u8 *)&accel.shufti.hi, 128).c_str());
|
||||
CharReach cr = shufti2cr(accel.shufti.lo, accel.shufti.hi);
|
||||
fprintf(f, "count %zu class %s\n", cr.count(),
|
||||
describeClass(cr).c_str());
|
||||
break;
|
||||
}
|
||||
case ACCEL_DSHUFTI:
|
||||
fprintf(f, "\n");
|
||||
fprintf(f, "lo1 %s\n",
|
||||
dumpMask((const u8 *)&accel.dshufti.lo1, 128).c_str());
|
||||
fprintf(f, "hi1 %s\n",
|
||||
dumpMask((const u8 *)&accel.dshufti.hi1, 128).c_str());
|
||||
fprintf(f, "lo2 %s\n",
|
||||
dumpMask((const u8 *)&accel.dshufti.lo2, 128).c_str());
|
||||
fprintf(f, "hi2 %s\n",
|
||||
dumpMask((const u8 *)&accel.dshufti.hi2, 128).c_str());
|
||||
break;
|
||||
case ACCEL_TRUFFLE: {
|
||||
fprintf(f, "\n");
|
||||
fprintf(f, "lo %s\n",
|
||||
dumpMask((const u8 *)&accel.truffle.mask1, 128).c_str());
|
||||
fprintf(f, "hi %s\n",
|
||||
dumpMask((const u8 *)&accel.truffle.mask2, 128).c_str());
|
||||
CharReach cr = truffle2cr(accel.truffle.mask1, accel.truffle.mask2);
|
||||
fprintf(f, "count %zu class %s\n", cr.count(),
|
||||
describeClass(cr).c_str());
|
||||
break;
|
||||
}
|
||||
default:
|
||||
fprintf(f, "\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
49
src/nfa/accel_dump.h
Normal file
49
src/nfa/accel_dump.h
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Acceleration: dump code.
|
||||
*/
|
||||
|
||||
#ifndef ACCEL_DUMP_H
|
||||
#define ACCEL_DUMP_H
|
||||
|
||||
#if defined(DUMP_SUPPORT)
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
union AccelAux;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void dumpAccelInfo(FILE *f, const AccelAux &accel);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // DUMP_SUPPORT
|
||||
#endif // ACCEL_DUMP_H
|
||||
191
src/nfa/accelcompile.cpp
Normal file
191
src/nfa/accelcompile.cpp
Normal file
@@ -0,0 +1,191 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "accel.h"
|
||||
#include "accelcompile.h"
|
||||
#include "shufticompile.h"
|
||||
#include "trufflecompile.h"
|
||||
#include "nfagraph/ng_limex_accel.h" /* for constants */
|
||||
#include "util/bitutils.h"
|
||||
#include "util/verify_types.h"
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
|
||||
assert(aux->accel_type == ACCEL_NONE);
|
||||
if (info.single_stops.all()) {
|
||||
return;
|
||||
}
|
||||
|
||||
size_t outs = info.single_stops.count();
|
||||
DEBUG_PRINTF("%zu outs\n", outs);
|
||||
assert(outs && outs < 256);
|
||||
u32 offset = info.single_offset;
|
||||
|
||||
if (outs == 1) {
|
||||
aux->accel_type = ACCEL_VERM;
|
||||
aux->verm.offset = offset;
|
||||
aux->verm.c = info.single_stops.find_first();
|
||||
DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c);
|
||||
return;
|
||||
}
|
||||
|
||||
if (outs == 2 && info.single_stops.isCaselessChar()) {
|
||||
aux->accel_type = ACCEL_VERM_NOCASE;
|
||||
aux->verm.offset = offset;
|
||||
aux->verm.c = info.single_stops.find_first() & CASE_CLEAR;
|
||||
DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n",
|
||||
aux->verm.c);
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
|
||||
if (-1 != shuftiBuildMasks(info.single_stops, &aux->shufti.lo,
|
||||
&aux->shufti.hi)) {
|
||||
aux->accel_type = ACCEL_SHUFTI;
|
||||
aux->shufti.offset = offset;
|
||||
DEBUG_PRINTF("shufti built OK\n");
|
||||
return;
|
||||
} else {
|
||||
DEBUG_PRINTF("shufti build failed, falling through\n");
|
||||
}
|
||||
|
||||
if (outs <= ACCEL_MAX_STOP_CHAR) {
|
||||
DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
|
||||
aux->accel_type = ACCEL_TRUFFLE;
|
||||
aux->truffle.offset = offset;
|
||||
truffleBuildMasks(info.single_stops, &aux->truffle.mask1,
|
||||
&aux->truffle.mask2);
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("unable to accelerate case with %zu outs\n", outs);
|
||||
}
|
||||
|
||||
static
|
||||
bool isCaselessDouble(const flat_set<pair<u8, u8>> &stop) {
|
||||
// test for vector containing <A,Z> <A,z> <a,Z> <a,z>
|
||||
if (stop.size() != 4) {
|
||||
return false;
|
||||
}
|
||||
const u8 a = stop.begin()->first & CASE_CLEAR;
|
||||
const u8 b = stop.begin()->second & CASE_CLEAR;
|
||||
|
||||
flat_set<pair<u8, u8>>::const_iterator it, ite;
|
||||
for (it = stop.begin(), ite = stop.end(); it != ite; ++it) {
|
||||
if ((it->first & CASE_CLEAR) != a || (it->second & CASE_CLEAR) != b) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
|
||||
size_t outs1 = info.double_stop1.count();
|
||||
size_t outs2 = info.double_stop2.size();
|
||||
|
||||
u8 offset = verify_u8(info.double_offset);
|
||||
DEBUG_PRINTF("outs1=%zu, outs2=%zu\n", outs1, outs2);
|
||||
|
||||
assert(aux->accel_type == ACCEL_NONE);
|
||||
|
||||
if (!outs2) {
|
||||
/* no double byte accel available */
|
||||
return;
|
||||
}
|
||||
|
||||
// double-byte accel
|
||||
if (outs1 == 0 && outs2 == 1) {
|
||||
aux->accel_type = ACCEL_DVERM;
|
||||
aux->dverm.offset = offset;
|
||||
aux->dverm.c1 = info.double_stop2.begin()->first;
|
||||
aux->dverm.c2 = info.double_stop2.begin()->second;
|
||||
DEBUG_PRINTF("building double-vermicelli caseful for 0x%02hhx%02hhx\n",
|
||||
aux->dverm.c1, aux->dverm.c2);
|
||||
return;
|
||||
}
|
||||
|
||||
if (outs1 == 0 && isCaselessDouble(info.double_stop2)) {
|
||||
aux->accel_type = ACCEL_DVERM_NOCASE;
|
||||
aux->dverm.offset = offset;
|
||||
aux->dverm.c1 = info.double_stop2.begin()->first & CASE_CLEAR;
|
||||
aux->dverm.c2 = info.double_stop2.begin()->second & CASE_CLEAR;
|
||||
DEBUG_PRINTF("building double-vermicelli caseless for 0x%02hhx%02hhx\n",
|
||||
aux->dverm.c1, aux->dverm.c2);
|
||||
return;
|
||||
}
|
||||
|
||||
if (outs1 + outs2 <= 8) {
|
||||
if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438.
|
||||
DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu"
|
||||
" two-byte literals\n", outs1, outs2);
|
||||
aux->accel_type = ACCEL_DSHUFTI;
|
||||
aux->dshufti.offset = offset;
|
||||
shuftiBuildDoubleMasks(info.double_stop1, info.double_stop2,
|
||||
&aux->dshufti.lo1,
|
||||
&aux->dshufti.hi1,
|
||||
&aux->dshufti.lo2,
|
||||
&aux->dshufti.hi2);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// drop back to attempt single-byte accel
|
||||
DEBUG_PRINTF("dropping back to single-byte acceleration\n");
|
||||
aux->accel_type = ACCEL_NONE;
|
||||
}
|
||||
|
||||
bool buildAccelAux(const AccelInfo &info, AccelAux *aux) {
|
||||
assert(aux->accel_type == ACCEL_NONE);
|
||||
if (info.single_stops.none()) {
|
||||
DEBUG_PRINTF("picked red tape\n");
|
||||
aux->accel_type = ACCEL_RED_TAPE;
|
||||
aux->generic.offset = info.single_offset;
|
||||
} else {
|
||||
buildAccelDouble(info, aux);
|
||||
}
|
||||
if (aux->accel_type == ACCEL_NONE) {
|
||||
buildAccelSingle(info, aux);
|
||||
}
|
||||
|
||||
assert(aux->accel_type == ACCEL_NONE
|
||||
|| aux->generic.offset == info.single_offset
|
||||
|| aux->generic.offset == info.double_offset);
|
||||
return aux->accel_type != ACCEL_NONE;
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
56
src/nfa/accelcompile.h
Normal file
56
src/nfa/accelcompile.h
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ACCEL_COMPILE_H
|
||||
#define ACCEL_COMPILE_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
union AccelAux;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct AccelInfo {
|
||||
AccelInfo() : single_offset(0U), double_offset(0U),
|
||||
single_stops(CharReach::dot()) {}
|
||||
u32 single_offset; /**< offset correction to apply to single schemes */
|
||||
u32 double_offset; /**< offset correction to apply to double schemes */
|
||||
CharReach double_stop1; /**< single-byte accel stop literals for double
|
||||
* schemes */
|
||||
flat_set<std::pair<u8, u8>> double_stop2; /**< double-byte accel stop
|
||||
* literals */
|
||||
CharReach single_stops; /**< escapes for single byte acceleration */
|
||||
};
|
||||
|
||||
bool buildAccelAux(const AccelInfo &info, AccelAux *aux);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
76
src/nfa/callback.h
Normal file
76
src/nfa/callback.h
Normal file
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief NFA Callback definitions, used at runtime.
|
||||
*/
|
||||
|
||||
#ifndef NFA_CALLBACK_H
|
||||
#define NFA_CALLBACK_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
/** \brief The type for an NFA callback.
|
||||
*
|
||||
* This is a function that takes as arguments the current offset where the
|
||||
* match occurs, the id of the match and the context pointer that was passed
|
||||
* into the NFA API function that executed the NFA.
|
||||
*
|
||||
* The offset where the match occurs will be the offset after the character
|
||||
* that caused the match. Thus, if we have a buffer containing 'abc', then a
|
||||
* pattern that matches an empty string will have an offset of 0, a pattern
|
||||
* that matches 'a' will have an offset of 1, and a pattern that matches 'abc'
|
||||
* will have an offset of 3, which will be a value that is 'beyond' the size of
|
||||
* the buffer. That is, if we have n characters in the buffer, there are n+1
|
||||
* different potential offsets for matches.
|
||||
*
|
||||
* This function should return an int - currently the possible return values
|
||||
* are 0, which means 'stop running the engine' or non-zero, which means
|
||||
* 'continue matching'.
|
||||
*/
|
||||
typedef int (*NfaCallback)(u64a offset, ReportID id, void *context);
|
||||
|
||||
/** \brief The type for an NFA callback which also tracks start of match.
|
||||
*
|
||||
* see \ref NfaCallback
|
||||
*/
|
||||
typedef int (*SomNfaCallback)(u64a from_offset, u64a to_offset, ReportID id,
|
||||
void *context);
|
||||
|
||||
/**
|
||||
* standard \ref NfaCallback return value indicating that engine execution
|
||||
* should continue. (any non-zero value will serve this purpose)
|
||||
*/
|
||||
#define MO_CONTINUE_MATCHING 1
|
||||
|
||||
/**
|
||||
* \ref NfaCallback return value indicating that engine execution should halt.
|
||||
*/
|
||||
#define MO_HALT_MATCHING 0
|
||||
|
||||
#endif // NFA_CALLBACK_H
|
||||
1016
src/nfa/castle.c
Normal file
1016
src/nfa/castle.c
Normal file
File diff suppressed because it is too large
Load Diff
64
src/nfa/castle.h
Normal file
64
src/nfa/castle.h
Normal file
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef NFA_CASTLE_H
|
||||
#define NFA_CASTLE_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
struct mq;
|
||||
struct NFA;
|
||||
|
||||
char nfaExecCastle0_Q(const struct NFA *n, struct mq *q, s64a end);
|
||||
char nfaExecCastle0_Q2(const struct NFA *n, struct mq *q, s64a end);
|
||||
char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||
char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
|
||||
struct mq *q);
|
||||
char nfaExecCastle0_queueInitState(const struct NFA *n, struct mq *q);
|
||||
char nfaExecCastle0_initCompressedState(const struct NFA *n, u64a offset,
|
||||
void *state, u8 key);
|
||||
char nfaExecCastle0_queueCompressState(const struct NFA *nfa,
|
||||
const struct mq *q, s64a loc);
|
||||
char nfaExecCastle0_expandState(const struct NFA *nfa, void *dest,
|
||||
const void *src, u64a offset, u8 key);
|
||||
|
||||
#define nfaExecCastle0_testEOD NFA_API_NO_IMPL
|
||||
#define nfaExecCastle0_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecCastle0_zombie_status NFA_API_NO_IMPL
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif
|
||||
116
src/nfa/castle_dump.cpp
Normal file
116
src/nfa/castle_dump.cpp
Normal file
@@ -0,0 +1,116 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Castle: multi-tenant repeat engine, dump code.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "castle_dump.h"
|
||||
|
||||
#include "castle_internal.h"
|
||||
#include "nfa_dump_internal.h"
|
||||
#include "nfa_internal.h"
|
||||
#include "shufticompile.h"
|
||||
#include "trufflecompile.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/dump_charclass.h"
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error No dump support!
|
||||
#endif
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** \brief GraphViz dump entry point for Castle engines; writes nothing. */
void nfaExecCastle0_dumpDot(const struct NFA * /* nfa */, FILE * /* f */) {
    // Castles have no GraphViz representation, so this is deliberately empty.
}
|
||||
|
||||
static
|
||||
void dumpTextSubCastle(const SubCastle &sub, FILE *f) {
|
||||
const RepeatInfo *info =
|
||||
(const RepeatInfo *)((const char *)&sub + sub.repeatInfoOffset);
|
||||
fprintf(f, " repeat model: %s\n", repeatTypeName(info->type));
|
||||
fprintf(f, " repeat bounds: {%u, %u}\n", info->repeatMin,
|
||||
info->repeatMax);
|
||||
fprintf(f, " min period: %u\n", info->minPeriod);
|
||||
|
||||
fprintf(f, " report: %u\n", sub.report);
|
||||
fprintf(f, " full state offset: %u\n", sub.fullStateOffset);
|
||||
fprintf(f, " stream state offset: %u\n", sub.streamStateOffset);
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
|
||||
void nfaExecCastle0_dumpText(const struct NFA *nfa, FILE *f) {
|
||||
const Castle *c = (const Castle *)getImplNfa(nfa);
|
||||
|
||||
fprintf(f, "Castle multi-tenant repeat engine\n");
|
||||
fprintf(f, "\n");
|
||||
fprintf(f, "Number of repeat tenants: %u\n", c->numRepeats);
|
||||
fprintf(f, "Scan type: ");
|
||||
switch (c->type) {
|
||||
case CASTLE_DOT:
|
||||
fprintf(f, "dot\n");
|
||||
break;
|
||||
case CASTLE_VERM:
|
||||
fprintf(f, "verm, scanning for 0x%02x\n", c->u.verm.c);
|
||||
break;
|
||||
case CASTLE_NVERM:
|
||||
fprintf(f, "negated verm, scanning for 0x%02x\n", c->u.verm.c);
|
||||
break;
|
||||
case CASTLE_SHUFTI: {
|
||||
const CharReach cr = shufti2cr(c->u.shuf.mask_lo, c->u.shuf.mask_hi);
|
||||
fprintf(f, "shufti, scanning for %s (%zu chars)\n",
|
||||
describeClass(cr).c_str(), cr.count());
|
||||
break;
|
||||
}
|
||||
case CASTLE_TRUFFLE: {
|
||||
const CharReach cr = truffle2cr(c->u.truffle.mask1, c->u.truffle.mask2);
|
||||
fprintf(f, "truffle, scanning for %s (%zu chars)\n",
|
||||
describeClass(cr).c_str(), cr.count());
|
||||
break;
|
||||
}
|
||||
default:
|
||||
fprintf(f, "unknown type %u\n", c->type);
|
||||
break;
|
||||
}
|
||||
|
||||
fprintf(f, "\n");
|
||||
dumpTextReverse(nfa, f);
|
||||
fprintf(f, "\n");
|
||||
|
||||
const SubCastle *sub =
|
||||
(const SubCastle *)((const char *)c + sizeof(Castle));
|
||||
for (u32 i = 0; i < c->numRepeats; i++) {
|
||||
fprintf(f, "Sub %u:\n", i);
|
||||
dumpTextSubCastle(sub[i], f);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
47
src/nfa/castle_dump.h
Normal file
47
src/nfa/castle_dump.h
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CASTLE_DUMP_H
|
||||
#define CASTLE_DUMP_H
|
||||
|
||||
#if defined(DUMP_SUPPORT)
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
struct NFA;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
void nfaExecCastle0_dumpDot(const NFA *nfa, FILE *file);
|
||||
void nfaExecCastle0_dumpText(const NFA *nfa, FILE *file);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // DUMP_SUPPORT
|
||||
|
||||
#endif
|
||||
101
src/nfa/castle_internal.h
Normal file
101
src/nfa/castle_internal.h
Normal file
@@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Castle: multi-tenant repeat engine, data structures.
|
||||
*/
|
||||
|
||||
#ifndef NFA_CASTLE_INTERNAL_H
|
||||
#define NFA_CASTLE_INTERNAL_H
|
||||
|
||||
#include "ue2common.h"
|
||||
#include "repeat_internal.h"
|
||||
|
||||
struct SubCastle {
|
||||
ReportID report; //!< report to raise on match
|
||||
u32 fullStateOffset; //!< offset within full state (scratch)
|
||||
u32 streamStateOffset; //!< offset within stream state
|
||||
u32 repeatInfoOffset; //!< offset of RepeatInfo structure
|
||||
// relative to the start of SubCastle
|
||||
char exclusive; //!< exclusive info of this SubCastle
|
||||
};
|
||||
|
||||
#define CASTLE_DOT 0
|
||||
#define CASTLE_VERM 1
|
||||
#define CASTLE_NVERM 2
|
||||
#define CASTLE_SHUFTI 3
|
||||
#define CASTLE_TRUFFLE 4
|
||||
|
||||
/**
|
||||
* \brief Castle engine structure.
|
||||
*
|
||||
* A Castle is a collection of repeats that all share the same character
|
||||
* reachability.
|
||||
*
|
||||
* The whole engine is laid out in memory as:
|
||||
*
|
||||
* - struct NFA
|
||||
* - struct Castle
|
||||
* - struct SubCastle[numRepeats]
|
||||
* - tables for sparse model repeats
|
||||
*
|
||||
* Castle stores an "active repeats" multibit in stream state, followed by the
|
||||
* packed repeat state for each SubCastle. If all SubCastles are mutual
|
||||
* exclusive, we store current active SubCastle id instead of "active repeats"
|
||||
* multibit in stream state. If there are both exclusive and non-exclusive
|
||||
* SubCastle groups, we use an active id for the exclusive group and a multibit
|
||||
* for the non-exclusive group.
|
||||
*
|
||||
* In full state (stored in scratch space) it stores a temporary multibit over
|
||||
* the repeats (used by \ref castleMatchLoop), followed by the repeat control
|
||||
* blocks for each SubCastle. If all SubCastles are mutual exclusive, we only
|
||||
* need to store the repeat control blocks for each SubCastle.
|
||||
*/
|
||||
struct ALIGN_AVX_DIRECTIVE Castle {
|
||||
u32 numRepeats;
|
||||
u8 type; //!< tells us which scanning mechanism (below) to use
|
||||
char exclusive; //!< tells us if there are mutual exclusive SubCastles
|
||||
char pureExclusive; //!< tells us if all SubCastles are mutual exclusive
|
||||
u8 activeIdxSize; //!< number of bytes in stream state to store
|
||||
// active SubCastle id for exclusive mode
|
||||
union {
|
||||
struct {
|
||||
char c;
|
||||
} verm;
|
||||
struct {
|
||||
m128 mask_lo;
|
||||
m128 mask_hi;
|
||||
} shuf;
|
||||
struct {
|
||||
m128 mask1;
|
||||
m128 mask2;
|
||||
} truffle;
|
||||
} u;
|
||||
};
|
||||
|
||||
#endif // NFA_CASTLE_INTERNAL_H
|
||||
1029
src/nfa/castlecompile.cpp
Normal file
1029
src/nfa/castlecompile.cpp
Normal file
File diff suppressed because it is too large
Load Diff
146
src/nfa/castlecompile.h
Normal file
146
src/nfa/castlecompile.h
Normal file
@@ -0,0 +1,146 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Castle: multi-tenant repeat engine, compiler code.
|
||||
*/
|
||||
|
||||
#ifndef NFA_CASTLECOMPILE_H
|
||||
#define NFA_CASTLECOMPILE_H
|
||||
|
||||
#include "nfa_kind.h"
|
||||
#include "ue2common.h"
|
||||
#include "nfagraph/ng_repeat.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/depth.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
struct NFA;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
class CharReach;
|
||||
class NGHolder;
|
||||
struct CompileContext;
|
||||
|
||||
/**
|
||||
* \brief Prototype for a Castle engine: contains at least one CastleRepeat.
|
||||
*
|
||||
* Currently, all repeats in a Castle must have the same character
|
||||
* reachability.
|
||||
*
|
||||
* A CastleProto is converted into a single NFA, with each top triggering a
|
||||
* unique repeat. A CastleProto can contain at most CastleProto::max_occupancy
|
||||
* elements.
|
||||
*/
|
||||
struct CastleProto {
|
||||
static constexpr size_t max_occupancy = 65536; // arbitrary limit
|
||||
explicit CastleProto(const PureRepeat &pr);
|
||||
const CharReach &reach() const;
|
||||
|
||||
u32 add(const PureRepeat &pr);
|
||||
|
||||
/**
|
||||
* \brief Merge in the given repeat, returning the top used.
|
||||
*
|
||||
* If the repeat already exists in this castle, we will re-use (and return)
|
||||
* the old top. If it doesn't, it will be added and assigned a new top.
|
||||
* Returns \ref max_occupancy if capacity would be exceeded.
|
||||
*/
|
||||
u32 merge(const PureRepeat &pr);
|
||||
|
||||
/** \brief Mapping from unique top id to repeat. */
|
||||
std::map<u32, PureRepeat> repeats;
|
||||
};
|
||||
|
||||
std::set<ReportID> all_reports(const CastleProto &proto);
|
||||
depth findMinWidth(const CastleProto &proto);
|
||||
depth findMaxWidth(const CastleProto &proto);
|
||||
|
||||
/**
|
||||
* \brief Remap tops to be contiguous.
|
||||
*
|
||||
* Remap the tops in the given CastleProto so that they're contiguous in the
|
||||
* range [0 .. N-1].
|
||||
*/
|
||||
void remapCastleTops(CastleProto &proto, std::map<u32, u32> &top_map);
|
||||
|
||||
/**
|
||||
* \brief Construct an NFA from a CastleProto.
|
||||
*
|
||||
* NOTE: Tops must be contiguous, i.e. \ref remapCastleTops must have been run
|
||||
* first.
|
||||
*/
|
||||
ue2::aligned_unique_ptr<NFA>
|
||||
buildCastle(const CastleProto &proto,
|
||||
const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
|
||||
const CompileContext &cc);
|
||||
|
||||
/**
|
||||
* \brief Merge two CastleProto prototypes together, if possible.
|
||||
*
|
||||
* Returns true if merge of all repeats in c2 into c1 succeeds, and fills
|
||||
* mapping with the repeat indices.
|
||||
*/
|
||||
bool mergeCastle(CastleProto &c1, const CastleProto &c2,
|
||||
std::map<u32, u32> &top_map);
|
||||
|
||||
/**
|
||||
* \brief True if the two castles are identical with respect to the reports
|
||||
* given; i.e. the same tops lead to the same repeats, just with report1 in c1
|
||||
* and report2 in c2.
|
||||
*
|
||||
* Repeats leading to other reports are ignored.
|
||||
*/
|
||||
bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2,
|
||||
ReportID report2);
|
||||
|
||||
/**
|
||||
* \brief True if the two castles given are identical.
|
||||
*/
|
||||
bool is_equal(const CastleProto &c1, const CastleProto &c2);
|
||||
|
||||
/**
|
||||
* \brief True if the given castle contains more than a single instance of any
|
||||
* of the reports in the given set.
|
||||
*/
|
||||
bool requiresDedupe(const CastleProto &proto, const std::set<ReportID> &reports);
|
||||
|
||||
/**
|
||||
* \brief Build an NGHolder from a CastleProto.
|
||||
*/
|
||||
std::unique_ptr<NGHolder> makeHolder(const CastleProto &castle, nfa_kind kind,
|
||||
const CompileContext &cc);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif // NFA_CASTLECOMPILE_H
|
||||
351
src/nfa/dfa_min.cpp
Normal file
351
src/nfa/dfa_min.cpp
Normal file
@@ -0,0 +1,351 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Build code for DFA minimization
|
||||
*/
|
||||
|
||||
/**
 * Summary of Hopcroft's algorithm:
 *
 * partition := {F, Q \ F};
 * work_queue := {F};
 * while (work_queue is not empty) do
 *      choose and remove a set A from work_queue
 *      for each c in Sigma (the input alphabet) do
 *           let X be the set of states for which a transition on c
 *           leads to a state in A
 *           for each set Y in partition for which (X intersect Y) is
 *           nonempty and Y \ X is nonempty do
 *                replace Y in partition by the two sets (X intersect Y)
 *                and Y \ X
 *                if Y is in work_queue
 *                     replace Y in work_queue by the same two sets
 *                else
 *                     if |X intersect Y| <= |Y \ X|
 *                          add (X intersect Y) to work_queue
 *                     else
 *                          add Y \ X to work_queue
 *           end;
 *      end;
 * end;
 */
|
||||
|
||||
#include "dfa_min.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "nfa/rdfa.h"
|
||||
#include "nfagraph/ng_mcclellan.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/partitioned_set.h"
|
||||
#include "util/container.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <iterator>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
namespace {
|
||||
|
||||
/** Per-state bookkeeping for the minimiser: reverse transitions only. */
struct hopcroft_state_info {
|
||||
/* prev[c] holds the ids of the states whose transition on input c leads to
 * this state; it is sized and filled by the DFA_components constructor. */
vector<vector<dstate_id_t> > prev;
|
||||
};
|
||||
|
||||
/**
 * Working state for one run of the minimiser over a raw_dfa.
 *
 * NOTE: the declaration order of the members matters. The constructor's
 * initialiser list reads nstates to compute inp_size and hands work_queue to
 * create_map() while initialising partition, so nstates and work_queue must be
 * declared (and therefore constructed) before the members that use them.
 */
struct DFA_components : boost::noncopyable {
|
||||
/* number of states in the input dfa (rdfa.states.size()) */
dstate_id_t nstates;
|
||||
/* number of input symbols, taken from the size of a state's next table */
size_t inp_size;
|
||||
/* indices of partition subsets that still have to be used as splitters */
set<size_t> work_queue;
|
||||
/* Current partition of the states; each subset is a candidate equivalence
 * class of the reduced dfa. */
partitioned_set<dstate_id_t> partition;
|
||||
/* reverse transition tables, indexed by state id */
vector<hopcroft_state_info> states;
|
||||
|
||||
/* builds the initial partition/work queue and the reverse transition tables */
explicit DFA_components(const raw_dfa &rdfa);
|
||||
};
|
||||
|
||||
} //namespace
|
||||
|
||||
/**
|
||||
* create_map:
|
||||
* Creates an initial partitioning and work_queue.
|
||||
* Initial partition contains {accepting states..., Non-accepting states}
|
||||
* Initial work_queue contains accepting state subsets
|
||||
*
|
||||
* The initial partitioning needs to distinguish between the different
|
||||
* reporting behaviours (unlike standard hopcroft) --> more than one subset
|
||||
* possible for the accepting states.
|
||||
*
|
||||
* Look for accepting states in both reports and reports_eod.
|
||||
* Creates a map with a key(reports, reports_eod) and an id.
|
||||
* Reports of each state are searched against the map and
|
||||
* added to the corresponding id -> partition[id] and work_queue[id].
|
||||
* Non Accept states are added to partition[id+1].
|
||||
*/
|
||||
static
|
||||
vector<size_t> create_map(const raw_dfa &rdfa, set<size_t> &work_queue) {
|
||||
using ReportKey = pair<flat_set<ReportID>, flat_set<ReportID>>;
|
||||
map<ReportKey, size_t> subset_map;
|
||||
vector<size_t> state_to_subset(rdfa.states.size(), INVALID_SUBSET);
|
||||
|
||||
for (size_t i = 0; i < rdfa.states.size(); i++) {
|
||||
if (!rdfa.states[i].reports.empty() ||
|
||||
!rdfa.states[i].reports_eod.empty()) {
|
||||
ReportKey key(rdfa.states[i].reports, rdfa.states[i].reports_eod);
|
||||
if (contains(subset_map, key)) {
|
||||
state_to_subset[i] = subset_map[key];
|
||||
} else {
|
||||
size_t sub = subset_map.size();
|
||||
subset_map[key] = sub;
|
||||
state_to_subset[i] = sub;
|
||||
work_queue.insert(sub);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* handle non accepts */
|
||||
size_t non_accept_sub = subset_map.size();
|
||||
for (size_t i = 0; i < state_to_subset.size(); i++) {
|
||||
if (state_to_subset[i] == INVALID_SUBSET) {
|
||||
state_to_subset[i] = non_accept_sub;
|
||||
}
|
||||
}
|
||||
|
||||
return state_to_subset;
|
||||
}
|
||||
|
||||
/**
 * Sets up everything dfa_min() needs for \p rdfa: the initial partition and
 * work queue (via create_map()) and, for every state and input symbol, the
 * list of predecessor states.
 *
 * The initialiser list depends on member declaration order: nstates is read to
 * pick the state whose next table defines inp_size, and work_queue must already
 * be constructed when it is passed to create_map().
 */
DFA_components::DFA_components(const raw_dfa &rdfa)
    : nstates(rdfa.states.size()),
      inp_size(rdfa.states[nstates - 1].next.size()),
      partition(create_map(rdfa, work_queue)) {
    /* one reverse-transition table per state, one bucket per input symbol */
    states.resize(nstates);
    for (auto &info : states) {
        info.prev.resize(inp_size);
    }

    /* record every transition src --sym--> dst on the destination's side */
    for (size_t src = 0; src < nstates; src++) {
        const auto &succ = rdfa.states[src].next;
        for (size_t sym = 0; sym < inp_size; sym++) {
            const dstate_id_t dst = succ[sym];
            states[dst].prev[sym].push_back(src);

            DEBUG_PRINTF("rdfa.states[%zu].next[%zu] %hu \n", src, sym, dst);
        }
    }
}
|
||||
|
||||
/**
|
||||
* choose and remove a set A from work_queue.
|
||||
*/
|
||||
static
|
||||
void get_work_item(DFA_components &mdfa, ue2::flat_set<dstate_id_t> &A) {
|
||||
A.clear();
|
||||
assert(!mdfa.work_queue.empty());
|
||||
set<size_t>::iterator pt = mdfa.work_queue.begin();
|
||||
insert(&A, mdfa.partition[*pt]);
|
||||
mdfa.work_queue.erase(pt);
|
||||
}
|
||||
|
||||
/**
|
||||
* X is the set of states for which a transition on the input leads to a state
|
||||
* in A.
|
||||
*/
|
||||
static
|
||||
void create_X(const DFA_components &mdfa, const ue2::flat_set<dstate_id_t> &A,
|
||||
size_t inp, ue2::flat_set<dstate_id_t> &X) {
|
||||
X.clear();
|
||||
|
||||
for (dstate_id_t id : A) {
|
||||
insert(&X, mdfa.states[id].prev[inp]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For a split set X, each subset S (given by part_index) in the partition, two
|
||||
* sets are created: v_inter (X intersection S) and v_sub (S - X).
|
||||
*
|
||||
* For each subset S in the partition that could be split (v_inter is nonempty
|
||||
* and v_sub is nonempty):
|
||||
* - replace S in partition by the two sets v_inter and v_sub.
|
||||
* - if S is in work_queue:
|
||||
* - replace S in work_queue by the two subsets.
|
||||
* - else:
|
||||
* - replace S in work_queue by the smaller of the two sets.
|
||||
*/
|
||||
static
|
||||
void split_and_replace_set(const size_t part_index, DFA_components &mdfa,
|
||||
const ue2::flat_set<dstate_id_t> &splitter) {
|
||||
/* singleton sets cannot be split */
|
||||
if (mdfa.partition[part_index].size() == 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
size_t small_index = mdfa.partition.split(part_index, splitter);
|
||||
|
||||
if (small_index == INVALID_SUBSET) {
|
||||
/* the set could not be split */
|
||||
return;
|
||||
}
|
||||
|
||||
/* larger subset remains at the input subset index, if the input subset was
|
||||
* already in the work queue then the larger subset will remain there. */
|
||||
|
||||
mdfa.work_queue.insert(small_index);
|
||||
}
|
||||
|
||||
/**
|
||||
* The complete Hopcrofts algorithm is implemented in this function.
|
||||
* Choose and remove a set tray from work_queue
|
||||
* For each input- X is created.
|
||||
* For each subset in the partition, split_and_replace_sets are called with the
|
||||
* split set.
|
||||
*/
|
||||
static
|
||||
void dfa_min(DFA_components &mdfa) {
|
||||
ue2::flat_set<dstate_id_t> A, X;
|
||||
vector<size_t> cand_subsets;
|
||||
|
||||
while (!mdfa.work_queue.empty()) {
|
||||
get_work_item(mdfa, A);
|
||||
|
||||
for (size_t inp = 0; inp < mdfa.inp_size; inp++) {
|
||||
create_X(mdfa, A, inp, X);
|
||||
if (X.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* we only need to consider subsets with at least one member in X for
|
||||
* splitting */
|
||||
cand_subsets.clear();
|
||||
mdfa.partition.find_overlapping(X, &cand_subsets);
|
||||
|
||||
for (size_t sub : cand_subsets) {
|
||||
split_and_replace_set(sub, mdfa, X);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creating new dfa table
|
||||
* Map ordering contains key being an equivalence classes first state
|
||||
* and the value being the equivalence class index.
|
||||
* Eq_state[i] tells us new state id the equivalence class located at
|
||||
* partition[i].
|
||||
*/
|
||||
static
|
||||
void mapping_new_states(const DFA_components &mdfa,
|
||||
vector<dstate_id_t> &old_to_new,
|
||||
raw_dfa &rdfa) {
|
||||
const size_t num_partitions = mdfa.partition.size();
|
||||
|
||||
// Mapping from equiv class's first state to equiv class index.
|
||||
map<dstate_id_t, size_t> ordering;
|
||||
|
||||
// New state id for each equiv class.
|
||||
vector<dstate_id_t> eq_state(num_partitions);
|
||||
|
||||
for (size_t i = 0; i < num_partitions; i++) {
|
||||
ordering[*mdfa.partition[i].begin()] = i;
|
||||
}
|
||||
|
||||
dstate_id_t new_id = 0;
|
||||
for (const auto &m : ordering) {
|
||||
eq_state[m.second] = new_id++;
|
||||
}
|
||||
|
||||
for (size_t t = 0; t < mdfa.partition.size(); t++) {
|
||||
for (dstate_id_t id : mdfa.partition[t]) {
|
||||
old_to_new[id] = eq_state[t];
|
||||
}
|
||||
}
|
||||
|
||||
vector<dstate> new_states;
|
||||
new_states.reserve(num_partitions);
|
||||
for (size_t i = 0; i < mdfa.nstates; i++) {
|
||||
if (contains(ordering, i)) {
|
||||
new_states.push_back(rdfa.states[i]);
|
||||
}
|
||||
}
|
||||
rdfa.states.swap(new_states);
|
||||
}
|
||||
|
||||
static
|
||||
void renumber_new_states(const DFA_components &mdfa,
|
||||
const vector<dstate_id_t> &old_to_new,
|
||||
raw_dfa &rdfa) {
|
||||
for (size_t i = 0; i < mdfa.partition.size(); i++) {
|
||||
for (size_t j = 0; j < mdfa.inp_size; j++) {
|
||||
dstate_id_t output = rdfa.states[i].next[j];
|
||||
rdfa.states[i].next[j] = old_to_new[output];
|
||||
}
|
||||
dstate_id_t dad = rdfa.states[i].daddy;
|
||||
rdfa.states[i].daddy = old_to_new[dad];
|
||||
}
|
||||
|
||||
rdfa.start_floating = old_to_new[rdfa.start_floating];
|
||||
rdfa.start_anchored = old_to_new[rdfa.start_anchored];
|
||||
}
|
||||
|
||||
static
|
||||
void new_dfa(raw_dfa &rdfa, const DFA_components &mdfa) {
|
||||
if (mdfa.partition.size() != mdfa.nstates) {
|
||||
vector<dstate_id_t> old_to_new(mdfa.nstates);
|
||||
mapping_new_states(mdfa, old_to_new, rdfa);
|
||||
renumber_new_states(mdfa, old_to_new, rdfa);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* MAIN FUNCTION
|
||||
*/
|
||||
void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) {
|
||||
if (!grey.minimizeDFA) {
|
||||
return;
|
||||
}
|
||||
|
||||
UNUSED const size_t states_before = rdfa.states.size();
|
||||
|
||||
DFA_components mdfa(rdfa);
|
||||
|
||||
dfa_min(mdfa);
|
||||
new_dfa(rdfa, mdfa);
|
||||
|
||||
DEBUG_PRINTF("reduced from %zu to %zu states\n", states_before,
|
||||
rdfa.states.size());
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
45
src/nfa/dfa_min.h
Normal file
45
src/nfa/dfa_min.h
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Build code for DFA minimization.
|
||||
*/
|
||||
|
||||
#ifndef DFA_MIN_H
|
||||
#define DFA_MIN_H
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct raw_dfa;
|
||||
struct Grey;
|
||||
|
||||
void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
1153
src/nfa/gough.c
Normal file
1153
src/nfa/gough.c
Normal file
File diff suppressed because it is too large
Load Diff
82
src/nfa/gough.h
Normal file
82
src/nfa/gough.h
Normal file
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef GOUGH_H
|
||||
#define GOUGH_H
|
||||
|
||||
#include "callback.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
struct NFA;
|
||||
struct mq;
|
||||
|
||||
// 8-bit Gough
|
||||
|
||||
char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state,
|
||||
const char *streamState, u64a offset,
|
||||
NfaCallback callback, SomNfaCallback som_cb,
|
||||
void *context);
|
||||
char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end);
|
||||
char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end);
|
||||
char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||
char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q);
|
||||
char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q);
|
||||
char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset,
|
||||
void *state, u8 key);
|
||||
char nfaExecGough8_queueCompressState(const struct NFA *nfa, const struct mq *q,
|
||||
s64a loc);
|
||||
char nfaExecGough8_expandState(const struct NFA *nfa, void *dest,
|
||||
const void *src, u64a offset, u8 key);
|
||||
|
||||
#define nfaExecGough8_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecGough8_zombie_status NFA_API_NO_IMPL
|
||||
|
||||
// 16-bit Gough
|
||||
|
||||
char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state,
|
||||
const char *streamState, u64a offset,
|
||||
NfaCallback callback, SomNfaCallback som_cb,
|
||||
void *context);
|
||||
char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end);
|
||||
char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end);
|
||||
char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report);
|
||||
char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q);
|
||||
char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q);
|
||||
char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q);
|
||||
char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset,
|
||||
void *state, u8 key);
|
||||
char nfaExecGough16_queueCompressState(const struct NFA *nfa,
|
||||
const struct mq *q, s64a loc);
|
||||
char nfaExecGough16_expandState(const struct NFA *nfa, void *dest,
|
||||
const void *src, u64a offset, u8 key);
|
||||
|
||||
#define nfaExecGough16_B_Reverse NFA_API_NO_IMPL
|
||||
#define nfaExecGough16_zombie_status NFA_API_NO_IMPL
|
||||
|
||||
#endif
|
||||
134
src/nfa/gough_internal.h
Normal file
134
src/nfa/gough_internal.h
Normal file
@@ -0,0 +1,134 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef GOUGH_INTERNAL_H
|
||||
#define GOUGH_INTERNAL_H
|
||||
|
||||
#include "accel.h"
|
||||
#include "mcclellan_internal.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#define INVALID_SLOT (~0U)
|
||||
|
||||
#define GOUGH_INS_END 0
|
||||
#define GOUGH_INS_MOV 1
|
||||
#define GOUGH_INS_NEW 2
|
||||
#define GOUGH_INS_MIN 3
|
||||
/* todo: add instructions targeting acc reg? */
|
||||
|
||||
/* A single instruction of a gough program: an opcode and two operands. */
struct gough_ins {
|
||||
/* presumably one of the GOUGH_INS_* values defined in this header -- confirm */
u32 op; /* u32 to avoid padding */
|
||||
u32 dest;
|
||||
u32 src; /* for GOUGH_INS_NEW, this specifies the adjustment to apply to the
|
||||
* current offset */
|
||||
};
|
||||
|
||||
/*
|
||||
* HAPPY FUN ASCII ART TIME
|
||||
*
|
||||
* ----
|
||||
* | | struct NFA
|
||||
* ----
|
||||
* ~~~~ normal(ish) mcclellan engine
|
||||
* ~~~~
|
||||
* ~~~~
|
||||
* ~~~~
|
||||
* ~~~~
|
||||
* ~~~~
|
||||
* ~~~~
|
||||
* ~~~~
|
||||
* ---- = m->haig_offset
|
||||
* | | } struct gough_info
|
||||
* ----
|
||||
* | | }
|
||||
* | | } edge prog table -> provides the offset of the start of the program
|
||||
* | | } to run when the edge is taken. 0 indicates no
|
||||
* | | } work to do
|
||||
* ---- = h->top_prog_offset
|
||||
* | | }
|
||||
* | | } top prog table -> provides the offset of the start of the program
|
||||
* | | } to run when a top is taken from this state. 0
|
||||
* | | } indicates nothing to do
|
||||
* ---- = h->prog_base_offset
|
||||
* | | }
|
||||
* | | } programs to run
|
||||
* | | }
|
||||
* | | }
|
||||
* ----
|
||||
*/
|
||||
|
||||
/* Gough-specific header; get_gough() locates it via the mcclellan
 * haig_offset field. */
struct gough_info {
|
||||
u32 top_prog_offset; /**< offset to the base of the top prog table */
|
||||
u32 prog_base_offset; /**< not used at runtime */
|
||||
u32 stream_som_loc_count; /**< number of som locs in the stream state */
|
||||
u8 stream_som_loc_width; /**< number of bytes per som loc */
|
||||
};
|
||||
|
||||
static really_inline
|
||||
const struct gough_info *get_gough(const struct mcclellan *m) {
|
||||
assert(m->haig_offset);
|
||||
const char *n = (const char *)m - sizeof(struct NFA);
|
||||
return (const struct gough_info *)(n + m->haig_offset);
|
||||
}
|
||||
|
||||
static really_inline
|
||||
const u32 *get_gough_top_offsets(const struct mcclellan *m) {
|
||||
const struct gough_info *g = get_gough(m);
|
||||
if (!g->top_prog_offset) {
|
||||
return NULL;
|
||||
}
|
||||
const char *n = (const char *)m - sizeof(struct NFA);
|
||||
return (const u32 *)(n + g->top_prog_offset);
|
||||
}
|
||||
|
||||
/* Gough state representation in scratch.
|
||||
*
|
||||
* During execution, gough tracks a number of variables containing potential
|
||||
* starts of match. These are all stored in a large array of u64a slots.
|
||||
*/
|
||||
/* Array of u64a som slots. */
struct gough_som_info {
|
||||
/* NOTE(review): declared with one element but used as a variable-length
 * trailing array, so sizeof(struct gough_som_info) includes one slot.
 * Switching to a true flexible array member would change that size; check
 * any size computations before doing so. */
u64a slots[1]; /* 'flexible' member array */
|
||||
};
|
||||
|
||||
/* One report entry: a report id paired with a som slot. */
struct gough_report {
|
||||
ReportID r; /* report id */
|
||||
u32 som; /* som slot to report */
|
||||
};
|
||||
|
||||
/* Counted list of gough_report entries stored inline after the count. */
struct gough_report_list {
|
||||
u32 count; /* presumably the number of entries in report[] -- confirm */
|
||||
struct gough_report report[]; /* flexible array member */
|
||||
};
|
||||
|
||||
/* Acceleration record for a gough state: an AccelAux plus gough-specific
 * fields. */
struct gough_accel {
|
||||
union AccelAux accel;
|
||||
/* NOTE(review): presumably the margin computed by the compiler's self-loop
 * analysis (which caps it at 50, so it fits in a u8) -- confirm */
u8 margin_dist;
|
||||
/* NOTE(review): presumably the offset of a gough program associated with this
 * accel state -- confirm against the compiler */
u32 prog_offset;
|
||||
};
|
||||
|
||||
#endif
|
||||
1320
src/nfa/goughcompile.cpp
Normal file
1320
src/nfa/goughcompile.cpp
Normal file
File diff suppressed because it is too large
Load Diff
93
src/nfa/goughcompile.h
Normal file
93
src/nfa/goughcompile.h
Normal file
@@ -0,0 +1,93 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef GOUGHCOMPILE_H
|
||||
#define GOUGHCOMPILE_H
|
||||
|
||||
#include "mcclellancompile.h"
|
||||
#include "nfa_kind.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/alloc.h"
|
||||
#include "util/ue2_containers.h"
|
||||
#include "util/order_check.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
#define CREATE_NEW_SOM (~0U)
|
||||
|
||||
/* dest nfa state -> som info for dest state is min of provided loc idx som
|
||||
* info */
|
||||
typedef flat_map<u32, std::vector<u32>> som_tran_info;
|
||||
|
||||
/** A report paired with a som slot. */
struct som_report {
|
||||
som_report(ReportID r, u32 s) : report(r), slot(s) {}
|
||||
|
||||
ReportID report;
|
||||
u32 slot;
|
||||
|
||||
/* Ordering used when som_reports are stored in std::set: compares report
 * first, then slot (via the ORDER_CHECK helper macro, which is expected to
 * return early when the named field differs -- its definition is not
 * visible here). Falls through to false when both fields are equal. */
bool operator<(const som_report &b) const {
|
||||
/* ORDER_CHECK refers to the two operands as 'a' and 'b' */
const som_report &a = *this;
|
||||
ORDER_CHECK(report);
|
||||
ORDER_CHECK(slot);
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
/** Som information for one dfa state (stored in raw_som_dfa::state_som). */
struct dstate_som {
|
||||
/* reports paired with their som slots */
std::set<som_report> reports;
|
||||
/* as above, for the EOD reports */
std::set<som_report> reports_eod;
|
||||
som_tran_info preds; /* live nfa states mapped back to pred states */
|
||||
};
|
||||
|
||||
struct raw_som_dfa : public raw_dfa {
|
||||
raw_som_dfa(nfa_kind k, bool unordered_som_triggers_in)
|
||||
: raw_dfa(k), unordered_som_triggers(unordered_som_triggers_in) {
|
||||
assert(!unordered_som_triggers || is_triggered(kind));
|
||||
}
|
||||
|
||||
std::vector<dstate_som> state_som;
|
||||
u32 stream_som_loc_width;
|
||||
bool unordered_som_triggers;
|
||||
void stripExtraEodReports(void) override;
|
||||
|
||||
std::map<u32, u32> new_som_nfa_states; /* map nfa vertex id -> offset */
|
||||
u32 trigger_nfa_state; /* for triggered cases, slot_id that contains a new
|
||||
* som */
|
||||
};
|
||||
|
||||
aligned_unique_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision,
|
||||
const CompileContext &cc);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
281
src/nfa/goughcompile_accel.cpp
Normal file
281
src/nfa/goughcompile_accel.cpp
Normal file
@@ -0,0 +1,281 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "goughcompile_internal.h"
|
||||
#include "gough_internal.h"
|
||||
#include "grey.h"
|
||||
#include "mcclellancompile.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/** Adds the edge (u, v) to \p g, unless it would be a self loop (u == v). */
template<typename Graph>
void add_edge_if_not_selfloop(const typename Graph::vertex_descriptor &u,
                              const typename Graph::vertex_descriptor &v,
                              Graph &g) {
    if (u == v) {
        return; /* self loops are deliberately left out of the graph */
    }

    add_edge(u, v, g);
}
|
||||
|
||||
static
|
||||
bool can_accel_over_selfloop(const GoughVertexProps &vp, const GoughEdge &e,
|
||||
const GoughEdgeProps &ep, u32 *margin) {
|
||||
if (vp.vars.empty() && ep.vars.empty()) {
|
||||
/* if we update no som information, then it is trivial to accelerate */
|
||||
*margin = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* if the effect of running a self loop stabilises after a small number of
|
||||
* iterations, it is possible to accelerate over the state and only then run
|
||||
* the block N times. To model this we create a graph which shows how the
|
||||
* value for a variable at the end of a self loop block is related to values
|
||||
* at the start */
|
||||
|
||||
typedef boost::adjacency_list<boost::vecS, boost::vecS,
|
||||
boost::bidirectionalS> basic_graph;
|
||||
typedef basic_graph::vertex_descriptor basic_vertex;
|
||||
basic_graph bg;
|
||||
|
||||
map<const GoughSSAVar *, basic_vertex> verts;
|
||||
|
||||
/* create verts */
|
||||
for (const auto &var : ep.vars) {
|
||||
verts[var.get()] = add_vertex(bg);
|
||||
}
|
||||
|
||||
for (const auto &var : vp.vars) {
|
||||
verts[var.get()] = add_vertex(bg);
|
||||
}
|
||||
|
||||
/* wire edges */
|
||||
set<basic_vertex> done;
|
||||
for (const auto &var : ep.vars) {
|
||||
assert(contains(verts, var.get()));
|
||||
basic_vertex v = verts[var.get()];
|
||||
for (GoughSSAVar *pred : var->get_inputs()) {
|
||||
if (!contains(verts, pred)) {
|
||||
continue;
|
||||
}
|
||||
basic_vertex u = verts[pred];
|
||||
if (contains(done, u)) { /* u has already taken on new values this
|
||||
* iteration */
|
||||
for (auto p : inv_adjacent_vertices_range(u, bg)) {
|
||||
add_edge_if_not_selfloop(p, v, bg);
|
||||
}
|
||||
} else {
|
||||
add_edge_if_not_selfloop(u, v, bg);
|
||||
}
|
||||
}
|
||||
done.insert(v);
|
||||
}
|
||||
|
||||
for (const auto &var : vp.vars) {
|
||||
GoughSSAVar *pred = var->get_input(e);
|
||||
assert(contains(verts, var.get()));
|
||||
basic_vertex v = verts[var.get()];
|
||||
if (!contains(verts, pred)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
basic_vertex u = verts[pred];
|
||||
if (contains(done, u)) { /* u has already taken on new values this
|
||||
* iteration */
|
||||
for (auto p : inv_adjacent_vertices_range(u, bg)) {
|
||||
add_edge_if_not_selfloop(p, v, bg);
|
||||
}
|
||||
} else {
|
||||
add_edge_if_not_selfloop(u, v, bg);
|
||||
}
|
||||
/* do not add v to done as all joins happen in parallel */
|
||||
}
|
||||
|
||||
/* check for loops - non self loops may prevent settling */
|
||||
|
||||
if (!is_dag(bg)) {
|
||||
DEBUG_PRINTF("can not %u accel as large loops\n", vp.state_id);
|
||||
return false;
|
||||
}
|
||||
|
||||
*margin = num_vertices(bg); /* TODO: be less conservative */
|
||||
|
||||
if (*margin > 50) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool verify_neighbour(const GoughGraph &g, GoughVertex u,
|
||||
const map<gough_edge_id, vector<gough_ins> > &blocks,
|
||||
const set<GoughVertex> &succs,
|
||||
const vector<gough_ins> &block_sl) {
|
||||
for (const auto &e : out_edges_range(u, g)) {
|
||||
if (!g[e].reach.any()) { /* ignore top edges */
|
||||
continue;
|
||||
}
|
||||
|
||||
GoughVertex t = target(e, g);
|
||||
if (!contains(succs, t)) { /* must be an escape string */
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!contains(blocks, gough_edge_id(g, e))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (blocks.at(gough_edge_id(g, e)) != block_sl) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static
|
||||
bool verify_neighbour_no_block(const GoughGraph &g, GoughVertex u,
|
||||
const map<gough_edge_id, vector<gough_ins> > &blocks,
|
||||
const set<GoughVertex> &succs) {
|
||||
for (const auto &e : out_edges_range(u, g)) {
|
||||
if (!g[e].reach.any()) { /* ignore top edges */
|
||||
continue;
|
||||
}
|
||||
|
||||
GoughVertex t = target(e, g);
|
||||
if (!contains(succs, t)) { /* must be an escape string */
|
||||
continue;
|
||||
}
|
||||
|
||||
if (contains(blocks, gough_edge_id(g, e))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Checks the som aspects of allowing two byte accel - it is expected that the
|
||||
* mcclellan logic will identify escape strings.
|
||||
*
|
||||
* For 2 byte acceleration to be correct we require that any non-escape sequence
|
||||
* characters xy from the accel state has the same effect as just the character
|
||||
* of y.
|
||||
*
|
||||
* The current way of ensuring this is to require:
|
||||
* (a) all edges out of the cyclic state behave identically to the cyclic self
|
||||
* loop edge
|
||||
* (b) edges out of the neighbouring state which do not correspond to escape
|
||||
* string behave identical to the cyclic state edges.
|
||||
*
|
||||
* TODO: these restrictions could be relaxed by looking at the effect on
|
||||
* relevant (live?) vars only, allowing additions to the escape string set, and
|
||||
* considering one byte escapes.
|
||||
*/
|
||||
static
|
||||
bool allow_two_byte_accel(const GoughGraph &g,
|
||||
const map<gough_edge_id, vector<gough_ins> > &blocks,
|
||||
GoughVertex v, const GoughEdge &self_loop) {
|
||||
if (contains(blocks, gough_edge_id(g, self_loop))) {
|
||||
DEBUG_PRINTF("edge plan on self loop\n");
|
||||
const auto &block_sl = blocks.at(gough_edge_id(g, self_loop));
|
||||
|
||||
set<GoughVertex> succs;
|
||||
for (const auto &e : out_edges_range(v, g)) {
|
||||
if (g[e].reach.none()) { /* ignore top edges */
|
||||
continue;
|
||||
}
|
||||
|
||||
gough_edge_id ged(g, e);
|
||||
if (!contains(blocks, ged) || blocks.at(ged) != block_sl) {
|
||||
DEBUG_PRINTF("different out-edge behaviour\n");
|
||||
return false;
|
||||
}
|
||||
succs.insert(target(e, g));
|
||||
}
|
||||
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (w != v && !verify_neighbour(g, w, blocks, succs, block_sl)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
DEBUG_PRINTF("no edge plan on self loop\n");
|
||||
set<GoughVertex> succs;
|
||||
for (const auto &e : out_edges_range(v, g)) {
|
||||
if (g[e].reach.none()) { /* ignore top edges */
|
||||
continue;
|
||||
}
|
||||
|
||||
gough_edge_id ged(g, e);
|
||||
if (contains(blocks, ged)) {
|
||||
DEBUG_PRINTF("different out-edge behaviour\n");
|
||||
return false;
|
||||
}
|
||||
succs.insert(target(e, g));
|
||||
|
||||
for (auto w : adjacent_vertices_range(v, g)) {
|
||||
if (w != v && !verify_neighbour_no_block(g, w, blocks, succs)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_PRINTF("allowing two byte accel for %u\n", g[v].state_id);
|
||||
return true;
|
||||
}
|
||||
|
||||
void find_allowed_accel_states(const GoughGraph &g,
|
||||
const map<gough_edge_id, vector<gough_ins> > &blocks,
|
||||
map<dstate_id_t, gough_accel_state_info> *out) {
|
||||
for (auto v : vertices_range(g)) {
|
||||
GoughEdge e;
|
||||
if (!find_normal_self_loop(v, g, &e)) {
|
||||
continue; /* not accelerable */
|
||||
}
|
||||
u32 margin = 0;
|
||||
if (!can_accel_over_selfloop(g[v], e, g[e], &margin)) {
|
||||
continue; /* not accelerable */
|
||||
}
|
||||
bool tba = allow_two_byte_accel(g, blocks, v, e);
|
||||
out->emplace(g[v].state_id, gough_accel_state_info(margin, tba));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
334
src/nfa/goughcompile_dump.cpp
Normal file
334
src/nfa/goughcompile_dump.cpp
Normal file
@@ -0,0 +1,334 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "goughcompile_dump.h"
|
||||
#include "goughcompile_internal.h"
|
||||
#include "grey.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph_range.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error No dump support!
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
/* Builds the dump label "vertex_<state_id>" for a vertex. */
string dump_name(const GoughVertexProps &vp) {
    ostringstream label;
    label << "vertex_";
    label << vp.state_id;
    return label.str();
}
|
||||
|
||||
static
|
||||
string dump_name(const GoughGraph &g, const GoughEdge &e) {
|
||||
stringstream ss;
|
||||
ss << "edge_" << g[source(e, g)].state_id << "_"
|
||||
<< g[target(e, g)].state_id;
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
/* Builds the dump label "edge_<src>_<dest>" for an edge id. */
string dump_name(const gough_edge_id &e) {
    ostringstream label;
    label << "edge_" << e.src;
    label << "_" << e.dest;
    return label.str();
}
|
||||
|
||||
static
|
||||
void dump_graph(const GoughGraph &g, const string &base, const Grey &grey) {
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "gough_" << base << ".dot";
|
||||
|
||||
FILE *f = fopen(ss.str().c_str(), "w");
|
||||
|
||||
fprintf(f, "digraph NFA {\n");
|
||||
fprintf(f, "rankdir=LR;\n");
|
||||
fprintf(f, "size=\"11.5,8\"\n");
|
||||
fprintf(f, "node [ shape = circle ];\n");
|
||||
fprintf(f, "START [style=invis];\n");
|
||||
|
||||
for (auto v : vertices_range(g)) {
|
||||
fprintf(f, "%s [ width = 1, fixedsize = true, fontsize = 12, ",
|
||||
dump_name(g[v]).c_str());
|
||||
if (!g[v].reports.empty() || !g[v].reports_eod.empty()) {
|
||||
fprintf(f, "shape = doublecircle ");
|
||||
}
|
||||
|
||||
fprintf(f, "label = \"%u\"];\n", g[v].state_id);
|
||||
}
|
||||
for (const auto &e : edges_range(g)) {
|
||||
GoughVertex s = source(e, g);
|
||||
GoughVertex t = target(e, g);
|
||||
|
||||
fprintf(f, "%s -> %s\n",
|
||||
dump_name(g[s]).c_str(), dump_name(g[t]).c_str());
|
||||
}
|
||||
fprintf(f, "}\n");
|
||||
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
static
|
||||
set<const GoughSSAVar *> uses(const GoughVertexProps &vp) {
|
||||
set<const GoughSSAVar *> rv;
|
||||
for (const auto &r : vp.reports) {
|
||||
if (r.second) {
|
||||
rv.insert(r.second);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &r : vp.reports_eod) {
|
||||
if (r.second) {
|
||||
rv.insert(r.second);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &var : vp.vars) {
|
||||
insert(&rv, var->get_inputs());
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
static
|
||||
set<const GoughSSAVar *> uses(const GoughEdgeProps &ep) {
|
||||
set<const GoughSSAVar *> rv;
|
||||
for (const auto &var : ep.vars) {
|
||||
insert(&rv, var->get_inputs());
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
static
|
||||
void dump_var_mapping(const GoughGraph &g, const string &base,
|
||||
const Grey &grey) {
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "gough_" << base << "_vars.txt";
|
||||
FILE *f = fopen(ss.str().c_str(), "w");
|
||||
for (auto v : vertices_range(g)) {
|
||||
set<const GoughSSAVar *> used = uses(g[v]);
|
||||
if (g[v].vars.empty() && used.empty()) {
|
||||
continue;
|
||||
}
|
||||
fprintf(f, "%s\n", dump_name(g[v]).c_str());
|
||||
for (u32 i = 0; i < g[v].vars.size(); i++) {
|
||||
const GoughSSAVar *vp = g[v].vars[i].get();
|
||||
fprintf(f, "\t%u: slot %u\n", i, vp->slot);
|
||||
}
|
||||
if (!used.empty()) {
|
||||
fprintf(f, "\tuses:");
|
||||
vector<u32> used_id;
|
||||
for (const GoughSSAVar *var : used) {
|
||||
used_id.push_back(var->slot);
|
||||
}
|
||||
for (const u32 &id : used_id) {
|
||||
fprintf(f, " %u", id);
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
for (const auto &e : edges_range(g)) {
|
||||
set<const GoughSSAVar *> used = uses(g[e]);
|
||||
if (g[e].vars.empty() && used.empty()) {
|
||||
continue;
|
||||
}
|
||||
fprintf(f, "%s\n", dump_name(g, e).c_str());
|
||||
for (u32 i = 0; i < g[e].vars.size(); i++) {
|
||||
const GoughSSAVar *vp = g[e].vars[i].get();
|
||||
fprintf(f, "\t%u: slot %u\n", i, vp->slot);
|
||||
}
|
||||
if (!used.empty()) {
|
||||
fprintf(f, "\tuses:");
|
||||
vector<u32> used_id;
|
||||
for (const GoughSSAVar *var : used) {
|
||||
used_id.push_back(var->slot);
|
||||
}
|
||||
for (const u32 &id : used_id) {
|
||||
fprintf(f, " %u", id);
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
static
|
||||
void gather_vars(const GoughGraph &g, vector<const GoughSSAVar *> *vars,
|
||||
map<const GoughSSAVar *, string> *names,
|
||||
map<const GoughSSAVar *, string> *src_label,
|
||||
set<const GoughSSAVar *> *reporters) {
|
||||
for (auto v : vertices_range(g)) {
|
||||
for (const auto &r : g[v].reports) {
|
||||
reporters->insert(r.second);
|
||||
}
|
||||
for (const auto &r : g[v].reports_eod) {
|
||||
reporters->insert(r.second);
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < g[v].vars.size(); i++) {
|
||||
const GoughSSAVar *vp = g[v].vars[i].get();
|
||||
stringstream ss;
|
||||
ss << dump_name(g[v]) << "_" << i;
|
||||
vars->push_back(vp);
|
||||
names->insert(make_pair(vp, ss.str()));
|
||||
src_label->insert(make_pair(vp, dump_name(g[v])));
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &e : edges_range(g)) {
|
||||
for (u32 i = 0; i < g[e].vars.size(); i++) {
|
||||
const GoughSSAVar *vp = g[e].vars[i].get();
|
||||
stringstream ss;
|
||||
ss << dump_name(g, e) << "_" << i;
|
||||
vars->push_back(vp);
|
||||
names->insert(make_pair(vp, ss.str()));
|
||||
src_label->insert(make_pair(vp, dump_name(g, e)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void dump_vars(const GoughGraph &g, const string &base, const Grey &grey) {
|
||||
FILE *f;
|
||||
{
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "gough_" << base << "_vars.dot";
|
||||
f = fopen(ss.str().c_str(), "w");
|
||||
}
|
||||
fprintf(f, "digraph NFA {\n");
|
||||
fprintf(f, "rankdir=LR;\n");
|
||||
fprintf(f, "size=\"11.5,8\"\n");
|
||||
fprintf(f, "node [ shape = circle ];\n");
|
||||
fprintf(f, "START [style=invis];\n");
|
||||
|
||||
vector<const GoughSSAVar *> vars;
|
||||
map<const GoughSSAVar *, string> names;
|
||||
map<const GoughSSAVar *, string> src_label;
|
||||
set<const GoughSSAVar *> reporters;
|
||||
gather_vars(g, &vars, &names, &src_label, &reporters);
|
||||
|
||||
for (const GoughSSAVar *vp : vars) {
|
||||
fprintf(f, "%s [ width = 1, fixedsize = true, fontsize = 12, ",
|
||||
names[vp].c_str());
|
||||
fprintf(f, "label = \"%s\\n", src_label[vp].c_str());
|
||||
|
||||
if (dynamic_cast<const GoughSSAVarMin *>(vp)) {
|
||||
fprintf(f, "MIN");
|
||||
} else if (dynamic_cast<const GoughSSAVarJoin *>(vp)) {
|
||||
fprintf(f, "JOIN");
|
||||
} else if (dynamic_cast<const GoughSSAVarNew *>(vp)) {
|
||||
fprintf(f, "NEW");
|
||||
} else {
|
||||
fprintf(f, "???");
|
||||
}
|
||||
fprintf(f, "\"];\n");
|
||||
}
|
||||
|
||||
for (const GoughSSAVar *vp : reporters) {
|
||||
if (vp) {
|
||||
fprintf(f, "%s [ shape = doublecircle]\n", names[vp].c_str());
|
||||
} else {
|
||||
fprintf(f, "eps [ label = \"eps\" shape = doublecircle]\n");
|
||||
}
|
||||
}
|
||||
|
||||
for (const GoughSSAVar *vp : vars) {
|
||||
const flat_set<GoughSSAVar *> &inputs = vp->get_inputs();
|
||||
for (const GoughSSAVar *v_in : inputs) {
|
||||
fprintf(f, "%s -> %s\n", names[v_in].c_str(), names[vp].c_str());
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(f, "}\n");
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
void dump(const GoughGraph &g, const string &base, const Grey &grey) {
|
||||
if (!grey.dumpFlags) {
|
||||
return;
|
||||
}
|
||||
|
||||
dump_graph(g, base, grey);
|
||||
dump_var_mapping(g, base, grey);
|
||||
dump_vars(g, base, grey);
|
||||
}
|
||||
|
||||
static
|
||||
void dump_block(FILE *f, const gough_edge_id &e,
|
||||
const vector<gough_ins> &block) {
|
||||
fprintf(f, "%s:\n", dump_name(e).c_str());
|
||||
for (const gough_ins &ins : block) {
|
||||
fprintf(f, "\t");
|
||||
switch (ins.op) {
|
||||
case GOUGH_INS_END:
|
||||
fprintf(f, "END");
|
||||
break;
|
||||
case GOUGH_INS_MOV:
|
||||
fprintf(f, "MOV %u %u", ins.dest, ins.src);
|
||||
break;
|
||||
case GOUGH_INS_NEW:
|
||||
fprintf(f, "NEW %u (+%u)", ins.dest, ins.src);
|
||||
break;
|
||||
case GOUGH_INS_MIN:
|
||||
fprintf(f, "MIN %u %u", ins.dest, ins.src);
|
||||
break;
|
||||
default:
|
||||
fprintf(f, "<UNKNOWN>");
|
||||
break;
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
void dump_blocks(const map<gough_edge_id, vector<gough_ins> > &blocks,
|
||||
const string &base, const Grey &grey) {
|
||||
if (!grey.dumpFlags) {
|
||||
return;
|
||||
}
|
||||
|
||||
FILE *f;
|
||||
{
|
||||
stringstream ss;
|
||||
ss << grey.dumpPath << "gough_" << base << "_programs.txt";
|
||||
f = fopen(ss.str().c_str(), "w");
|
||||
}
|
||||
|
||||
for (const auto &m : blocks) {
|
||||
dump_block(f, m.first, m.second);
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
63
src/nfa/goughcompile_dump.h
Normal file
63
src/nfa/goughcompile_dump.h
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef GOUGHCOMPILE_DUMP_H
|
||||
#define GOUGHCOMPILE_DUMP_H
|
||||
|
||||
#include "goughcompile_internal.h"
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct Grey;
|
||||
#ifdef DUMP_SUPPORT
|
||||
|
||||
std::string dump_name(const GoughVertexProps &vp);
|
||||
std::string dump_name(const gough_edge_id &e);
|
||||
void dump(const GoughGraph &g, const std::string &base, const Grey &grey);
|
||||
void dump_blocks(const std::map<gough_edge_id, std::vector<gough_ins> > &blocks,
|
||||
const std::string &base, const Grey &grey);
|
||||
#else
|
||||
|
||||
/* Builds without DUMP_SUPPORT get empty stand-ins so that callers need no
 * conditional compilation of their own. */

/* No-op replacement for dump(). */
static UNUSED
void dump(UNUSED const GoughGraph &g, UNUSED const std::string &base,
          UNUSED const Grey &grey) {}

/* No-op replacement for dump_blocks(). */
static UNUSED
void dump_blocks(
    UNUSED const std::map<gough_edge_id, std::vector<gough_ins> > &blocks,
    UNUSED const std::string &base, UNUSED const Grey &grey) {}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
#endif
|
||||
227
src/nfa/goughcompile_internal.h
Normal file
227
src/nfa/goughcompile_internal.h
Normal file
@@ -0,0 +1,227 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef GOUGHCOMPILE_INTERNAL_H
|
||||
#define GOUGHCOMPILE_INTERNAL_H
|
||||
|
||||
#include "gough_internal.h"
|
||||
#include "mcclellancompile.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/order_check.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
#include <boost/graph/adjacency_list.hpp>
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
struct Grey;
|
||||
struct GoughSSAVar;
|
||||
struct GoughSSAVarJoin;
|
||||
|
||||
struct GoughVertexProps {
|
||||
GoughVertexProps() {}
|
||||
explicit GoughVertexProps(u32 state_in) : state_id(state_in) {}
|
||||
u32 state_id = ~0U;
|
||||
|
||||
std::vector<std::shared_ptr<GoughSSAVarJoin> > vars; /* owns variables */
|
||||
|
||||
std::vector<std::pair<ReportID, GoughSSAVar *> > reports; /**< report som,
|
||||
som variable */
|
||||
std::vector<std::pair<ReportID, GoughSSAVar *> > reports_eod;
|
||||
};
|
||||
|
||||
struct GoughEdgeProps {
|
||||
GoughEdgeProps(void) : top(false) {}
|
||||
bool top;
|
||||
CharReach reach;
|
||||
|
||||
std::vector<std::shared_ptr<GoughSSAVar> > vars; /* owns variables */
|
||||
};
|
||||
|
||||
struct GoughGraphProps {
|
||||
boost::adjacency_list_traits<boost::vecS, boost::vecS>::vertex_descriptor
|
||||
initial_vertex; /* for triggered nfas, dead state;
|
||||
* for others start anchored or start floating
|
||||
*/
|
||||
};
|
||||
|
||||
typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS,
|
||||
GoughVertexProps, GoughEdgeProps, GoughGraphProps> GoughGraph;
|
||||
|
||||
typedef GoughGraph::vertex_descriptor GoughVertex;
|
||||
typedef GoughGraph::edge_descriptor GoughEdge;
|
||||
|
||||
struct gough_edge_id {
|
||||
gough_edge_id(const GoughGraph &g, const GoughEdge &e)
|
||||
: src(g[source(e, g)].state_id), dest(g[target(e, g)].state_id),
|
||||
first_char(g[e].reach.find_first()) {}
|
||||
bool operator<(const gough_edge_id &b) const {
|
||||
const gough_edge_id &a = *this;
|
||||
ORDER_CHECK(src);
|
||||
ORDER_CHECK(dest);
|
||||
ORDER_CHECK(first_char);
|
||||
return false;
|
||||
}
|
||||
const u32 src;
|
||||
const u32 dest;
|
||||
const u32 first_char; /* ~0U if only top */
|
||||
};
|
||||
|
||||
struct GoughSSAVarWithInputs;
|
||||
struct GoughSSAVarMin;
|
||||
struct GoughSSAVarJoin;
|
||||
|
||||
struct GoughSSAVar : boost::noncopyable {
|
||||
GoughSSAVar(void) : seen(false), slot(INVALID_SLOT) {}
|
||||
virtual ~GoughSSAVar();
|
||||
const ue2::flat_set<GoughSSAVar *> &get_inputs() const {
|
||||
return inputs;
|
||||
}
|
||||
const ue2::flat_set<GoughSSAVarWithInputs *> &get_outputs() const {
|
||||
return outputs;
|
||||
}
|
||||
virtual void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) = 0;
|
||||
|
||||
virtual void generate(std::vector<gough_ins> *out) const = 0;
|
||||
|
||||
bool seen; /* for temp use by remove_dead alg */
|
||||
u32 slot;
|
||||
|
||||
void clear_outputs();
|
||||
|
||||
/** remove all inputs and outputs of the vertex, call before
|
||||
* removing vertex */
|
||||
virtual void clear_all() {
|
||||
clear_outputs();
|
||||
}
|
||||
protected:
|
||||
ue2::flat_set<GoughSSAVar *> inputs;
|
||||
ue2::flat_set<GoughSSAVarWithInputs *> outputs;
|
||||
friend struct GoughSSAVarWithInputs;
|
||||
friend struct GoughSSAVarMin;
|
||||
friend struct GoughSSAVarJoin;
|
||||
};
|
||||
|
||||
struct GoughSSAVarNew : public GoughSSAVar {
|
||||
explicit GoughSSAVarNew(u32 adjust_in) : adjust(adjust_in) {}
|
||||
|
||||
void replace_input(GoughSSAVar *, GoughSSAVar *) override {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
void generate(std::vector<gough_ins> *out) const override;
|
||||
|
||||
const u32 adjust;
|
||||
};
|
||||
|
||||
struct GoughSSAVarWithInputs : public GoughSSAVar {
|
||||
GoughSSAVarWithInputs(void) {}
|
||||
void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override = 0;
|
||||
virtual void clear_inputs() = 0;
|
||||
void clear_all() override;
|
||||
protected:
|
||||
virtual void remove_input_raw(GoughSSAVar *v) = 0;
|
||||
friend struct GoughSSAVar;
|
||||
};
|
||||
|
||||
struct GoughSSAVarMin : public GoughSSAVarWithInputs {
|
||||
GoughSSAVarMin(void) {}
|
||||
void generate(std::vector<gough_ins> *out) const override;
|
||||
|
||||
void clear_inputs() override;
|
||||
void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override;
|
||||
|
||||
virtual void add_input(GoughSSAVar *v) {
|
||||
inputs.insert(v);
|
||||
v->outputs.insert(this);
|
||||
}
|
||||
|
||||
protected:
|
||||
void remove_input_raw(GoughSSAVar *v) override;
|
||||
};
|
||||
|
||||
struct GoughSSAVarJoin : public GoughSSAVarWithInputs {
|
||||
GoughSSAVarJoin(void) {}
|
||||
|
||||
/* dummy; all joins at a point must be generated simultaneously */
|
||||
void generate(std::vector<gough_ins> *out) const override;
|
||||
GoughSSAVar *get_input(const GoughEdge &prev) const;
|
||||
|
||||
void clear_inputs() override;
|
||||
void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override;
|
||||
|
||||
void add_input(GoughSSAVar *v, GoughEdge prev);
|
||||
|
||||
const ue2::flat_set<GoughEdge> &get_edges_for_input(GoughSSAVar *input)
|
||||
const;
|
||||
const std::map<GoughSSAVar *, ue2::flat_set<GoughEdge> > &get_input_map()
|
||||
const;
|
||||
|
||||
protected:
|
||||
void remove_input_raw(GoughSSAVar *v) override;
|
||||
|
||||
private:
|
||||
std::map<GoughSSAVar *, ue2::flat_set<GoughEdge>> input_map;
|
||||
};
|
||||
|
||||
struct gough_accel_state_info {
|
||||
u32 margin;
|
||||
bool two_byte;
|
||||
|
||||
gough_accel_state_info(u32 margin_in, bool two_byte_in)
|
||||
: margin(margin_in), two_byte(two_byte_in) {
|
||||
}
|
||||
};
|
||||
|
||||
u32 assign_slots(GoughGraph &g, const Grey &grey);
|
||||
void find_allowed_accel_states(const GoughGraph &g,
|
||||
const std::map<gough_edge_id, std::vector<gough_ins> > &blocks,
|
||||
std::map<dstate_id_t, gough_accel_state_info> *out);
|
||||
bool find_normal_self_loop(GoughVertex v, const GoughGraph &g, GoughEdge *out);
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
// Note: C structure, can't be in namespace ue2
|
||||
static inline
|
||||
bool operator==(const gough_ins &a, const gough_ins &b) {
|
||||
return a.op == b.op && a.dest == b.dest && a.src == b.src;
|
||||
}
|
||||
|
||||
static inline
|
||||
bool operator<(const gough_ins &a, const gough_ins &b) {
|
||||
return std::tie(a.op, a.src, a.dest) < std::tie(b.op, b.src, b.dest);
|
||||
}
|
||||
|
||||
#endif
|
||||
502
src/nfa/goughcompile_reg.cpp
Normal file
502
src/nfa/goughcompile_reg.cpp
Normal file
@@ -0,0 +1,502 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "goughcompile.h"
|
||||
#include "goughcompile_dump.h"
|
||||
#include "goughcompile_internal.h"
|
||||
#include "gough_internal.h"
|
||||
#include "grey.h"
|
||||
#include "util/container.h"
|
||||
#include "util/graph.h"
|
||||
#include "util/graph_range.h"
|
||||
#include "util/order_check.h"
|
||||
#include "util/ue2_containers.h"
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <boost/graph/depth_first_search.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
|
||||
using namespace std;
|
||||
using boost::adaptors::map_values;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
template<typename VarP, typename VarQ>
|
||||
void push_back_all_raw(vector<VarP> *out, const vector<VarQ> &in) {
|
||||
for (const auto &var : in) {
|
||||
out->push_back(var.get());
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void all_vars(const GoughGraph &g, vector<GoughSSAVar *> *out) {
|
||||
for (auto v : vertices_range(g)) {
|
||||
push_back_all_raw(out, g[v].vars);
|
||||
}
|
||||
for (const auto &e : edges_range(g)) {
|
||||
push_back_all_raw(out, g[e].vars);
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
struct GoughGraphAux {
|
||||
map<const GoughSSAVar *, GoughVertex> containing_v;
|
||||
map<const GoughSSAVar *, GoughEdge> containing_e;
|
||||
map<const GoughSSAVar *, set<GoughVertex> > reporters;
|
||||
};
|
||||
}
|
||||
|
||||
/* Populates aux from g: records the vertex or edge holding each variable and,
 * for every report / eod report entry, the vertex which reports that
 * variable. */
static never_inline
void fill_aux(const GoughGraph &g, GoughGraphAux *aux) {
    for (auto v : vertices_range(g)) {
        const GoughVertexProps &props = g[v];

        for (const auto &join : props.vars) {
            aux->containing_v[join.get()] = v;
            DEBUG_PRINTF("%u is on vertex %u\n", join->slot, props.state_id);
        }

        for (const auto &rep : props.reports) {
            GoughSSAVar *som_var = rep.second;
            aux->reporters[som_var].insert(v);
        }

        for (const auto &rep : props.reports_eod) {
            GoughSSAVar *som_var = rep.second;
            aux->reporters[som_var].insert(v);
        }
    }

    for (const auto &e : edges_range(g)) {
        for (const auto &edge_var : g[e].vars) {
            aux->containing_e[edge_var.get()] = e;
            DEBUG_PRINTF("%u is on edge %u->%u\n", edge_var->slot,
                         g[source(e, g)].state_id, g[target(e, g)].state_id);
        }
    }
}
|
||||
|
||||
static
|
||||
bool is_block_local(const GoughGraph &cfg, GoughSSAVar *var,
|
||||
const GoughGraphAux &aux) {
|
||||
/* if var used as a report, it cannot be considered block local */
|
||||
if (contains(aux.reporters, var)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* (useful) vertex/join vars never local - they are terminal in blocks
|
||||
* and so should be read by another block. */
|
||||
if (!contains(aux.containing_e, var)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* for other cases, require that all uses of var are later in the same edge
|
||||
* or on the target AND if on target it is sole on flow coming from the
|
||||
* edge in question. */
|
||||
const GoughEdge &e = aux.containing_e.at(var);
|
||||
GoughVertex t = target(e, cfg);
|
||||
|
||||
size_t seen_outputs = 0;
|
||||
const flat_set<GoughSSAVarWithInputs *> &out = var->get_outputs();
|
||||
bool seen_var = false;
|
||||
for (const auto &e_var : cfg[e].vars) {
|
||||
if (seen_var) {
|
||||
GoughSSAVarWithInputs *w
|
||||
= dynamic_cast<GoughSSAVarWithInputs *>(e_var.get());
|
||||
if (contains(out, w)) {
|
||||
seen_outputs++;
|
||||
}
|
||||
} else {
|
||||
seen_var = var == e_var.get();
|
||||
}
|
||||
}
|
||||
assert(seen_var);
|
||||
|
||||
for (const auto &t_var : cfg[t].vars) {
|
||||
if (contains(out, t_var.get())) {
|
||||
seen_outputs++;
|
||||
const flat_set<GoughEdge> &flow = t_var->get_edges_for_input(var);
|
||||
if (flow.size() != 1 || *flow.begin() != e) {
|
||||
/* this var is used by the target join var BUT on a different
|
||||
* flow, so this is not a block local variable */
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(seen_outputs <= out.size());
|
||||
return seen_outputs == out.size();
|
||||
}
|
||||
|
||||
static
|
||||
void handle_pending_edge(const GoughGraph &g, const GoughEdge &e,
|
||||
GoughSSAVar *start, set<GoughVertex> &pending_vertex,
|
||||
set<const GoughSSAVar *> &rv) {
|
||||
const vector<shared_ptr<GoughSSAVar> > &vars = g[e].vars;
|
||||
bool marking = !start;
|
||||
DEBUG_PRINTF(" ---checking edge %u->%u %s %zu\n", g[source(e, g)].state_id,
|
||||
g[target(e, g)].state_id, marking ? "full" : "partial",
|
||||
vars.size());
|
||||
for (auto it = vars.rbegin(); it != vars.rend(); ++it) {
|
||||
GoughSSAVar *var = it->get();
|
||||
if (contains(rv, var)) {
|
||||
DEBUG_PRINTF("somebody has already processed this vertex [%u]\n",
|
||||
var->slot);
|
||||
return;
|
||||
}
|
||||
if (var == start) {
|
||||
assert(!marking);
|
||||
marking = true;
|
||||
continue;
|
||||
}
|
||||
if (marking) {
|
||||
rv.insert(var);
|
||||
}
|
||||
}
|
||||
assert(marking);
|
||||
GoughVertex s = source(e, g);
|
||||
for (const auto &var : g[s].vars) {
|
||||
DEBUG_PRINTF("interferes %u\n", var->slot);
|
||||
rv.insert(var.get());
|
||||
}
|
||||
pending_vertex.insert(s);
|
||||
}
|
||||
|
||||
static
|
||||
void handle_pending_vars(GoughSSAVar *def, const GoughGraph &g,
|
||||
const GoughGraphAux &aux,
|
||||
const flat_set<GoughSSAVarWithInputs *> &pending_var,
|
||||
set<GoughVertex> &pending_vertex,
|
||||
set<const GoughSSAVar *> &rv) {
|
||||
for (GoughSSAVarWithInputs *var : pending_var) {
|
||||
if (contains(aux.containing_v, var)) {
|
||||
/* def is used by join vertex, value only needs to be live on some
|
||||
* incoming edges */
|
||||
GoughSSAVarJoin *vj = (GoughSSAVarJoin *)var;
|
||||
const flat_set<GoughEdge> &live_edges
|
||||
= vj->get_edges_for_input(def);
|
||||
for (const auto &e : live_edges) {
|
||||
handle_pending_edge(g, e, nullptr, pending_vertex, rv);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
const GoughEdge &e = aux.containing_e.at(var);
|
||||
handle_pending_edge(g, e, var, pending_vertex, rv);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void handle_pending_vertex(GoughVertex def_v, const GoughGraph &g,
|
||||
GoughVertex current,
|
||||
set<GoughVertex> &pending_vertex,
|
||||
set<const GoughSSAVar *> &rv) {
|
||||
DEBUG_PRINTF("---checking vertex %u\n", g[current].state_id);
|
||||
if (def_v == current) {
|
||||
DEBUG_PRINTF("contains target vertex\n");
|
||||
return; /* we have reached def */
|
||||
}
|
||||
for (const auto &e : in_edges_range(current, g)) {
|
||||
handle_pending_edge(g, e, nullptr, pending_vertex, rv);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void handle_pending_vertices(GoughSSAVar *def, const GoughGraph &g,
|
||||
const GoughGraphAux &aux,
|
||||
set<GoughVertex> &pending_vertex,
|
||||
set<const GoughSSAVar *> &rv) {
|
||||
if (pending_vertex.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
GoughVertex def_v = GoughGraph::null_vertex();
|
||||
if (contains(aux.containing_v, def)) {
|
||||
def_v = aux.containing_v.at(def);
|
||||
}
|
||||
ue2::unordered_set<GoughVertex> done;
|
||||
while (!pending_vertex.empty()) {
|
||||
GoughVertex current = *pending_vertex.begin();
|
||||
pending_vertex.erase(current);
|
||||
if (contains(done, current)) {
|
||||
continue;
|
||||
}
|
||||
done.insert(current);
|
||||
handle_pending_vertex(def_v, g, current, pending_vertex, rv);
|
||||
}
|
||||
}
|
||||
|
||||
/* returns set of labels that the given def is live at */
|
||||
static never_inline
|
||||
set<const GoughSSAVar *> live_during(GoughSSAVar *def, const GoughGraph &g,
|
||||
const GoughGraphAux &aux) {
|
||||
DEBUG_PRINTF("checking who is defined during %u lifetime\n", def->slot);
|
||||
set<GoughVertex> pending_vertex;
|
||||
|
||||
set<const GoughSSAVar *> rv;
|
||||
rv.insert(def);
|
||||
|
||||
if (contains(aux.reporters, def)) {
|
||||
DEBUG_PRINTF("--> gets reported\n");
|
||||
const set<GoughVertex> &reporters = aux.reporters.at(def);
|
||||
for (auto v : reporters) {
|
||||
pending_vertex.insert(v);
|
||||
for (const auto &var : g[v].vars) {
|
||||
DEBUG_PRINTF("interferes %u\n", var->slot);
|
||||
rv.insert(var.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
handle_pending_vars(def, g, aux, def->get_outputs(), pending_vertex, rv);
|
||||
handle_pending_vertices(def, g, aux, pending_vertex, rv);
|
||||
|
||||
rv.erase(def);
|
||||
return rv;
|
||||
}
|
||||
|
||||
template<typename VarP>
|
||||
void set_initial_slots(const vector<VarP> &vars, u32 *next_slot) {
|
||||
for (auto &var : vars) {
|
||||
assert(var->slot == INVALID_SLOT);
|
||||
var->slot = (*next_slot)++;
|
||||
}
|
||||
}
|
||||
|
||||
/* crude, deterministic assignment of symbolic register slots.
|
||||
* returns number of slots given out
|
||||
*/
|
||||
static
|
||||
u32 initial_slots(const GoughGraph &g) {
|
||||
u32 next_slot = 0;
|
||||
for (auto v : vertices_range(g)) {
|
||||
set_initial_slots(g[v].vars, &next_slot);
|
||||
}
|
||||
for (const auto &e : edges_range(g)) {
|
||||
set_initial_slots(g[e].vars, &next_slot);
|
||||
}
|
||||
|
||||
return next_slot;
|
||||
}
|
||||
|
||||
#define NO_COLOUR (~0U)
|
||||
|
||||
static
|
||||
u32 available_colour(const flat_set<u32> &bad_colours) {
|
||||
u32 rv = 0;
|
||||
for (const u32 &colour : bad_colours) {
|
||||
if (colour != rv) {
|
||||
assert(colour > rv);
|
||||
break;
|
||||
}
|
||||
rv = colour + 1;
|
||||
}
|
||||
|
||||
assert(rv != NO_COLOUR);
|
||||
return rv;
|
||||
}
|
||||
|
||||
static
|
||||
void poison_colours(const set<const GoughSSAVar *> &live, u32 c,
|
||||
const vector<u32> &colour_map,
|
||||
vector<flat_set<u32> > *bad_colour) {
|
||||
for (const GoughSSAVar *var : live) {
|
||||
u32 var_index = var->slot;
|
||||
if (colour_map[var_index] != NO_COLOUR) {
|
||||
assert(c != colour_map[var_index]);
|
||||
} else {
|
||||
(*bad_colour)[var_index].insert(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void find_bad_due_to_live(const set<const GoughSSAVar *> &live,
|
||||
const vector<u32> &colour_map, flat_set<u32> *out) {
|
||||
for (const GoughSSAVar *var : live) {
|
||||
u32 var_index = var->slot;
|
||||
if (colour_map[var_index] != NO_COLOUR) {
|
||||
out->insert(colour_map[var_index]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void sequential_vertex_colouring(const GoughGraph &g, const GoughGraphAux &aux,
|
||||
const vector<GoughSSAVar *> &order,
|
||||
vector<u32> &colour_map) {
|
||||
assert(order.size() < NO_COLOUR);
|
||||
colour_map.clear();
|
||||
colour_map.resize(order.size(), NO_COLOUR);
|
||||
vector<u32> temp(order.size(), ~0U);
|
||||
vector<flat_set<u32> > bad_colour(order.size());
|
||||
|
||||
for (GoughSSAVar *var : order) {
|
||||
u32 var_index = var->slot;
|
||||
if (is_block_local(g, var, aux)) {
|
||||
DEBUG_PRINTF("%u is block local\n", var_index);
|
||||
/* ignore variable whose lifetime is limited to their local block
|
||||
* there is no need to assign stream state to these variables */
|
||||
continue;
|
||||
}
|
||||
assert(colour_map[var_index] == NO_COLOUR);
|
||||
set<const GoughSSAVar *> live = live_during(var, g, aux);
|
||||
flat_set<u32> &local_bad = bad_colour[var_index];
|
||||
find_bad_due_to_live(live, colour_map, &local_bad);
|
||||
DEBUG_PRINTF("colouring %u\n", var_index);
|
||||
u32 c = available_colour(local_bad);
|
||||
colour_map[var_index] = c;
|
||||
assert(!contains(bad_colour[var_index], c));
|
||||
poison_colours(live, c, colour_map, &bad_colour);
|
||||
|
||||
flat_set<u32> temp_set;
|
||||
local_bad.swap(temp_set);
|
||||
DEBUG_PRINTF(" %u coloured %u\n", var_index, c);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename VarP>
|
||||
void add_to_dom_ordering(const vector<VarP> &vars,
|
||||
vector<GoughSSAVar *> *out) {
|
||||
for (const auto &var : vars) {
|
||||
out->push_back(var.get());
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
class FinishVisitor : public boost::default_dfs_visitor {
|
||||
public:
|
||||
explicit FinishVisitor(vector<GoughVertex> *o) : out(o) {}
|
||||
void finish_vertex(const GoughVertex v, const GoughGraph &) {
|
||||
out->push_back(v);
|
||||
}
|
||||
vector<GoughVertex> *out;
|
||||
};
|
||||
}
|
||||
|
||||
static
|
||||
void find_dom_ordering(const GoughGraph &cfg, vector<GoughSSAVar *> *out) {
|
||||
vector<GoughVertex> g_order;
|
||||
|
||||
/* due to construction quirks, default vertex order provides entry points */
|
||||
depth_first_search(cfg, visitor(FinishVisitor(&g_order))
|
||||
.root_vertex(cfg[boost::graph_bundle].initial_vertex));
|
||||
|
||||
for (auto it = g_order.rbegin(); it != g_order.rend(); ++it) {
|
||||
add_to_dom_ordering(cfg[*it].vars, out);
|
||||
for (const auto &e : out_edges_range(*it, cfg)) {
|
||||
add_to_dom_ordering(cfg[e].vars, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void create_slot_mapping(const GoughGraph &cfg, UNUSED u32 old_slot_count,
|
||||
vector<u32> *old_new) {
|
||||
/* Interference graphs from SSA form are chordal -> optimally colourable in
|
||||
* poly time.
|
||||
*
|
||||
* Chordal graphs can be coloured by walking in perfect elimination order.
|
||||
* If the SSA CFG is iterated over in a way that respects dominance
|
||||
* relationship, the interference graph will be iterated in a perfect
|
||||
* elimination order.
|
||||
*
|
||||
* We can avoid creating the full interference graph and use liveness
|
||||
* information as we iterate over the definitions to perform the colouring.
|
||||
*
|
||||
* See S Hack various 2006-
|
||||
*/
|
||||
vector<GoughSSAVar *> dom_order;
|
||||
|
||||
GoughGraphAux aux;
|
||||
fill_aux(cfg, &aux);
|
||||
|
||||
find_dom_ordering(cfg, &dom_order);
|
||||
assert(dom_order.size() == old_slot_count);
|
||||
sequential_vertex_colouring(cfg, aux, dom_order, *old_new);
|
||||
}
|
||||
|
||||
static
|
||||
void update_local_slots(GoughGraph &g, set<GoughSSAVar *> &locals,
|
||||
u32 local_base) {
|
||||
DEBUG_PRINTF("%zu local variables\n", locals.size());
|
||||
/* local variables only occur on edges (joins are never local) */
|
||||
|
||||
u32 allocated_count = 0;
|
||||
for (const auto &e : edges_range(g)) {
|
||||
u32 next_slot = local_base;
|
||||
for (auto &var : g[e].vars) {
|
||||
if (contains(locals, var.get())) {
|
||||
DEBUG_PRINTF("updating slot %u using local %u\n", var->slot,
|
||||
next_slot);
|
||||
var->slot = next_slot++;
|
||||
allocated_count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(allocated_count == locals.size());
|
||||
}
|
||||
|
||||
/*
 * Rewrites every variable's slot through the old_new mapping. Variables whose
 * mapping is NO_COLOUR are collected as locals and numbered afterwards,
 * starting just past the highest mapped slot.
 *
 * Returns the number of mapped (non-local) slots in use, i.e. one more than
 * the largest mapped slot, or 0 if there is none.
 */
static never_inline
u32 update_slots(GoughGraph &g, const vector<u32> &old_new,
                 UNUSED u32 old_slot_count) {
    vector<GoughSSAVar *> vars;
    all_vars(g, &vars);

    set<GoughSSAVar *> locals;
    u32 slot_count = 0;
    for (GoughSSAVar *v : vars) {
        assert(v->slot < old_new.size());
        const u32 mapped = old_new[v->slot];
        DEBUG_PRINTF("updating slot %u to %u\n", v->slot, mapped);
        if (mapped == NO_COLOUR) {
            locals.insert(v);
            continue;
        }
        /* not local, assign final slot */
        v->slot = mapped;
        ENSURE_AT_LEAST(&slot_count, v->slot + 1);
    }
    assert(slot_count <= old_slot_count);
    DEBUG_PRINTF("reduce stream slots from %u to %u\n", old_slot_count,
                 slot_count);
    update_local_slots(g, locals, slot_count);

    return slot_count;
}
|
||||
|
||||
/*
 * Assigns a slot to every variable in cfg and returns the slot count.
 *
 * With grey.goughRegisterAllocate disabled the initial one-slot-per-variable
 * numbering is kept and its size returned. Otherwise the slots are remapped
 * through create_slot_mapping()/update_slots() and the count reported by
 * update_slots() is returned.
 */
u32 assign_slots(GoughGraph &cfg, const Grey &grey) {
    const u32 initial_count = initial_slots(cfg);
    if (!grey.goughRegisterAllocate) {
        return initial_count;
    }

    dump(cfg, "slots_pre", grey);

    vector<u32> old_new;
    create_slot_mapping(cfg, initial_count, &old_new);
    return update_slots(cfg, old_new, initial_count);
}
|
||||
|
||||
} // namespace ue2
|
||||
349
src/nfa/goughdump.cpp
Normal file
349
src/nfa/goughdump.cpp
Normal file
@@ -0,0 +1,349 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "goughdump.h"
|
||||
|
||||
#include "gough_internal.h"
|
||||
#include "mcclellandump.h"
|
||||
#include "nfa_dump_internal.h"
|
||||
#include "nfa_internal.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/charreach.h"
|
||||
#include "util/dump_charclass.h"
|
||||
#include "util/unaligned.h"
|
||||
|
||||
#include <cctype>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#ifndef DUMP_SUPPORT
|
||||
#error No dump support!
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace ue2 {
|
||||
|
||||
static
|
||||
void goughGetTransitions(const NFA *n, u16 s, u16 *t) {
|
||||
assert(isGoughType(n->type));
|
||||
const mcclellan *m = (const mcclellan *)getImplNfa(n);
|
||||
const mstate_aux *aux = getAux(n, s);
|
||||
const u32 as = m->alphaShift;
|
||||
const char *sher_base
|
||||
= (const char *)m - sizeof(struct NFA) + m->sherman_offset;
|
||||
|
||||
if (n->type == GOUGH_NFA_8) {
|
||||
const u8 *succ_table = (const u8 *)((const char *)m + sizeof(mcclellan));
|
||||
for (u16 c = 0; c < N_CHARS; c++) {
|
||||
t[c] = succ_table[((u32)s << as) + m->remap[c]];
|
||||
}
|
||||
} else {
|
||||
u16 base_s = s;
|
||||
|
||||
if (s >= m->sherman_limit) {
|
||||
const char *state_base
|
||||
= findShermanState(m, sher_base, m->sherman_limit, s);
|
||||
base_s = *(const u16 *)(state_base + SHERMAN_DADDY_OFFSET);
|
||||
}
|
||||
|
||||
const u16 *succ_table = (const u16 *)((const char *)m
|
||||
+ sizeof(mcclellan));
|
||||
for (u16 c = 0; c < N_CHARS; c++) {
|
||||
const u8 *addr
|
||||
= (const u8*)(succ_table + (((u32)base_s << as) + m->remap[c]));
|
||||
t[c] = unaligned_load_u16(addr);
|
||||
t[c] &= STATE_MASK;
|
||||
}
|
||||
|
||||
if (s >= m->sherman_limit) {
|
||||
const char *state_base
|
||||
= findShermanState(m, sher_base, m->sherman_limit, s);
|
||||
u8 len = *(const u8 *)(SHERMAN_LEN_OFFSET + state_base);
|
||||
const u8 *chars = (const u8 *)state_base + SHERMAN_CHARS_OFFSET;
|
||||
const u16 *states
|
||||
= (const u16 *)(state_base + SHERMAN_STATES_OFFSET(len));
|
||||
|
||||
for (u8 i = 0; i < len; i++) {
|
||||
for (u16 c = 0; c < N_CHARS; c++) {
|
||||
if (m->remap[c] != chars[i]) {
|
||||
t[c] = unaligned_load_u16((const u8*)&states[i])
|
||||
& STATE_MASK;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
t[TOP] = aux->top & STATE_MASK;
|
||||
}
|
||||
|
||||
static
|
||||
void describeNode(const NFA *n, const mcclellan *m, u16 i, FILE *f) {
|
||||
const mstate_aux *aux = getAux(n, i);
|
||||
|
||||
bool isSherman = m->sherman_limit && i >= m->sherman_limit;
|
||||
const char *sher_base
|
||||
= (const char *)m - sizeof(NFA) + m->sherman_offset;
|
||||
|
||||
fprintf(f, "%u [ width = 1, fixedsize = true, fontsize = 12, "
|
||||
"label = \"%u%s\" ]; \n", i, i, isSherman ? "w":"");
|
||||
|
||||
if (aux->accel_offset) {
|
||||
dumpAccelDot(f, i,
|
||||
&((const gough_accel *)((const char *)m + aux->accel_offset))->accel);
|
||||
}
|
||||
|
||||
if (aux->accept_eod) {
|
||||
fprintf(f, "%u [ color = darkorchid ];\n", i);
|
||||
}
|
||||
|
||||
if (aux->accept) {
|
||||
fprintf(f, "%u [ shape = doublecircle ];\n", i);
|
||||
}
|
||||
|
||||
if (aux->top && aux->top != i) {
|
||||
fprintf(f, "%u -> %u [color = darkgoldenrod weight=0.1 ]\n", i,
|
||||
aux->top);
|
||||
}
|
||||
|
||||
if (i == m->start_anchored) {
|
||||
fprintf(f, "STARTA -> %u [color = blue ]\n", i);
|
||||
}
|
||||
|
||||
if (i == m->start_floating) {
|
||||
fprintf(f, "STARTF -> %u [color = red ]\n", i);
|
||||
}
|
||||
|
||||
if (isSherman) {
|
||||
const char *sherman_state
|
||||
= findShermanState(m, sher_base, m->sherman_limit, i);
|
||||
fprintf(f, "%u [ fillcolor = lightblue style=filled ];\n", i);
|
||||
u16 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET);
|
||||
if (daddy) {
|
||||
fprintf(f, "%u -> %u [ color=royalblue style=dashed weight=0.1]\n",
|
||||
i, daddy);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void dump_program(FILE *f, const pair<u32, u32> &e, const gough_ins *prog) {
|
||||
fprintf(f, "edge_%u_%u:\n", e.first, e.second);
|
||||
for (const gough_ins *it = prog;; ++it) {
|
||||
fprintf(f, "\t");
|
||||
u32 s = it->src;
|
||||
u32 d = it->dest;
|
||||
switch (it->op) {
|
||||
case GOUGH_INS_END:
|
||||
fprintf(f, "END");
|
||||
fprintf(f, "\n");
|
||||
return;
|
||||
case GOUGH_INS_MOV:
|
||||
fprintf(f, "MOV %u %u", d, s);
|
||||
break;
|
||||
case GOUGH_INS_NEW:
|
||||
fprintf(f, "NEW-%u %u", s, d);
|
||||
break;
|
||||
case GOUGH_INS_MIN:
|
||||
fprintf(f, "MIN %u %u", d, s);
|
||||
break;
|
||||
default:
|
||||
fprintf(f, "<UNKNOWN>");
|
||||
fprintf(f, "\n");
|
||||
return;
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void dump_programs(FILE *f, const NFA *nfa,
|
||||
const set<pair<pair<u32, u32>, u32 > > &prog_dump) {
|
||||
fprintf(f, "Edge Programs\n");
|
||||
fprintf(f, "-------------\n");
|
||||
for (set<pair<pair<u32, u32>, u32 > >::const_iterator it
|
||||
= prog_dump.begin(); it != prog_dump.end(); ++it) {
|
||||
assert(it->second);
|
||||
const gough_ins *p = (const gough_ins *)((const u8 *)nfa + it->second);
|
||||
dump_program(f, it->first, p);
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void dumpTransitions(const NFA *nfa, FILE *f,
|
||||
set<pair<pair<u32, u32>, u32 > > *prog_dump) {
|
||||
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
||||
const gough_info *g = get_gough(m);
|
||||
u32 alphaSize = 1U << m->alphaShift;
|
||||
const u32 *prog_offset_table = (const u32 *)(g + 1);
|
||||
|
||||
for (u16 i = 0; i < m->state_count; i++) {
|
||||
fprintf(f, "%05hu", i);
|
||||
const mstate_aux *aux = getAux(nfa, i);
|
||||
|
||||
if (aux->accel_offset) {
|
||||
dumpAccelText(f, (const union AccelAux *)((const char *)m +
|
||||
aux->accel_offset));
|
||||
}
|
||||
|
||||
u16 trans[ALPHABET_SIZE];
|
||||
goughGetTransitions(nfa, i, trans);
|
||||
|
||||
int rstart = 0;
|
||||
u16 prev = 0xffff;
|
||||
for (int j = 0; j < N_CHARS; j++) {
|
||||
u16 curr = trans[j];
|
||||
if (curr == prev) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (prev != 0xffff) {
|
||||
if (j == rstart + 1) {
|
||||
fprintf(f, " %02x->%hu", rstart, prev);
|
||||
} else {
|
||||
fprintf(f, " [%02x - %02x]->%hu", rstart, j - 1, prev);
|
||||
}
|
||||
}
|
||||
|
||||
prev = curr;
|
||||
rstart = j;
|
||||
|
||||
u32 edge_index = i * alphaSize + m->remap[j];
|
||||
u32 prog_offset = prog_offset_table[edge_index];
|
||||
if (prog_offset) {
|
||||
prog_dump->insert(make_pair(make_pair((u32)i, (u32)trans[j]),
|
||||
prog_offset));
|
||||
}
|
||||
}
|
||||
if (N_CHARS == rstart + 1) {
|
||||
fprintf(f, " %02x->%hu", rstart, prev);
|
||||
} else {
|
||||
fprintf(f, " [%02x - %02x]->%hu", rstart, N_CHARS - 1, prev);
|
||||
}
|
||||
fprintf(f, " TOP->%hu\n", trans[TOP]);
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
|
||||
void nfaExecGough8_dumpDot(const struct NFA *nfa, FILE *f) {
|
||||
assert(nfa->type == GOUGH_NFA_8);
|
||||
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
||||
|
||||
dumpDotPreambleDfa(f);
|
||||
|
||||
for (u16 i = 1; i < m->state_count; i++) {
|
||||
describeNode(nfa, m, i, f);
|
||||
|
||||
u16 t[ALPHABET_SIZE];
|
||||
|
||||
goughGetTransitions(nfa, i, t);
|
||||
|
||||
describeEdge(f, t, i);
|
||||
}
|
||||
|
||||
fprintf(f, "}\n");
|
||||
}
|
||||
|
||||
void nfaExecGough8_dumpText(const struct NFA *nfa, FILE *f) {
|
||||
|
||||
assert(nfa->type == GOUGH_NFA_8);
|
||||
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
||||
|
||||
fprintf(f, "gough 8\n");
|
||||
fprintf(f, "report: %u, states %u, length %u\n", m->arb_report,
|
||||
m->state_count, m->length);
|
||||
fprintf(f, "astart: %hu, fstart %hu\n", m->start_anchored,
|
||||
m->start_floating);
|
||||
fprintf(f, "accel_limit: %hu, accept_limit %hu\n", m->accel_limit_8,
|
||||
m->accept_limit_8);
|
||||
fprintf(f, "\n");
|
||||
|
||||
describeAlphabet(f, m);
|
||||
|
||||
set<pair<pair<u32, u32>, u32 > > prog_dump;
|
||||
|
||||
dumpTransitions(nfa, f, &prog_dump);
|
||||
dump_programs(f, nfa, prog_dump);
|
||||
|
||||
dumpTextReverse(nfa, f);
|
||||
}
|
||||
|
||||
void nfaExecGough16_dumpDot(const struct NFA *nfa, FILE *f) {
|
||||
assert(nfa->type == GOUGH_NFA_16);
|
||||
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
||||
|
||||
dumpDotPreambleDfa(f);
|
||||
|
||||
for (u16 i = 1; i < m->state_count; i++) {
|
||||
describeNode(nfa, m, i, f);
|
||||
|
||||
u16 t[ALPHABET_SIZE];
|
||||
|
||||
goughGetTransitions(nfa, i, t);
|
||||
|
||||
describeEdge(f, t, i);
|
||||
}
|
||||
|
||||
fprintf(f, "}\n");
|
||||
}
|
||||
|
||||
void nfaExecGough16_dumpText(const struct NFA *nfa, FILE *f) {
|
||||
assert(nfa->type == GOUGH_NFA_16);
|
||||
const mcclellan *m = (const mcclellan *)getImplNfa(nfa);
|
||||
// const gough_info *h = get_gough(m);
|
||||
|
||||
fprintf(f, "gough 16\n");
|
||||
fprintf(f, "report: %u, states: %u, length: %u\n", m->arb_report,
|
||||
m->state_count, m->length);
|
||||
fprintf(f, "astart: %hu, fstart: %hu\n", m->start_anchored,
|
||||
m->start_floating);
|
||||
fprintf(f, "single accept: %d\n", !!(int)m->flags & MCCLELLAN_FLAG_SINGLE);
|
||||
fprintf(f, "sherman_limit: %u, sherman_end: %u\n", m->sherman_limit,
|
||||
m->sherman_end);
|
||||
|
||||
describeAlphabet(f, m);
|
||||
|
||||
set<pair<pair<u32, u32>, u32 > > prog_dump;
|
||||
|
||||
dumpTransitions(nfa, f, &prog_dump);
|
||||
dump_programs(f, nfa, prog_dump);
|
||||
|
||||
fprintf(f, "\n");
|
||||
dumpTextReverse(nfa, f);
|
||||
}
|
||||
|
||||
} // namespace ue2
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user