hsbench: add Hyperscan benchmarker

The hsbench tool provides an easy way to measure Hyperscan's
performance for a particular set of patterns and corpus of data
to be scanned.
This commit is contained in:
Matthew Barr 2016-12-14 15:26:01 +11:00
parent 06cde4c94d
commit f626276271
26 changed files with 3145 additions and 1 deletions

53
cmake/sqlite3.cmake Normal file
View File

@ -0,0 +1,53 @@
#
# Locate sqlite3 for the tools that need it.
#
# An installed copy found through pkg-config is used when available (skipped
# on Windows and when SQLITE_PREFER_STATIC is ON); otherwise the amalgamation
# in ${PROJECT_SOURCE_DIR}/sqlite3 is built as the static library target
# sqlite3_static. Configuration fails if neither is usable.
#
# Variables set for the including scope:
#   SQLITE3_FOUND         true once a copy of sqlite3 has been selected
#   SQLITE3_INCLUDE_DIRS  directories containing sqlite3.h
#   SQLITE3_LDFLAGS       value to hand to target_link_libraries()
#   SQLITE3_BUILD_SOURCE  TRUE when the in-tree copy is being built
#

option(SQLITE_PREFER_STATIC "Build sqlite3 statically instead of using an installed lib" OFF)

if(NOT WIN32 AND NOT SQLITE_PREFER_STATIC)
    find_package(PkgConfig QUIET)

    # first check for sqlite on the system
    pkg_check_modules(SQLITE3 sqlite3)
endif()

if (NOT SQLITE3_FOUND)
    message(STATUS "looking for sqlite3 in source tree")
    # look in the source tree
    if (EXISTS "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.h" AND
        EXISTS "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.c")
        message(STATUS " found sqlite3 in source tree")
        set(SQLITE3_FOUND TRUE)
        set(SQLITE3_BUILD_SOURCE TRUE)
        set(SQLITE3_INCLUDE_DIRS "${PROJECT_SOURCE_DIR}/sqlite3")
        set(SQLITE3_LDFLAGS sqlite3_static)
    else()
        message(FATAL_ERROR " no sqlite3 in source tree")
    endif()
endif()

# now do version checks
if (SQLITE3_FOUND)
    # Snapshot the check-state variables and put them back at the end, so that
    # neither branch below leaks sqlite3 settings into later configure checks.
    set(sqlite3_saved_required_includes "${CMAKE_REQUIRED_INCLUDES}")
    set(sqlite3_saved_required_libraries "${CMAKE_REQUIRED_LIBRARIES}")

    # pkg-config may legitimately report no include directories (header on the
    # default search path); do not insert an empty element in that case.
    if (SQLITE3_INCLUDE_DIRS)
        list(INSERT CMAKE_REQUIRED_INCLUDES 0 ${SQLITE3_INCLUDE_DIRS})
    endif()

    CHECK_C_SOURCE_COMPILES("#include <sqlite3.h>\n#if SQLITE_VERSION_NUMBER >= 3008007 && SQLITE_VERSION_NUMBER < 3008010\n#error broken sqlite\n#endif\nint main() {return 0;}" SQLITE_VERSION_OK)
    if (NOT SQLITE_VERSION_OK)
        message(FATAL_ERROR "sqlite3 is broken from 3.8.7 to 3.8.10 - please find a working version")
    endif()

    if (NOT SQLITE3_BUILD_SOURCE)
        # system library: make sure the API we call is actually exported
        if (SQLITE3_LDFLAGS)
            list(INSERT CMAKE_REQUIRED_LIBRARIES 0 ${SQLITE3_LDFLAGS})
        endif()
        CHECK_SYMBOL_EXISTS(sqlite3_open_v2 sqlite3.h HAVE_SQLITE3_OPEN_V2)
    else()
        if (NOT TARGET sqlite3_static)
            # build sqlite as a static lib to compile into our test programs
            add_library(sqlite3_static STATIC "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.c")
            if (NOT WIN32)
                set_target_properties(sqlite3_static PROPERTIES COMPILE_FLAGS "-Wno-unused -Wno-cast-qual -DSQLITE_OMIT_LOAD_EXTENSION")
            endif()
        endif()
    endif()

    set(CMAKE_REQUIRED_INCLUDES "${sqlite3_saved_required_includes}")
    set(CMAKE_REQUIRED_LIBRARIES "${sqlite3_saved_required_libraries}")
    unset(sqlite3_saved_required_includes)
    unset(sqlite3_saved_required_libraries)
endif()

# that's enough about sqlite

19
tools/CMakeLists.txt Normal file
View File

@ -0,0 +1,19 @@
# Common configuration for everything built under tools/.
find_package(Threads)

# The tools are not built with -Wmissing-declarations: strip it from the
# C++ flags inherited from the parent directory.
if(CMAKE_CXX_FLAGS MATCHES "-Wmissing-declarations")
    string(REPLACE "-Wmissing-declarations" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()

# Header search paths shared by all tools.
include_directories(${PROJECT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
include_directories(${PROJECT_SOURCE_DIR}/util)

# Descend into every immediate subdirectory that provides its own
# CMakeLists.txt; anything else found by the glob is ignored.
file(GLOB dirents RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *)
foreach(e ${dirents})
    if(IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/${e})
        if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${e}/CMakeLists.txt)
            add_subdirectory(${e})
        endif()
    endif()
endforeach()

View File

@ -0,0 +1,36 @@
# Build rules for the hsbench benchmarking tool.

# Use the module form of include(): it searches every entry of
# CMAKE_MODULE_PATH, whereas splicing the variable into a file path only works
# while the variable holds exactly one directory.
include(sqlite3)

if (NOT XCODE)
    include_directories(SYSTEM ${SQLITE3_INCLUDE_DIRS})
else()
    # cmake doesn't think Xcode supports isystem
    # One -isystem per directory, so a list (or an empty value) still yields
    # well-formed compiler flags.
    foreach(sqlite3_include_dir ${SQLITE3_INCLUDE_DIRS})
        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${sqlite3_include_dir}")
    endforeach()
endif()

# Feature probes consumed by heapstats.cpp (malloc_info) and huge.cpp (shmget).
CHECK_FUNCTION_EXISTS(malloc_info HAVE_MALLOC_INFO)
CHECK_FUNCTION_EXISTS(shmget HAVE_SHMGET)
set(HAVE_SHMGET ${HAVE_SHMGET} CACHE BOOL "shmget()")

# only set these after all tests are done
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")

SET(hsbench_SOURCES
    common.h
    data_corpus.cpp
    data_corpus.h
    engine_hyperscan.cpp
    engine_hyperscan.h
    heapstats.cpp
    heapstats.h
    huge.cpp
    huge.h
    main.cpp
    thread_barrier.h
    timer.h
)

add_executable(hsbench ${hsbench_SOURCES})
target_link_libraries(hsbench hs databaseutil expressionutil ${SQLITE3_LDFLAGS}
    ${CMAKE_THREAD_LIBS_INIT})

8
tools/hsbench/README.md Normal file
View File

@ -0,0 +1,8 @@
Hyperscan Benchmarker: hsbench
==============================
The `hsbench` tool provides an easy way to measure Hyperscan's performance
for a particular set of patterns and corpus of data to be scanned.
Documentation describing its operation is available in the Tools section of the
[Developer Reference Guide](http://01org.github.io/hyperscan/dev-reference/).

42
tools/hsbench/common.h Normal file
View File

@ -0,0 +1,42 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef COMMON_H
#define COMMON_H
#include <string>
/** Scanning mode requested by the user; each value maps to one HS_MODE_* flag. */
enum class ScanMode { BLOCK, STREAMING, VECTORED };
// Settings shared between the hsbench sources. They are only declared here;
// presumably main.cpp defines them from the command line (TODO confirm).
/** When true, scans use the callback that prints every match it receives. */
extern bool echo_matches;
/** When true, the built database is written to a file under serializePath. */
extern bool saveDatabases;
/** When true, the database is loaded from serializePath instead of compiled. */
extern bool loadDatabases;
/** Directory prefix used to form the file names of saved/loaded databases. */
extern std::string serializePath;
/** Extra mode bits OR-ed into the compile mode when building for streaming. */
extern unsigned int somPrecisionMode;
#endif // COMMON_H

View File

@ -0,0 +1,133 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "data_corpus.h"
#include "util/container.h"
#include "ue2common.h"
#include <cassert>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include <sqlite3.h>
using namespace std;
using namespace ue2;
/**
 * Convert the current result row of \p statement into a DataBlock and append
 * it to \p blocks.
 *
 * Columns are read positionally: 0 = block id, 1 = stream id, 2 = payload.
 *
 * \param statement       statement positioned on a row (sqlite3_step returned
 *                        SQLITE_ROW).
 * \param blocks          output vector the new block is appended to.
 * \param stream_indices  map from corpus stream id to the dense index handed
 *                        out here; a stream seen for the first time receives
 *                        the next unused index (the current map size).
 */
static
void readRow(sqlite3_stmt *statement, vector<DataBlock> &blocks,
             map<unsigned int, unsigned int> &stream_indices) {
    unsigned int id = sqlite3_column_int(statement, 0);
    unsigned int stream_id = sqlite3_column_int(statement, 1);
    const char *blob = (const char *)sqlite3_column_blob(statement, 2);
    unsigned int bytes = sqlite3_column_bytes(statement, 2);

    auto it = stream_indices.find(stream_id);
    if (it == stream_indices.end()) {
        unsigned int next_index = stream_indices.size();
        it = stream_indices.emplace(stream_id, next_index).first;
    }
    const unsigned int internal_stream_index = it->second;

    // sqlite3_column_blob() returns NULL for a zero-length blob, so a null
    // pointer is only acceptable together with a zero byte count.
    assert(blob || bytes == 0);

    // Never build a string from a null pointer: an empty (or unreadable) blob
    // becomes an empty payload.
    blocks.emplace_back(id, stream_id, internal_stream_index,
                        blob ? string(blob, bytes) : string());
}
/**
 * Load every block stored in the corpus database \p filename.
 *
 * The database is opened read-only and the "chunk" table is read in id order.
 *
 * \param filename  path of the SQLite corpus file.
 * \return the blocks, in ascending id order; never empty.
 * \throws DataCorpusError if the file cannot be opened, the query cannot be
 *         prepared or stepped, or the table holds no rows.
 */
vector<DataBlock> readCorpus(const string &filename) {
    sqlite3 *db = nullptr;
    int status = sqlite3_open_v2(filename.c_str(), &db, SQLITE_OPEN_READONLY,
                                 nullptr);
    if (status != SQLITE_OK) {
        // The handle is normally valid even on failure (and must be closed),
        // but it is null if SQLite could not allocate it; report the failure
        // as an exception in both cases instead of tripping an assertion.
        ostringstream err;
        err << "Unable to open database '" << filename << "': "
            << (db ? sqlite3_errmsg(db) : sqlite3_errstr(status));
        status = sqlite3_close(db); // a null handle is a harmless no-op
        assert(status == SQLITE_OK);
        throw DataCorpusError(err.str());
    }
    assert(db);

    static const string query("SELECT id, stream_id, data "
                              "FROM chunk ORDER BY id;");

    sqlite3_stmt *statement = nullptr;
    status = sqlite3_prepare_v2(db, query.c_str(), query.size(), &statement,
                                nullptr);
    if (status != SQLITE_OK) {
        // Capture SQLite's diagnostic before the connection is closed.
        ostringstream oss;
        oss << "Query failed: " << query << " (" << sqlite3_errmsg(db) << ")";

        status = sqlite3_finalize(statement);
        assert(status == SQLITE_OK);
        status = sqlite3_close(db);
        assert(status == SQLITE_OK);

        throw DataCorpusError(oss.str());
    }

    vector<DataBlock> blocks;
    map<unsigned int, unsigned int> stream_indices;

    status = sqlite3_step(statement);
    while (status == SQLITE_ROW) {
        readRow(statement, blocks, stream_indices);
        status = sqlite3_step(statement);
    }

    if (status != SQLITE_DONE) {
        ostringstream oss;
        oss << "Error retrieving blocks from corpus: "
            << sqlite3_errstr(status);

        status = sqlite3_finalize(statement);
        assert(status == SQLITE_OK);
        status = sqlite3_close(db);
        assert(status == SQLITE_OK);

        throw DataCorpusError(oss.str());
    }

    status = sqlite3_finalize(statement);
    assert(status == SQLITE_OK);
    status = sqlite3_close(db);
    assert(status == SQLITE_OK);

    if (blocks.empty()) {
        throw DataCorpusError("Database contains no blocks.");
    }

    return blocks;
}

View File

@ -0,0 +1,63 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DATACORPUS_H
#define DATACORPUS_H
#include <string>
#include <utility>
#include <vector>
/** One block of corpus data together with the identifiers it was stored under. */
class DataBlock {
public:
    /**
     * \param block_id      unique block identifier.
     * \param corpus_stream stream identifier as recorded in the corpus file.
     * \param dense_stream  dense stream index allocated by hsbench.
     * \param bytes         block payload; taken by value and moved into place.
     */
    DataBlock(unsigned int block_id, unsigned int corpus_stream,
              unsigned int dense_stream, std::string bytes)
        : id(block_id),
          stream_id(corpus_stream),
          internal_stream_index(dense_stream),
          payload(std::move(bytes)) {}

    /** Unique block identifier. */
    unsigned int id;

    /** Unique stream identifier (from corpus file). */
    unsigned int stream_id;

    /** Dense index for this stream (allocated by hsbench). */
    unsigned int internal_stream_index;

    /** Actual block payload. */
    std::string payload;
};
/**
 * Exception thrown if an error occurs while reading a corpus. It carries only
 * a human-readable description of what went wrong.
 */
class DataCorpusError {
public:
    /** \param description text of the error; moved into \ref msg. */
    explicit DataCorpusError(std::string description)
        : msg(std::move(description)) {}

    /** Description of the failure. */
    std::string msg;
};
/**
 * Interface to a corpus database. Any error will produce a DataCorpusError
 * and should be considered fatal.
 *
 * \param filename path of the corpus database file to read.
 * \return every block found in the corpus.
 * \throws DataCorpusError on any failure.
 */
std::vector<DataBlock> readCorpus(const std::string &filename);
#endif // DATACORPUS_H

View File

@ -0,0 +1,411 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "ExpressionParser.h"
#include "common.h"
#include "engine_hyperscan.h"
#include "expressions.h"
#include "heapstats.h"
#include "huge.h"
#include "timer.h"
#include "crc32.h"
#include "database.h"
#include "hs_compile.h"
#include "hs_internal.h"
#include "hs_runtime.h"
#include "util/database_util.h"
#include "util/make_unique.h"
#include <cassert>
#include <cstring>
#include <iomanip>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
using namespace std;
/**
 * Allocate the Hyperscan scratch region used when scanning against \p db.
 *
 * A context without scratch cannot be used for any scan, so an allocation
 * failure is reported and treated as fatal, in the same way as the scan
 * functions in this file treat Hyperscan errors.
 */
EngineContext::EngineContext(const hs_database_t *db) {
    hs_error_t rv = hs_alloc_scratch(db, &scratch);
    if (rv != HS_SUCCESS) {
        printf("Fatal error: hs_alloc_scratch returned error %d\n", rv);
        abort();
    }
    assert(scratch);
}
/** Free the scratch region allocated by the constructor. */
EngineContext::~EngineContext() {
hs_free_scratch(scratch);
}
namespace /* anonymous */ {

/**
 * State handed to the match callbacks through the opaque context pointer of
 * the Hyperscan scan calls.
 */
struct ScanContext {
    ScanContext(unsigned int scan_id, ResultEntry &entry,
                const EngineStream *engine_stream)
        : id(scan_id), result(entry), stream(engine_stream) {}

    /** Identifier echoed when matches are printed. */
    unsigned int id;

    /** Result record whose match counter the callbacks increment. */
    ResultEntry &result;

    /** Stream being scanned; nullptr except in streaming mode. */
    const EngineStream *stream;
};

} // namespace
/**
 * Match callback installed when "echo matches" is off: it only counts.
 *
 * The pattern id, offsets and flags supplied by Hyperscan are ignored; \p ctx
 * must point to the ScanContext of the scan in progress. Always returns 0.
 */
static
int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int,
            void *ctx) {
    auto *scan_ctx = static_cast<ScanContext *>(ctx);
    assert(scan_ctx);
    ++scan_ctx->result.matches;
    return 0;
}
/**
* Callback function called for every match that Hyperscan produces when "echo
* matches" is enabled.
*/
static
int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
unsigned int, void *ctx) {
ScanContext *sc = static_cast<ScanContext *>(ctx);
assert(sc);
sc->result.matches++;
if (sc->stream) {
printf("Match @%u:%u:%llu for %u\n", sc->stream->sn, sc->id, to, id);
} else {
printf("Match @%u:%llu for %u\n", sc->id, to, id);
}
return 0;
}
/** Wrap an already-built database; \p db_in is asserted to be non-null. */
EngineHyperscan::EngineHyperscan(hs_database_t *db_in) : db(db_in) {
assert(db);
}
/** Hand the database back through release_huge() when the engine goes away. */
EngineHyperscan::~EngineHyperscan() {
release_huge(db);
}
/** Create a new EngineContext (which allocates scratch) for this database. */
unique_ptr<EngineContext> EngineHyperscan::makeContext() const {
return ue2::make_unique<EngineContext>(db);
}
void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id,
ResultEntry &result, EngineContext &ctx) const {
assert(data);
ScanContext sc(id, result, nullptr);
auto callback = echo_matches ? onMatchEcho : onMatch;
hs_error_t rv = hs_scan(db, data, len, 0, ctx.scratch, callback, &sc);
if (rv != HS_SUCCESS) {
printf("Fatal error: hs_scan returned error %d\n", rv);
abort();
}
}
void EngineHyperscan::scan_vectored(const char *const *data,
const unsigned int *len, unsigned int count,
unsigned streamId, ResultEntry &result,
EngineContext &ctx) const {
assert(data);
assert(len);
ScanContext sc(streamId, result, nullptr);
auto callback = echo_matches ? onMatchEcho : onMatch;
hs_error_t rv =
hs_scan_vector(db, data, len, count, 0, ctx.scratch, callback, &sc);
if (rv != HS_SUCCESS) {
printf("Fatal error: hs_scan_vector returned error %d\n", rv);
abort();
}
}
/**
 * Open a new Hyperscan stream on this database.
 *
 * \param ctx       context whose scratch later scans on the stream will use.
 * \param streamId  caller-chosen stream number, echoed with matches.
 * \return the open stream, or nullptr if Hyperscan could not open one.
 */
unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ctx,
                                                     unsigned streamId) const {
    auto stream = ue2::make_unique<EngineStream>();
    stream->ctx = &ctx;
    stream->sn = streamId;
    // Start from a known value so the handle is never read uninitialised.
    stream->id = nullptr;

    // Trust the return code first; the handle is only meaningful on success.
    hs_error_t rv = hs_open_stream(db, 0, &stream->id);
    if (rv != HS_SUCCESS || !stream->id) {
        // an error occurred, propagate to caller
        return nullptr;
    }
    return stream;
}
/**
 * Close \p stream, counting any matches delivered during the close in
 * \p result. Ownership of the stream is taken; it is destroyed on return.
 */
void EngineHyperscan::streamClose(unique_ptr<EngineStream> stream,
                                  ResultEntry &result) const {
    assert(stream);

    EngineStream &open_stream = *stream;
    EngineContext &ctx = *open_stream.ctx;
    ScanContext scan_ctx(0, result, &open_stream);

    assert(open_stream.id);
    hs_close_stream(open_stream.id, ctx.scratch,
                    echo_matches ? onMatchEcho : onMatch, &scan_ctx);

    // The handle is no longer valid once closed.
    open_stream.id = nullptr;
}
/**
 * Streaming-mode scan: feed one more buffer to an open stream.
 *
 * \param stream  stream returned by streamOpen(); its context supplies scratch.
 * \param data    buffer to scan; must be non-null.
 * \param len     length of \p data in bytes.
 * \param id      identifier reported when matches are echoed.
 * \param result  record whose match count is updated.
 *
 * Any Hyperscan error is fatal: it is printed and the process aborts.
 */
void EngineHyperscan::streamScan(EngineStream &stream, const char *data,
                                 unsigned len, unsigned id,
                                 ResultEntry &result) const {
    assert(data);

    ScanContext scan_ctx(id, result, &stream);
    const hs_error_t status =
        hs_scan_stream(stream.id, data, len, 0, stream.ctx->scratch,
                       echo_matches ? onMatchEcho : onMatch, &scan_ctx);
    if (status == HS_SUCCESS) {
        return;
    }

    printf("Fatal error: hs_scan_stream returned error %d\n", status);
    abort();
}
/**
 * Translate the tool's ScanMode into the matching HS_MODE_* compile flag.
 * An unrecognised value asserts in debug builds and otherwise falls back to
 * HS_MODE_STREAM.
 */
static
unsigned makeModeFlags(ScanMode scan_mode) {
    if (scan_mode == ScanMode::BLOCK) {
        return HS_MODE_BLOCK;
    }
    if (scan_mode == ScanMode::STREAMING) {
        return HS_MODE_STREAM;
    }
    if (scan_mode == ScanMode::VECTORED) {
        return HS_MODE_VECTORED;
    }

    assert(0);
    return HS_MODE_STREAM;
}
/**
 * Hash the settings used to compile a database, returning a string that can be
 * used as a filename.
 *
 * The text "<filename> <mode> " is checksummed with CRC32C and the result is
 * rendered as eight zero-padded lowercase hex digits.
 */
static
string dbSettingsHash(const string &filename, u32 mode) {
    ostringstream settings;
    settings << filename.c_str() << ' ' << mode << ' ';
    const string text = settings.str();

    const u32 checksum = Crc32c_ComputeBuf(0, text.data(), text.size());

    // printable version of the digest
    ostringstream digest;
    digest << hex << setw(8) << setfill('0') << checksum;
    return digest.str();
}
/**
 * Path of the serialized database for signature set \p name compiled with
 * \p mode: "<serializePath>/<settings hash>.db".
 */
static
string dbFilename(const std::string &name, unsigned mode) {
    return serializePath + '/' + dbSettingsHash(name, mode) + ".db";
}
/**
 * Produce a ready-to-scan engine for \p expressions and print a summary of
 * the resulting database to stdout.
 *
 * When loadDatabases is set the database is read from the file named by
 * dbFilename(); otherwise every expression is parsed and compiled. The
 * database is then passed through get_huge(), optionally saved (when
 * saveDatabases is set), and sized.
 *
 * \param expressions  map from pattern id to pattern text; must be non-empty.
 * \param scan_mode    mode to compile for.
 * \param name         name of the signature set (used for the summary and for
 *                     the serialized database file name).
 * \param grey         compile-time tuning; only used in non-release builds.
 * \return the engine, or nullptr if any step fails.
 */
std::unique_ptr<EngineHyperscan>
buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
                     const std::string &name, UNUSED const ue2::Grey &grey) {
    if (expressions.empty()) {
        assert(0);
        return nullptr;
    }

    long double compileSecs = 0.0;
    size_t compiledSize = 0;
    size_t streamSize = 0;
    size_t scratchSize = 0;
    size_t peakMemorySize = 0; // size_t: getPeakHeap() may exceed 32 bits
    unsigned int crc = 0;
    std::string db_info;

    unsigned int mode = makeModeFlags(scan_mode);

    hs_database_t *db = nullptr;
    hs_error_t err;

    if (loadDatabases) {
        db = loadDatabase(dbFilename(name, mode).c_str());
        if (!db) {
            return nullptr;
        }
    } else {
        const unsigned int count = expressions.size();

        vector<string> exprs;
        vector<unsigned int> flags, ids;
        vector<hs_expr_ext> ext;

        for (const auto &m : expressions) {
            string expr;
            unsigned int f = 0;
            hs_expr_ext extparam;
            extparam.flags = 0;
            if (!readExpression(m.second, expr, &f, &extparam)) {
                printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(),
                       m.first);
                return nullptr;
            }

            exprs.push_back(expr);
            ids.push_back(m.first);
            flags.push_back(f);
            ext.push_back(extparam);
        }

        unsigned full_mode = mode;
        if (mode == HS_MODE_STREAM) {
            full_mode |= somPrecisionMode;
        }

        // Our compiler takes an array of plain ol' C strings.
        vector<const char *> patterns(count);
        for (unsigned int i = 0; i < count; i++) {
            patterns[i] = exprs[i].c_str();
        }

        // Extended parameters are passed as pointers to hs_expr_ext structures.
        vector<const hs_expr_ext *> ext_ptr(count);
        for (unsigned int i = 0; i < count; i++) {
            ext_ptr[i] = &ext[i];
        }

        Timer timer;
        timer.start();

        hs_compile_error_t *compile_err = nullptr;

#ifndef RELEASE_BUILD
        err = hs_compile_multi_int(patterns.data(), flags.data(), ids.data(),
                                   ext_ptr.data(), count, full_mode, nullptr,
                                   &db, &compile_err, grey);
#else
        err = hs_compile_ext_multi(patterns.data(), flags.data(), ids.data(),
                                   ext_ptr.data(), count, full_mode, nullptr,
                                   &db, &compile_err);
#endif

        timer.complete();
        compileSecs = timer.seconds();
        peakMemorySize = getPeakHeap();

        // Any failure stops here, not only HS_COMPILER_ERROR: carrying on
        // after another error code would use a database that was never built.
        if (err != HS_SUCCESS) {
            if (compile_err) {
                if (compile_err->expression >= 0) {
                    printf("Compile error for signature #%u: %s\n",
                           (unsigned int)compile_err->expression,
                           compile_err->message);
                } else {
                    printf("Compile error: %s\n", compile_err->message);
                }
                hs_free_compile_error(compile_err);
            } else {
                printf("Compile error: Hyperscan returned error %d\n", err);
            }
            return nullptr;
        }
    }

    // copy the db into huge pages (where available) to reduce TLB pressure
    db = get_huge(db);
    if (!db) {
        return nullptr;
    }

    // NOTE(review): the early returns below do not release db; that looks
    // tolerable only if callers treat a null engine as fatal - confirm.
    err = hs_database_size(db, &compiledSize);
    if (err != HS_SUCCESS) {
        return nullptr;
    }
    assert(compiledSize > 0);

    crc = db->crc32;

    if (saveDatabases) {
        saveDatabase(db, dbFilename(name, mode).c_str());
    }

    if (mode & HS_MODE_STREAM) {
        err = hs_stream_size(db, &streamSize);
        if (err != HS_SUCCESS) {
            return nullptr;
        }
    } else {
        streamSize = 0;
    }

    char *info;
    err = hs_database_info(db, &info);
    if (err != HS_SUCCESS) {
        return nullptr;
    } else {
        db_info = string(info);
        free(info);
    }

    // Allocate scratch temporarily to find its size: this is a good test
    // anyway.
    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    if (err != HS_SUCCESS) {
        return nullptr;
    }

    // Free the temporary scratch before looking at the result so that it is
    // released on the failure path as well.
    err = hs_scratch_size(scratch, &scratchSize);
    hs_free_scratch(scratch);
    if (err != HS_SUCCESS) {
        return nullptr;
    }

    // Output summary information.
    printf("Signatures:        %s\n", name.c_str());
    printf("Hyperscan info:    %s\n", db_info.c_str());
    printf("Expression count:  %'zu\n", expressions.size());
    printf("Bytecode size:     %'zu bytes\n", compiledSize);
    printf("Database CRC:      0x%x\n", crc);
    if (mode & HS_MODE_STREAM) {
        printf("Stream state size: %'zu bytes\n", streamSize);
    }
    printf("Scratch size:      %'zu bytes\n", scratchSize);
    printf("Compile time:      %'0.3Lf seconds\n", compileSecs);
    printf("Peak heap usage:   %'zu bytes\n", peakMemorySize);

    return ue2::make_unique<EngineHyperscan>(db);
}

View File

@ -0,0 +1,97 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ENGINEHYPERSCAN_H
#define ENGINEHYPERSCAN_H
#include "expressions.h"
#include "common.h"
#include "hs_runtime.h"
#include <memory>
/**
 * Outcome of one complete scan. A default-constructed entry reports zero
 * elapsed time and zero matches.
 */
struct ResultEntry {
    /** Time taken for scan. */
    double seconds = 0.0;

    /** Count of matches found. */
    unsigned int matches = 0u;
};
/**
 * Engine context which is allocated on a per-thread basis.
 *
 * The context owns its scratch region: it is allocated for the given database
 * on construction and freed on destruction.
 */
class EngineContext {
public:
    explicit EngineContext(const hs_database_t *db);
    ~EngineContext();

    // The destructor frees scratch, so a copy would free it a second time.
    EngineContext(const EngineContext &) = delete;
    EngineContext &operator=(const EngineContext &) = delete;

    /** Scratch region handed to every scan made with this context. */
    hs_scratch_t *scratch = nullptr;
};
/**
 * Streaming mode scans have persistent stream state associated with them.
 *
 * Every member has a defined initial value, so a freshly constructed stream
 * reads as "not open" until it has been filled in.
 */
class EngineStream {
public:
    /** Hyperscan stream handle; null when no stream is open. */
    hs_stream_t *id = nullptr;

    /** Caller-assigned stream number, echoed alongside matches. */
    unsigned int sn = 0;

    /** Context whose scratch region is used for scans on this stream. */
    EngineContext *ctx = nullptr;
};
/**
 * Hyperscan Engine for scanning data.
 *
 * The engine takes ownership of the database it is constructed with and
 * releases it on destruction. Fatal Hyperscan errors raised by the scan calls
 * are printed and abort the process.
 */
class EngineHyperscan {
public:
    /** Take ownership of \p db, which must be non-null. */
    explicit EngineHyperscan(hs_database_t *db);
    ~EngineHyperscan();

    // The destructor releases the database, so a copy would release it twice.
    EngineHyperscan(const EngineHyperscan &) = delete;
    EngineHyperscan &operator=(const EngineHyperscan &) = delete;

    /** Create a context (holding scratch) for use with this engine. */
    std::unique_ptr<EngineContext> makeContext() const;

    /** Block-mode scan of \p len bytes at \p data. */
    void scan(const char *data, unsigned int len, unsigned int id,
              ResultEntry &result, EngineContext &ctx) const;

    /** Vectored-mode scan of \p count buffers. */
    void scan_vectored(const char *const *data, const unsigned int *len,
                       unsigned int count, unsigned int streamId,
                       ResultEntry &result, EngineContext &ctx) const;

    /** Open a stream numbered \p id; returns nullptr on failure. */
    std::unique_ptr<EngineStream> streamOpen(EngineContext &ctx,
                                             unsigned id) const;

    /** Close \p stream, counting matches raised by the close in \p result. */
    void streamClose(std::unique_ptr<EngineStream> stream,
                     ResultEntry &result) const;

    /** Streaming-mode scan of \p len bytes at \p data on \p stream. */
    void streamScan(EngineStream &stream, const char *data, unsigned int len,
                    unsigned int id, ResultEntry &result) const;

private:
    /** Compiled pattern database scanned by this engine. */
    hs_database_t *db;
};
// Forward declaration: Grey is only passed by reference below.
namespace ue2 {
struct Grey;
}
/**
 * Build an engine for the given expressions, either by compiling them or by
 * loading a previously saved database, and print a summary of the result.
 *
 * \param expressions patterns to build, keyed by id.
 * \param scan_mode   scanning mode to build for.
 * \param name        name of the signature set.
 * \param grey        compile-time tuning passed to the compiler.
 * \return the engine, or nullptr on failure.
 */
std::unique_ptr<EngineHyperscan>
buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
const std::string &name, const ue2::Grey &grey);
#endif // ENGINEHYPERSCAN_H

146
tools/hsbench/heapstats.cpp Normal file
View File

@ -0,0 +1,146 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Peak heap usage code.
*
* At present, we only have an implementation for modern glibc systems, using
* the malloc_info() call. We return zero elsewhere.
*/
#include "config.h"
#include "heapstats.h"
#if defined HAVE_MALLOC_INFO
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <malloc.h>
/**
 * Peak heap usage as reported by malloc_info().
 *
 * The report is written to a temporary file and scanned line by line; the
 * value on the last line that starts with the "system max" tag is returned.
 * Returns 0 if the temporary file cannot be created, if malloc_info() fails,
 * or if no such line is present.
 */
size_t getPeakHeap(void) {
    FILE *report = tmpfile();
    if (!report) {
        return 0;
    }

    if (malloc_info(0, report) != 0) {
        fclose(report);
        return 0;
    }

    rewind(report);

    // We don't want to depend on a real XML parser. This is ugly and brittle
    // and hopefully good enough for the time being. We look for the last
    // system tag with type max, which should be the malloc-wide one.
    static const char prefix[] = "<system type=\"max\" size=\"";
    const size_t prefix_len = strlen(prefix);

    char *line = nullptr;
    size_t capacity = 0;
    size_t peak = 0;

    while (getline(&line, &capacity, report) != -1) {
        if (strncmp(line, prefix, prefix_len) != 0) {
            continue;
        }

        errno = 0;
        peak = (size_t)strtoull(line + prefix_len, nullptr, 10);
        if (errno != 0) {
            break; // stop at the first number that fails to parse
        }
    }

    free(line);
    fclose(report);
    return peak;
}
#elif defined __linux
#include <cstdlib>
#include <fstream>
#include <sstream>
#include <string>
#include <sys/types.h>
#include <unistd.h>
using namespace std;
/**
 * Peak memory usage taken from the "VmPeak:" line of /proc/<pid>/status.
 *
 * Modern Linux kernels write a 'VmPeak' value into that file. This is a
 * reasonable approximation, though it likely includes shared libs and the
 * like as well. The number found is multiplied by 1024 (presumably the field
 * is reported in kB - confirm against proc(5)). Returns 0 if the file cannot
 * be opened or contains no such line.
 */
size_t getPeakHeap(void) {
    std::ostringstream status_path;
    status_path << "/proc/" << getpid() << "/status";

    std::ifstream status_file(status_path.str().c_str());
    if (!status_file.good()) {
        return 0;
    }

    const std::string wanted("VmPeak:");

    for (std::string line; std::getline(status_file, line);) {
        std::istringstream fields(line);
        std::string label;
        fields >> label;

        if (label == wanted) {
            // Skip everything up to the first digit of the value.
            while (fields.good() && !isdigit(fields.peek())) {
                fields.ignore();
            }

            size_t value = 0;
            fields >> value;
            return value * 1024;
        }
    }

    return 0;
}
#else
// Stub: used when neither malloc_info() nor __linux is available at build
// time; always reports zero.
size_t getPeakHeap(void) {
return 0;
}
#endif

36
tools/hsbench/heapstats.h Normal file
View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HEAPSTATS_H
#define HEAPSTATS_H
#include <cstddef> // for size_t
/**
 * Peak heap usage of the current process, or 0 when it cannot be determined
 * (no supported mechanism on this platform, or the measurement failed).
 */
size_t getPeakHeap(void);
#endif

201
tools/hsbench/huge.cpp Normal file
View File

@ -0,0 +1,201 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "hs.h"
#include "ue2common.h"
#include "common.h"
#include "huge.h"
#ifndef _WIN32
#include <cstdio>
#include <cstring>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#if defined(HAVE_SHMGET)
#include <sys/ipc.h>
#include <sys/shm.h>
#endif
UNUSED static int hsdb_shmid;
using namespace std;
long gethugepagesize(void);
/**
 * Try to relocate a compiled database into a SysV shared memory segment
 * created with SHM_HUGETLB.
 *
 * \param db  Database to relocate.
 * \return    On success, a pointer to the relocated database; the original
 *            db has been freed and the result must be released with
 *            release_huge(). If the huge page size is unknown or the segment
 *            cannot be created/attached/populated, the original db is
 *            returned and hsdb_shmid is set to -1. Returns nullptr if the
 *            database cannot be serialized or its size cannot be determined.
 *
 * When HAVE_SHMGET or SHM_HUGETLB is unavailable this simply returns db.
 */
hs_database_t *get_huge(hs_database_t *db) {
#if defined(HAVE_SHMGET) && defined(SHM_HUGETLB)
    /* move the database to huge pages where possible, but fail politely */
    hs_error_t err;
    size_t len;
    char *bytes;

    long hpage_size = gethugepagesize();
    if (hpage_size < 0) {
        printf("Couldn't determine huge page size\n");
        hsdb_shmid = -1;
        return db;
    }

    err = hs_serialize_database(db, &bytes, &len);
    if (err != HS_SUCCESS) {
        printf("Failed to serialize database for copy: %d\n", err);
        // this is weird - don't fail gracefully this time
        return nullptr;
    }

    size_t size;
    err = hs_serialized_database_size(bytes, len, &size);
    if (err != HS_SUCCESS) {
        printf("Failed to get database size: %d\n", err);
        // this is weird - don't fail gracefully this time
        free(bytes); // the serialized copy is ours; don't leak it
        return nullptr;
    }

    void *shmaddr;
    // Reuse the page size we already looked up rather than re-reading
    // /proc/meminfo a second time.
    if ((hsdb_shmid = shmget(IPC_PRIVATE, ROUNDUP_N(size, hpage_size),
                             SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
        // This could fail if the user doesn't have permission to shmget(),
        // which is OK.
        goto fini;
    }

    shmaddr = shmat(hsdb_shmid, nullptr, SHM_RND);
    if (shmaddr == (void *)-1) {
        perror("Shared memory attach failure");
        // The segment exists but we never attached: remove it now, otherwise
        // it would outlive this process.
        shmctl(hsdb_shmid, IPC_RMID, nullptr);
        goto fini;
    }

    // Mark this segment to be destroyed after this process detaches.
    shmctl(hsdb_shmid, IPC_RMID, nullptr);

    err = hs_deserialize_database_at(bytes, len, (hs_database_t *)shmaddr);
    if (err != HS_SUCCESS) {
        printf("Failed to deserialize database into shm: %d\n", err);
        shmdt((const void *)shmaddr);
        goto fini;
    }

    free(bytes);
    hs_free_database(db);
    return (hs_database_t *)shmaddr;

fini:
    // Fall back to the caller's original database.
    free(bytes);
    hsdb_shmid = -1;
    return db;
#else
    return db;
#endif
}
/**
 * Release a database previously passed through get_huge().
 *
 * If hsdb_shmid records a live shared memory segment, the database is
 * detached from it; otherwise the database is freed with hs_free_database().
 */
void release_huge(hs_database_t *db) {
#if defined(HAVE_SHMGET) && defined(SHM_HUGETLB)
    if (hsdb_shmid == -1) {
        // fallback
        hs_free_database(db);
        return;
    }

    const int detach_rv = shmdt((const void *)db);
    if (detach_rv != 0) {
        perror("Detach failure");
    }
#else
    hs_free_database(db);
#endif
}
#define BUF_SIZE 4096

/**
 * Look up an integer field in /proc/meminfo.
 *
 * \param tag  Label to search for, including its trailing colon
 *             (e.g. "Hugepagesize:").
 * \return     The integer that follows the tag, or -1 if the file cannot be
 *             opened or read, does not fit in the buffer, does not contain
 *             the tag, or the text after the tag is not a number followed by
 *             whitespace.
 */
static long read_meminfo(const char *tag) {
    int fd;
    char buf[BUF_SIZE];
    int len;
    char *p, *q;
    long val;

    fd = open("/proc/meminfo", O_RDONLY);
    if (fd < 0) {
        perror("Couldn't open /proc/meminfo");
        return -1;
    }

    len = read(fd, buf, sizeof(buf));
    if (len < 0) {
        // Report before close(): close() may overwrite errno.
        perror("Error reading /proc/meminfo");
        close(fd);
        return -1;
    }
    close(fd);

    // A completely full buffer leaves no room for the terminator and
    // probably means the file was truncated.
    if ((size_t)len == sizeof(buf)) {
        printf("/proc/meminfo is too large\n");
        return -1;
    }
    buf[len] = '\0';

    p = strstr(buf, tag);
    if (!p) {
        return -1;
    }
    p += strlen(tag);

    val = strtol(p, &q, 0);
    // q == p means strtol consumed no digits at all; the cast keeps isspace()
    // well-defined for bytes with the high bit set.
    if (q == p || !isspace((unsigned char)*q)) {
        printf("Couldn't parse /proc/meminfo value\n");
        return -1;
    }

    return val;
}
/**
 * Report the huge page size in bytes, derived from the "Hugepagesize:"
 * entry of /proc/meminfo (which this code treats as a value in kB).
 *
 * \return  Size in bytes, or -1 if the entry could not be read.
 */
long gethugepagesize(void) {
    // Keep the value as long: narrowing to int, then multiplying in int,
    // can overflow for very large huge page sizes.
    const long hpage_kb = read_meminfo("Hugepagesize:");
    if (hpage_kb < 0) {
        return -1;
    }

    /* convert from kb to bytes */
    return 1024L * hpage_kb;
}
#else
/* No huge page support on WIN32. */
// get_huge() hands back the database unchanged.
hs_database_t *get_huge(hs_database_t *db) { return db; }
// release_huge() therefore only needs to free the database.
void release_huge(hs_database_t *db) { hs_free_database(db); }
#endif

37
tools/hsbench/huge.h Normal file
View File

@ -0,0 +1,37 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HUGE_H
#define HUGE_H
#include "hs.h"
// Try to move db into huge-page-backed memory. Returns the database to use
// from now on (either a relocated copy, in which case db has been freed, or
// db itself), or nullptr on an unexpected serialization failure.
hs_database_t *get_huge(hs_database_t *db);
// Release a database that was returned by get_huge().
void release_huge(hs_database_t *db);
#endif /* HUGE_H */

780
tools/hsbench/main.cpp Normal file
View File

@ -0,0 +1,780 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "common.h"
#include "data_corpus.h"
#include "engine_hyperscan.h"
#include "expressions.h"
#include "thread_barrier.h"
#include "timer.h"
#include "util/expression_path.h"
#include "util/string_util.h"
#include "grey.h"
#include "hs.h"
#include "ue2common.h"
#include "util/make_unique.h"
#include <algorithm>
#include <clocale>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <map>
#include <numeric>
#include <sstream>
#include <set>
#include <thread>
#include <getopt.h>
#ifndef _WIN32
#include <pthread.h>
#include <unistd.h>
#endif
#include <boost/core/noncopyable.hpp>
#include <boost/range/adaptor/map.hpp>
using namespace std;
using namespace ue2;
using boost::adaptors::map_keys;
// Globals common to all files.
// Set by --echo-matches.
bool echo_matches = false;
// Set by -w: save compiled databases under serializePath.
bool saveDatabases = false;
// Set by -i: load databases from serializePath instead of compiling.
bool loadDatabases = false;
// Directory argument given to -i or -w.
string serializePath("");
// SOM precision mode, selected with -d (2, 4 or 8).
unsigned int somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE;
namespace /* anonymous */ {
// Globals local to this file.
// Set by --per-scan: print throughput for every individual scan.
bool display_per_scan = false;
// Scanning mode; -N selects block mode, -V vectored mode.
ScanMode scan_mode = ScanMode::STREAMING;
// Number of times the corpus is scanned per thread (-n).
unsigned repeats = 20;
// Expression directory (-e), or inferred from the signature files.
string exprPath("");
// Corpus database file (-c).
string corpusFile("");
// CPU list from -T; one benchmark thread is started per entry.
vector<unsigned int> threadCores;
// Wall-clock timer driven by thread 0 only, and its result in seconds.
Timer totalTimer;
double totalSecs = 0;
// Entry point signature for a benchmark thread; context is a ThreadContext.
typedef void (*thread_func_t)(void *context);
/**
 * Everything one benchmark thread needs: its index, a timer, one result slot
 * per repeat, the engine and a scanning context for it, a private copy of the
 * corpus, and the thread handle itself.
 */
class ThreadContext : boost::noncopyable {
public:
// corpus_data_in is taken by value and moved into place, so each context
// owns its own copy of the corpus. results gets one entry per repeat.
ThreadContext(unsigned num_in, const EngineHyperscan &db_in,
thread_barrier &tb_in, thread_func_t function_in,
vector<DataBlock> corpus_data_in)
: num(num_in), results(repeats), engine(db_in),
enginectx(db_in.makeContext()), corpus_data(move(corpus_data_in)),
tb(tb_in), function(function_in) {}
// Start the thread.
// The thread begins running function(this) immediately; affinity is applied
// afterwards. Returns false only if affinity was requested (cpu >= 0) and
// could not be applied.
bool start(int cpu) {
thr = thread(function, this);
// affine if it's asked for
if (cpu >= 0) {
return affine(cpu);
}
return true;
}
// Wait for the thread to exit.
void join() {
thr.join();
}
// Serialise all threads on a global barrier.
void barrier() {
tb.wait();
}
// Apply processor affinity (if available) to this thread.
// Returns true on success; false if pthread_setaffinity_np() fails or is not
// compiled in.
bool affine(UNUSED int cpu) {
#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
assert(cpu >= 0 && cpu < CPU_SETSIZE);
// The 'clang' compiler complains about an unused result here, so we
// silence it.
(void)CPU_SET(cpu, &cpuset);
int rv = pthread_setaffinity_np(thr.native_handle(), sizeof(cpuset),
&cpuset);
return (rv == 0);
#endif
return false; // not available
}
unsigned num; // thread index; thread 0 drives the global timer
Timer timer; // per-scan timer for this thread
vector<ResultEntry> results; // one entry per repeat
const EngineHyperscan &engine;
unique_ptr<EngineContext> enginectx; // created from the engine in the ctor
vector<DataBlock> corpus_data; // this thread's own copy of the corpus
protected:
thread_barrier &tb; // shared barrier for time sync
thread_func_t function; // benchmark routine run by thr
thread thr;
};
/**
 * Print the command-line help to stdout. When error is non-null, an
 * "Error: ..." line is appended after the help text.
 */
static
void usage(const char *error) {
    // Each entry is emitted verbatim, in order.
    static const char *const help_lines[] = {
        "Usage: hsbench [OPTIONS...]\n\n",
        "Options:\n\n",
        " -h Display help and exit.\n",
        " -G OVERRIDES Overrides for the grey box.\n",
        " -e PATH Path to expression directory.\n",
        " -s FILE Signature file to use.\n",
        " -z NUM Signature ID to use.\n",
        " -c FILE File to use as corpus.\n",
        " -n NUMBER Repeat scan NUMBER times (default 20).\n",
        " -N Benchmark in block mode"
        " (default: streaming).\n",
        " -V Benchmark in vectored mode"
        " (default: streaming).\n",
        " -T CPU,CPU,... Benchmark with threads on these CPUs.\n",
        " -i DIR Don't compile, load from files in DIR"
        " instead.\n",
        " -w DIR After compiling, save to files in DIR.\n",
        " -d NUMBER Set SOM precision mode (default: 8 (large)).\n",
        "\n",
        " --per-scan Display per-scan Mbit/sec results.\n",
        " --echo-matches Display all matches that occur during scan.\n",
        "\n\n",
    };

    for (const char *line : help_lines) {
        printf("%s", line);
    }

    if (error) {
        printf("Error: %s\n", error);
    }
}
/** Wraps up a name and the set of signature IDs it refers to. */
struct BenchmarkSigs {
// Both arguments are taken by value and moved into the members.
BenchmarkSigs(string name_in, SignatureSet sigs_in)
: name(move(name_in)), sigs(move(sigs_in)) {}
string name; // label for this set (a file name, "-z N" or the expression path)
SignatureSet sigs; // signature IDs to benchmark together
};
/** Process command-line arguments. Prints usage and exits on error. */
// Fills the file-level option globals and appends one BenchmarkSigs entry to
// sigSets for every -z ID and every -s signature file. grey is only touched
// by -G, which is compiled out of release builds.
static
void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
UNUSED Grey &grey) {
// NOTE(review): the leading '-' presumably asks GNU getopt to report
// non-option arguments as option code 1 (handled by 'case 1' below) - confirm
// against getopt(3).
const char options[] = "-b:c:Cd:e:G:hi:n:No:p:sT:Vw:z:";
// Non-zero while we are collecting the file names that follow -s; see the
// decrement at the bottom of the loop.
int in_sigfile = 0;
int do_per_scan = 0;
int do_echo_matches = 0;
vector<string> sigFiles;
static struct option longopts[] = {
{"per-scan", 0, &do_per_scan, 1},
{"echo-matches", 0, &do_echo_matches, 1},
{nullptr, 0, nullptr, 0}
};
for (;;) {
int c = getopt_long(argc, argv, options, longopts, nullptr);
if (c < 0) {
break;
}
switch (c) {
case 'c':
corpusFile.assign(optarg);
break;
case 'd': {
unsigned dist;
if (!fromString(optarg, dist)) {
usage("Must provide an integer argument to '-d' flag");
exit(1);
}
switch (dist) {
case 2:
somPrecisionMode = HS_MODE_SOM_HORIZON_SMALL;
break;
case 4:
somPrecisionMode = HS_MODE_SOM_HORIZON_MEDIUM;
break;
case 8:
somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE;
break;
default:
usage("SOM precision must be 2, 4 or 8");
exit(1);
}
break;
}
case 'e':
exprPath.assign(optarg);
break;
#ifndef RELEASE_BUILD
case 'G':
applyGreyOverrides(&grey, string(optarg));
break;
#endif
case 'h':
usage(nullptr);
exit(0);
break;
case 'n':
if (!fromString(optarg, repeats) || repeats == 0) {
usage("Couldn't parse argument to -n flag, should be"
" a positive integer.");
exit(1);
}
break;
case 's':
// Set to 2 so that the decrement below leaves it at 1 for the next argument.
in_sigfile = 2;
break;
case 'N':
scan_mode = ScanMode::BLOCK;
break;
case 'V':
scan_mode = ScanMode::VECTORED;
break;
case 'T':
if (!strToList(optarg, threadCores)) {
usage("Couldn't parse argument to -T flag, should be"
" a list of positive integers.");
exit(1);
}
break;
case 'z': {
unsigned int sinumber;
if (!fromString(optarg, sinumber)) {
usage("Argument to '-z' flag must be an integer");
exit(1);
}
SignatureSet sigs = {sinumber};
sigSets.emplace_back(string("-z ") + optarg, sigs);
break;
}
case 'i':
loadDatabases = true;
serializePath = optarg;
break;
case 'w':
saveDatabases = true;
serializePath = optarg;
break;
case 1:
// A bare argument: taken as a signature file while a -s list is open.
if (in_sigfile) {
sigFiles.push_back(optarg);
in_sigfile = 2;
break;
}
// Otherwise falls through to 'case 0', i.e. the argument is ignored.
case 0:
// Long options store their flag directly and return 0.
break;
default:
usage("Unrecognised command line argument.");
exit(1);
}
// Any argument other than -s or a signature file lets this count down to
// zero, which closes the -s file list.
if (in_sigfile) {
in_sigfile--;
}
}
if (do_echo_matches) {
echo_matches = true;
}
if (do_per_scan) {
display_per_scan = true;
}
if (exprPath.empty() && !sigFiles.empty()) {
/* attempt to infer an expression directory */
auto si = sigFiles.begin();
exprPath = inferExpressionPath(*si);
for (++si; si != sigFiles.end(); ++si) {
if (exprPath != inferExpressionPath(*si)) {
usage("Unable to infer consistent expression directory");
exit(1);
}
}
}
// Must have a valid expression path
if (exprPath.empty()) {
usage("Must specify an expression path with the -e option.");
exit(1);
}
// Must have valid database to scan
if (corpusFile.empty()) {
usage("Must specify a corpus file with the -c option.");
exit(1);
}
// Cannot ask for both loading and saving
if (loadDatabases && saveDatabases) {
usage("You cannot both load and save databases.");
exit(1);
}
// Read in any -s signature sets.
for (const auto &file : sigFiles) {
SignatureSet sigs;
loadSignatureList(file, sigs);
sigSets.emplace_back(file, move(sigs));
}
}
/** Start the global timer; does nothing unless called by thread 0. */
static
void startTotalTimer(ThreadContext *ctx) {
    const bool is_first_thread = (ctx->num == 0);
    if (is_first_thread) {
        totalTimer.start();
    }
}
/**
 * Stop the global timer and record the elapsed seconds in totalSecs; does
 * nothing unless called by thread 0.
 */
static
void stopTotalTimer(ThreadContext *ctx) {
    if (ctx->num == 0) {
        totalTimer.complete();
        totalSecs = totalTimer.seconds();
    }
}
/** Run a benchmark over a given engine and corpus in block mode. */
static
void benchBlock(void *context) {
ThreadContext *ctx = (ThreadContext *)context;
// Synchronization point
ctx->barrier();
startTotalTimer(ctx);
for (ResultEntry &r : ctx->results) {
ctx->timer.start();
for (const DataBlock &block : ctx->corpus_data) {
ctx->engine.scan(block.payload.c_str(), block.payload.size(),
block.id, r, *ctx->enginectx);
}
ctx->timer.complete();
r.seconds = ctx->timer.seconds();
}
// Synchronization point
ctx->barrier();
// Now that all threads are finished, we can stop the clock.
stopTotalTimer(ctx);
}
/** Structure used to represent a stream. */
struct StreamInfo {
unsigned int stream_id = ~0U;
// first_block_id starts above last_block_id; prepStreamingData() uses that
// to recognise a stream it has not seen yet.
unsigned int first_block_id = ~0U;
unsigned int last_block_id = 0;
// Open stream handle; non-null only between the first and last block.
unique_ptr<EngineStream> eng_handle;
};
/** Count the distinct stream IDs that appear in the corpus. */
static
u64a count_streams(const vector<DataBlock> &corpus_blocks) {
    set<unsigned int> seen_ids;
    for (const auto &blk : corpus_blocks) {
        seen_ids.insert(blk.stream_id);
    }
    const size_t distinct = seen_ids.size();
    return (u64a)distinct;
}
/**
 * Build the per-stream bookkeeping for streaming mode: one StreamInfo per
 * stream, indexed by internal_stream_index, recording the stream ID and the
 * IDs of the first and last block that belong to it.
 */
static
vector<StreamInfo> prepStreamingData(const ThreadContext *ctx) {
    const auto &blocks = ctx->corpus_data;
    vector<StreamInfo> streams(count_streams(blocks));

    for (const auto &blk : blocks) {
        assert(blk.internal_stream_index < streams.size());
        StreamInfo &entry = streams[blk.internal_stream_index];

        // A fresh StreamInfo has first_block_id > last_block_id.
        const bool seen_before = entry.first_block_id <= entry.last_block_id;
        if (seen_before) {
            assert(blk.stream_id == entry.stream_id);
            assert(blk.id > entry.last_block_id);
            assert(blk.id > entry.first_block_id);
            entry.last_block_id = blk.id;
            continue;
        }

        entry.stream_id = blk.stream_id;
        entry.first_block_id = blk.id;
        entry.last_block_id = blk.id;
    }

    return streams;
}
// Timed inner loop for streaming mode. For every result entry the whole
// corpus is replayed: a stream is opened on its first block, scanned block by
// block, and closed on its last block. Opening and closing happen inside the
// timed region. Exits the process if a stream cannot be opened.
static
void benchStreamingInternal(ThreadContext *ctx, vector<StreamInfo> &streams) {
assert(ctx);
const EngineHyperscan &e = ctx->engine;
const vector<DataBlock> &blocks = ctx->corpus_data;
for (ResultEntry &r : ctx->results) {
ctx->timer.start();
for (const auto &b : blocks) {
StreamInfo &stream = streams[b.internal_stream_index];
assert(stream.stream_id == b.stream_id);
// If this is the first block in the stream, open the stream
// handle.
if (b.id == stream.first_block_id) {
assert(!stream.eng_handle);
stream.eng_handle = e.streamOpen(*ctx->enginectx, b.stream_id);
if (!stream.eng_handle) {
printf("Fatal error: stream open failed!\n");
exit(1);
}
}
assert(stream.eng_handle);
e.streamScan(*stream.eng_handle, b.payload.c_str(),
b.payload.size(), b.id, r);
// if this was the last block in the stream, close the stream handle
if (b.id == stream.last_block_id) {
e.streamClose(move(stream.eng_handle), r);
// Leave the handle empty so the next repeat can reopen the stream.
stream.eng_handle = nullptr;
}
}
ctx->timer.complete();
r.seconds = ctx->timer.seconds();
}
}
/** Run a benchmark over a given engine and corpus in streaming mode. */
static
void benchStreaming(void *context) {
ThreadContext *ctx = (ThreadContext *)context;
vector<StreamInfo> streams = prepStreamingData(ctx);
// Synchronization point
ctx->barrier();
startTotalTimer(ctx);
benchStreamingInternal(ctx, streams);
// Synchronization point
ctx->barrier();
// Now that all threads are finished, we can stop the clock.
stopTotalTimer(ctx);
}
/** In-memory structure for a data block to be scanned in vectored mode. */
struct VectoredInfo {
vector<const char *> data; // pointers into the blocks' payloads
vector<unsigned int> len; // length of each entry in data (parallel array)
unsigned int stream_id; // assigned when the first block is added
};
/**
 * Group the thread's corpus blocks by stream for vectored scanning: each
 * VectoredInfo collects the payload pointers and lengths of one stream's
 * blocks, in corpus order.
 */
static
vector<VectoredInfo> prepVectorData(const ThreadContext *ctx) {
    const auto &blocks = ctx->corpus_data;
    vector<VectoredInfo> plans(count_streams(blocks));

    for (const auto &blk : blocks) {
        VectoredInfo &plan = plans[blk.internal_stream_index];

        const bool first_for_stream = plan.data.empty();
        if (first_for_stream) {
            plan.stream_id = blk.stream_id;
        } else {
            assert(plan.stream_id == blk.stream_id);
        }

        const auto &payload = blk.payload;
        plan.data.push_back(payload.c_str());
        plan.len.push_back(payload.size());
    }

    return plans;
}
/** Run a benchmark over a given engine and corpus in vectored mode. */
static
void benchVectored(void *context) {
ThreadContext *ctx = (ThreadContext *)context;
vector<VectoredInfo> v_plans = prepVectorData(ctx);
// Synchronization point
ctx->barrier();
startTotalTimer(ctx);
for (ResultEntry &r : ctx->results) {
ctx->timer.start();
for (const VectoredInfo &v_plan : v_plans) {
ctx->engine.scan_vectored(&v_plan.data[0], &v_plan.len[0],
v_plan.data.size(), v_plan.stream_id, r,
*ctx->enginectx);
}
ctx->timer.complete();
r.seconds = ctx->timer.seconds();
}
// Synchronization point
ctx->barrier();
// Now that all threads are finished, we can stop the clock.
stopTotalTimer(ctx);
}
/**
 * Convert a byte count and a duration into megabits per second
 * (one megabit = 125000 bytes). seconds must be positive.
 */
static
long double calc_mbps(double seconds, u64a bytes) {
    assert(seconds > 0);
    const long double elapsed = (long double)seconds;
    const long double scanned = (long double)bytes;
    return scanned / (elapsed * 125000);
}
/** Print one throughput line per thread per scan, then a blank line. */
static
void displayPerScanResults(const vector<unique_ptr<ThreadContext>> &threads,
                           u64a bytesPerRun) {
    for (const auto &thread_ctx : threads) {
        size_t scan_idx = 0;
        for (const auto &entry : thread_ctx->results) {
            double mbps = calc_mbps(entry.seconds, bytesPerRun);
            printf("T %2u Scan %2zu: %'0.2f Mbit/sec\n", thread_ctx->num,
                   scan_idx, mbps);
            ++scan_idx;
        }
    }
    printf("\n");
}
/** Sum the payload sizes of all blocks in the corpus. */
static
u64a byte_size(const vector<DataBlock> &corpus_blocks) {
    u64a sum = 0;
    for (size_t i = 0; i != corpus_blocks.size(); ++i) {
        sum += corpus_blocks[i].payload.size();
    }
    return sum;
}
/** Dump benchmark results to screen. */
// Prints the total scan time, corpus size, match rate, block rate and overall
// throughput, followed by the per-scan table when --per-scan was given.
// Relies on there being at least one thread and one result per thread.
static
void displayResults(const vector<unique_ptr<ThreadContext>> &threads,
const vector<DataBlock> &corpus_blocks) {
u64a bytesPerRun = byte_size(corpus_blocks);
// The first scan of the first thread is the reference match count.
u64a matchesPerRun = threads[0]->results[0].matches;
// Sanity check: all of our results should have the same match count.
for (const auto &t : threads) {
if (!all_of(begin(t->results), end(t->results),
[&matchesPerRun](const ResultEntry &e) {
return e.matches == matchesPerRun;
})) {
printf("\nWARNING: PER-SCAN MATCH COUNTS ARE INCONSISTENT!\n\n");
break;
}
}
printf("Time spent scanning: %'0.3f seconds\n", totalSecs);
printf("Corpus size: %'llu bytes ", bytesPerRun);
switch (scan_mode) {
case ScanMode::STREAMING:
printf("(%'zu blocks in %'llu streams)\n", corpus_blocks.size(),
count_streams(corpus_blocks));
break;
case ScanMode::VECTORED:
printf("(%'zu blocks in %'llu vectors)\n", corpus_blocks.size(),
count_streams(corpus_blocks));
break;
case ScanMode::BLOCK:
printf("(%'zu blocks)\n", corpus_blocks.size());
break;
}
// Totals cover every repeat on every thread.
u64a totalBytes = bytesPerRun * repeats * threads.size();
u64a totalBlocks = corpus_blocks.size() * repeats * threads.size();
// NOTE(review): divides by bytesPerRun; an empty corpus would give a
// non-finite rate - confirm readCorpus() rejects that case.
double matchRate = ((double)matchesPerRun * 1024) / bytesPerRun;
printf("Matches per iteration: %'llu (%'0.3f matches/kilobyte)\n",
matchesPerRun, matchRate);
double blockRate = (double)totalBlocks / (double)totalSecs;
printf("Overall block rate: %'0.2f blocks/sec\n", blockRate);
printf("Overall throughput: %'0.2Lf Mbit/sec\n",
calc_mbps(totalSecs, totalBytes));
printf("\n");
if (display_per_scan) {
displayPerScanResults(threads, bytesPerRun);
}
}
/**
 * Construct a thread context whose entry point matches the current scan
 * mode.
 *
 * Note: blocks arrives by reference but is copied when it is handed to the
 * ThreadContext constructor, so every thread gets its own copy of the data.
 * It would be unrealistic for every thread to be scanning the same copy of
 * the data.
 */
static
unique_ptr<ThreadContext> makeThreadContext(const EngineHyperscan &db,
                                            const vector<DataBlock> &blocks,
                                            unsigned id,
                                            thread_barrier &sync_barrier) {
    thread_func_t entry = nullptr;
    if (scan_mode == ScanMode::STREAMING) {
        entry = benchStreaming;
    } else if (scan_mode == ScanMode::VECTORED) {
        entry = benchVectored;
    } else if (scan_mode == ScanMode::BLOCK) {
        entry = benchBlock;
    }
    assert(entry);

    return ue2::make_unique<ThreadContext>(id, db, sync_barrier, entry,
                                           blocks);
}
/**
 * Run one benchmark: start one thread per entry in threadCores (or a single
 * thread when none were given), wait for them all, then print the results.
 * Exits the process if a thread cannot be started.
 */
static
void runBenchmark(const EngineHyperscan &db,
                  const vector<DataBlock> &corpus_blocks) {
    const bool coresGiven = !threadCores.empty();
    const size_t numThreads = coresGiven ? threadCores.size() : 1;
#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
    const bool useAffinity = coresGiven;
#else
    const bool useAffinity = false;
#endif

    // Barrier shared by every thread so that the timed region starts and
    // ends together.
    thread_barrier sync_barrier(numThreads);

    vector<unique_ptr<ThreadContext>> threads;
    for (unsigned i = 0; i < numThreads; i++) {
        auto worker = makeThreadContext(db, corpus_blocks, i, sync_barrier);
        const int core = useAffinity ? (int)threadCores[i] : -1;
        if (!worker->start(core)) {
            printf("Unable to start processing thread %u\n", i);
            exit(1);
        }
        threads.push_back(move(worker));
    }

    // Reap threads.
    for (auto &worker : threads) {
        worker->join();
    }

    // Display global results.
    displayResults(threads, corpus_blocks);
}
} // namespace
/** Main driver. */
int main(int argc, char *argv[]) {
Grey grey;
setlocale(LC_ALL, ""); // use the user's locale
#ifndef NDEBUG
printf("\nWARNING: DO NOT BENCHMARK A HYPERSCAN BUILD WITH ASSERTIONS\n\n");
#endif
vector<BenchmarkSigs> sigSets;
processArgs(argc, argv, sigSets, grey);
// read in and process our expressions
ExpressionMap exprMapTemplate;
loadExpressions(exprPath, exprMapTemplate);
// If we have no signature sets, the user wants us to benchmark all the
// known expressions together.
if (sigSets.empty()) {
SignatureSet sigs;
for (auto i : exprMapTemplate | map_keys) {
sigs.push_back(i);
}
sigSets.emplace_back(exprPath, move(sigs));
}
// read in and process our corpus
vector<DataBlock> corpus_blocks;
try {
corpus_blocks = readCorpus(corpusFile);
} catch (const DataCorpusError &e) {
printf("Corpus data error: %s\n", e.msg.c_str());
return 1;
}
for (const auto &s : sigSets) {
ExpressionMap exprMap = exprMapTemplate; // copy
limitBySignature(exprMap, s.sigs);
if (exprMap.empty()) {
continue;
}
auto engine = buildEngineHyperscan(exprMap, scan_mode, s.name, grey);
if (!engine) {
printf("Error: expressions failed to compile.\n");
exit(1);
}
printf("\n");
runBenchmark(*engine, corpus_blocks);
}
return 0;
}

View File

@ -0,0 +1,58 @@
#!/usr/bin/python
'''
A module to construct corpora databases for the Hyperscan benchmarker
(hsbench).
After construction, simply add blocks with the add_chunk() method, then call
finish() when you're done.
'''
import os.path
try:
    from sqlite3 import dbapi2 as sqlite
except ImportError:
    # Only fall back when the stdlib module is really missing; any other
    # error should surface instead of being masked.
    from pysqlite2 import dbapi2 as sqlite

class CorpusBuilder:
    '''
    Writes an hsbench corpus database.

    Creating a builder creates the database file and its "chunk" table. Call
    add_chunk() once per block, then finish() to commit and index the data.
    '''

    SCHEMA = '''
CREATE TABLE chunk (
id integer primary key,
stream_id integer not null,
data blob
);
'''

    def __init__(self, outfile):
        '''
        Create the database @outfile. Raises RuntimeError if that path
        already exists.
        '''
        if os.path.exists(outfile):
            raise RuntimeError("Database '%s' already exists" % outfile)
        self.outfile = outfile
        self.db = sqlite.connect(self.outfile)
        self.db.executescript(CorpusBuilder.SCHEMA)
        self.current_chunk_id = 0

    def add_chunk(self, stream_id, data):
        '''
        Store @data as the next chunk of stream @stream_id and return the
        chunk ID it was given (IDs count up from zero).
        '''
        chunk_id = self.current_chunk_id
        c = self.db.cursor()
        q = 'insert into chunk (id, stream_id, data) values (?, ?, ?)'
        c.execute(q, (chunk_id, stream_id, sqlite.Binary(data)))
        self.current_chunk_id += 1
        return chunk_id

    def finish(self):
        '''
        Commit all chunks, then index the stream IDs and run vacuum and
        analyze on the database.
        '''
        self.db.commit()
        for q in ('create index chunk_stream_id_idx on chunk(stream_id)',
                  'vacuum',
                  'analyze'):
            c = self.db.cursor()
            c.execute(q)
        self.db.commit()

View File

@ -0,0 +1,68 @@
#!/usr/bin/python
'''
This script creates a Hyperscan benchmarking corpus database from a supplied
group of Project Gutenberg texts.
'''
import sys, getopt, os.path
import gutenberg.acquire, gutenberg.cleanup, gutenberg.query
from CorpusBuilder import CorpusBuilder
# Stream currently being filled and the number of bytes written to it so far.
# Both persist across calls to addBlocks() so that streams can span texts.
stream_id = 0
stream_bytes = 0

def addBlocks(builder, block_size, stream_size, text_id, text):
    '''
    Split @text into blocks of at most @block_size and add them to @builder.
    A new stream is started whenever the current one has received at least
    @stream_size bytes. Raises ValueError if @block_size is not positive.
    '''
    global stream_id
    global stream_bytes

    # A non-positive block size would otherwise never advance through the text.
    if block_size <= 0:
        raise ValueError("block_size must be a positive integer")

    sys.stdout.write("text %s len %s\n" % (text_id, len(text)))
    num_blocks = 0
    for start in range(0, len(text), block_size):
        # Slicing clamps at the end of the text, so the last block may be short.
        chunk = text[start:start + block_size]
        builder.add_chunk(stream_id, chunk)
        num_blocks += 1
        stream_bytes += len(chunk)
        if stream_bytes >= stream_size:
            stream_id += 1
            stream_bytes = 0
    sys.stdout.write("Text %s : added %s blocks of %s bytes.\n"
                     % (text_id, num_blocks, block_size))
def buildCorpus(outFN, block_size, stream_size, text_ids):
    '''
    Build the corpus database @outFN from the Project Gutenberg texts named
    by @text_ids, using blocks of @block_size bytes and streams of roughly
    @stream_size bytes. Exits with a non-zero status if no IDs are given.
    '''
    if len(text_ids) == 0:
        sys.stderr.write("Must provide at least one input ID\n")
        # This is an error, so do not report success to the caller's shell.
        sys.exit(1)

    builder = CorpusBuilder(outFN)

    total_bytes = 0
    for text_id in text_ids:
        text_id = int(text_id)
        text = gutenberg.acquire.load_etext(text_id)
        text = gutenberg.cleanup.strip_headers(text).strip()
        addBlocks(builder, block_size, stream_size, text_id, text)
        total_bytes += len(text)

    builder.finish()

    sys.stdout.write("Total: %s bytes.\n" % total_bytes)
def usage(exeName):
    '''
    Print the usage line for @exeName to stderr and exit with status -1.
    '''
    errmsg = "Usage: %s -o <output file> -b <block size> -s <max stream size> <gutenberg text id>..."
    errmsg = errmsg % exeName
    # sys.stderr.write() behaves the same on Python 2 and 3.
    sys.stderr.write(errmsg + "\n")
    sys.exit(-1)
if __name__ == '__main__':
    # -o, -b and -s are all mandatory; the remaining arguments are text IDs.
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'o:b:s:')
    except getopt.GetoptError:
        # Unknown option or missing value: show the usage text, not a traceback.
        usage(os.path.basename(sys.argv[0]))
    opts = dict(opts)

    requiredKeys = [ '-o', '-b', '-s' ]
    for k in requiredKeys:
        if k not in opts:
            usage(os.path.basename(sys.argv[0]))

    buildCorpus(opts['-o'], int(opts['-b']), int(opts['-s']), args)

View File

@ -0,0 +1,53 @@
#!/usr/bin/python
'''
Simple script to take a file full of lines of text and push them into a
Hyperscan benchmarking corpus database, one block per line.
'''
import sys, getopt, os.path
from CorpusBuilder import CorpusBuilder
def lineCorpus(inFN, outFN):
    '''
    Read lines from file name @inFN and write them as blocks to a new db with
    name @outFN. Every line becomes one block in a single stream, with
    trailing whitespace removed.

    Exits with status -1 if @inFN does not exist, and with status 0 if it
    contains no lines.
    '''

    if not os.path.exists(inFN):
        # Report the input file name; the output name says nothing useful here.
        sys.stderr.write("Input file '%s' does not exist. Exiting.\n" % inFN)
        sys.exit(-1)

    # Read as bytes so the data can be stored as a blob on Python 2 and 3
    # alike, and close the file as soon as it has been read.
    with open(inFN, 'rb') as f:
        lines = f.readlines()

    if len(lines) == 0:
        sys.stderr.write("Input file contained no lines. Exiting.\n")
        sys.exit(0)

    builder = CorpusBuilder(outFN)

    # write a single stream to contain everything
    streamId = 0

    for l in lines:
        builder.add_chunk(streamId, l.rstrip())

    builder.finish()
def usage(exeName):
    '''
    Print the usage line for @exeName to stderr and exit with status -1.
    '''
    errmsg = "Usage: %s -i <input file> -o <output file>"
    errmsg = errmsg % exeName
    # sys.stderr.write() behaves the same on Python 2 and 3.
    sys.stderr.write(errmsg + "\n")
    sys.exit(-1)
if __name__ == '__main__':
    # -i and -o are mandatory; -c is still accepted (and ignored) so that
    # existing command lines keep working.
    try:
        parsed = getopt.getopt(sys.argv[1:], 'i:o:c:')
    except getopt.GetoptError:
        # Unknown option or missing value: show the usage text, not a traceback.
        usage(os.path.basename(sys.argv[0]))
    args = dict(parsed[0])

    requiredKeys = [ '-i', '-o' ]
    for k in requiredKeys:
        if k not in args:
            usage(os.path.basename(sys.argv[0]))

    fnArgs = tuple([args[k] for k in requiredKeys])
    lineCorpus(*fnArgs)

View File

@ -0,0 +1,301 @@
#!/usr/bin/env python
'''
Script to convert a pcap file containing UDP and TCP packets to a corpus file.
'''
import sys, getopt, pprint, os
from sqlite3 import dbapi2 as sqlite
import pcap
from optparse import OptionParser
from socket import AF_INET, IPPROTO_UDP, IPPROTO_TCP, inet_ntop, ntohs, ntohl, inet_ntoa
import struct
from CorpusBuilder import CorpusBuilder
# Ethernet frame type values.
ETHERTYPE_IP = 0x0800 # IP protocol
ETHERTYPE_ARP = 0x0806 # Addr. resolution protocol
ETHERTYPE_REVARP = 0x8035 # reverse Addr. resolution protocol
ETHERTYPE_VLAN = 0x8100 # IEEE 802.1Q VLAN tagging
ETHERTYPE_IPV6 = 0x86dd # IPv6
#
# A dictionary of active TCP streams
# (keyed by the string form of the segment's 5-tuple)
#
tcp_streams = {}
#
# A dictionary of UDP streams
# (keyed by the string form of the segment's 5-tuple)
#
udp_streams = {}
#
# Current stream id
# (the next ID that newStream() will hand out)
cur_stream_id = 0
def usage(exeName) :
    '''
    Print the usage line for @exeName to stderr and exit with status -1.
    '''
    errmsg = "Usage: %s -i <pcap-file> -o <sqlite-file>"
    errmsg = errmsg % exeName
    # sys.stderr.write() behaves the same on Python 2 and 3.
    sys.stderr.write(errmsg + "\n")
    sys.exit(-1)
class FiveTuple(object):
    """Protocol number plus source and destination address/port of a flow.
    """
    def __init__(self, protocol, src_addr, src_port, dst_addr, dst_port):
        (self.protocol,
         self.src_addr, self.src_port,
         self.dst_addr, self.dst_port) = (protocol,
                                          src_addr, src_port,
                                          dst_addr, dst_port)

    def __str__(self):
        # Comma-separated form; used as the dictionary key for stream lookup.
        fields = (self.protocol,
                  self.src_addr, self.src_port,
                  self.dst_addr, self.dst_port)
        return "%d,%s,%d,%s,%d" % fields
class UdpSegment:
    """One UDP datagram: its 5-tuple, header and payload.
    """
    def __init__(self, five_tuple, header, payload):
        self.five_tuple, self.udp_header, self.udp_payload = (
            five_tuple, header, payload)
class TcpSegment:
    """Definition of a TCP segment

    Holds the 5-tuple, the raw header and the payload, plus the sequence and
    acknowledgement numbers unpacked from header bytes 4..11. Segments order
    by sequence number.
    """
    def __init__(self, five_tuple, header, payload):
        self.five_tuple = five_tuple
        self.tcp_header = header
        self.tcp_payload = payload
        self.tcp_sequence_number, self.tcp_acknowledgement_number = struct.unpack('!LL', header[4:12])

    def _flag_bits(self):
        # Header byte 13 carries the flags. Unpacking a one-byte slice works
        # for Python 2 str and Python 3 bytes alike (ord() on an indexed
        # bytes object fails on Python 3).
        return struct.unpack('!B', self.tcp_header[13:14])[0] & 0x3F

    def opt_isset_FIN(self):
        """Non-zero if the FIN flag (0x01) is set."""
        return (self._flag_bits() & 0x01)

    def opt_isset_SYN(self):
        """Non-zero if the SYN flag (0x02) is set."""
        return (self._flag_bits() & 0x02)

    def get_sequence_number(self):
        return self.tcp_sequence_number

    def __cmp__(self, other):
        # Python 2 ordering hook, written without the Python-2-only cmp().
        mine = self.tcp_sequence_number
        theirs = other.tcp_sequence_number
        return (mine > theirs) - (mine < theirs)

    def __lt__(self, other):
        # Python 3 ignores __cmp__; sorted() only needs "less than".
        return self.tcp_sequence_number < other.tcp_sequence_number
class TcpStream:
    """The TCP segments collected so far for one 5-tuple.
    """
    TCP_STREAM_ACTIVE = 0x1
    TCP_STREAM_CLOSED = 0x02

    def __init__(self, five_tuple):
        self.five_tuple = five_tuple
        self.initial_sequence_number = 0
        self.segments = []

    def reset_stream(self):
        # Drop everything collected so far.
        self.segments = []
        self.initial_sequence_number = 0

    def set_initial_sequence_number(self, sequence_number):
        self.initial_sequence_number = sequence_number

    def append_segment(self, tcp_segment):
        # The first segment stored defines the initial sequence number.
        if not self.segments:
            self.set_initial_sequence_number(tcp_segment.get_sequence_number())
        self.segments.append(tcp_segment)

    def get_segments_sorted(self):
        # Returns a new list; the stored arrival order is left untouched.
        ordered = list(self.segments)
        ordered.sort()
        return ordered
class UdpStream:
    """Collects, in arrival order, the UDP segments sharing one five-tuple."""

    def __init__(self, five_tuple):
        self.segments = []
        self.five_tuple = five_tuple

    def append_segment(self, udp_segment):
        self.segments += [udp_segment]
def newStream(five_tuple):
    '''
    Allocate and return the next stream ID.

    IDs come from the module-level counter cur_stream_id, which is advanced
    by one on every call.  The five_tuple argument is accepted but not used.
    '''
    global cur_stream_id
    cur_stream_id += 1
    return cur_stream_id - 1
def process_tcp_segment(builder, segment):
    """Route a TCP segment to the stream for its five-tuple.

    The segment is appended to the matching stream (created on first sight).
    A SYN then discards everything collected for the stream so far; a FIN
    writes the stream's segments to the database and retires the stream.
    """
    key = str(segment.five_tuple)
    stream = tcp_streams.get(key)
    if stream is None:
        stream = TcpStream(segment.five_tuple)
        tcp_streams[key] = stream
    stream.append_segment(segment)

    if segment.opt_isset_SYN():
        stream.segments = []

    if not segment.opt_isset_FIN():
        return
    #
    # Stream is complete: commit its segments and drop the table entry so
    # the same five-tuple can start a fresh stream later.
    #
    db_add_tcp_stream_segments(builder, stream)
    del tcp_streams[key]
def process_udp_segment(builder, segment):
    """Queue a UDP segment on the stream for its five-tuple.

    UDP is connectionless, so segments are only accumulated here; they are
    written out once the whole capture has been read.  The builder argument
    is not used by this function.
    """
    key = str(segment.five_tuple)
    try:
        stream = udp_streams[key]
    except KeyError:
        stream = udp_streams[key] = UdpStream(segment.five_tuple)
    stream.append_segment(segment)
def db_add_tcp_stream_segments(builder, tcp_stream):
    """Add the contents of a TCP stream to the database.

    Segments are visited in sequence-number order.  Segments without a
    payload are skipped, as is any segment whose sequence number does not
    advance past the last one written (e.g. a retransmission).
    """
    # None (not 0) marks "nothing written yet", so that a first payload
    # segment carrying sequence number 0 is not mistaken for a duplicate.
    last_sequence_num = None
    streamID = None
    for tcp_segment in tcp_stream.get_segments_sorted():
        if len(tcp_segment.tcp_payload) == 0:
            continue
        sequence_num = tcp_segment.tcp_sequence_number
        if last_sequence_num is not None and sequence_num <= last_sequence_num:
            continue
        #
        # Segment with an actual payload - add it to the stream's list of
        # chunks.  Creation of the stream is delayed until there is a chunk
        # to commit to it, so empty streams never consume an ID.
        #
        if streamID is None:
            streamID = newStream(tcp_stream.five_tuple)
        builder.add_chunk(streamID, tcp_segment.tcp_payload)
        last_sequence_num = sequence_num
def db_add_udp_stream_segments(builder, udp_stream):
    """Write the payloads of a UDP stream to the database.

    A UDP stream here is just every packet seen for one five-tuple.  Empty
    payloads are skipped, and a stream ID is only allocated once the first
    non-empty payload is found.
    """
    stream_id = None
    for segment in udp_stream.segments:
        payload = segment.udp_payload
        if len(payload) <= 0:
            continue
        if stream_id is None:
            stream_id = newStream(udp_stream.five_tuple)
        builder.add_chunk(stream_id, payload)
def enchunk_pcap(pcapFN, sqliteFN):
    """Read the contents of a pcap file with name @pcapFN and produce
       a sqlite db with name @sqliteFN. It will contain chunks of data
       from TCP and UDP streams.

       Only IPv4 packets carrying TCP or UDP are considered; everything else,
       including packets too short to hold the headers being decoded, is
       skipped.  Exits with status -1 if @pcapFN does not exist.
    """
    if not os.path.exists(pcapFN):
        sys.stderr.write("Input file '%s' does not exist. Exiting.\n" % pcapFN)
        sys.exit(-1)

    builder = CorpusBuilder(sqliteFN)

    #
    # Read in the contents of the pcap file, adding stream segments as found
    #
    pcap_ref = pcap.pcap(pcapFN)
    while True:
        try:
            ts, packet = pcap_ref.next()
        except Exception:
            # Any failure to fetch another packet ends the read loop, as
            # before, but KeyboardInterrupt/SystemExit now propagate.
            break

        dloff = pcap_ref.dloff
        if len(packet) < dloff + 20:
            # Too short to hold a minimal (20-byte) IPv4 header.
            continue

        linkLayerType = struct.unpack('!H', packet[(dloff - 2):dloff])[0]
        if linkLayerType != ETHERTYPE_IP:
            #
            # We're only interested in IP packets
            #
            continue

        ip_pkt_total_len = struct.unpack('!H', packet[dloff + 2: dloff + 4])[0]
        ip_pkt = packet[dloff:dloff + ip_pkt_total_len]
        if len(ip_pkt) < 20:
            # Total-length field is smaller than a minimal IPv4 header.
            continue

        # One-byte slices (not indexing) keep the decoding identical for
        # Python 2 str and Python 3 bytes.
        pkt_protocol = struct.unpack('B', ip_pkt[9:10])[0]
        if (pkt_protocol != IPPROTO_UDP) and (pkt_protocol != IPPROTO_TCP):
            #
            # we're only interested in UDP and TCP packets at the moment
            #
            continue

        pkt_src_addr = inet_ntoa(ip_pkt[12:16])
        pkt_dst_addr = inet_ntoa(ip_pkt[16:20])

        # Low nibble of the first byte is the header length in 32-bit words.
        ip_hdr_len_offset = (struct.unpack('B', ip_pkt[0:1])[0] & 0x0f) * 4
        ip_payload = ip_pkt[ip_hdr_len_offset:]
        if len(ip_payload) < 4:
            # No room for the source/destination port pair.
            continue

        pkt_src_port, pkt_dst_port = struct.unpack('!HH', ip_payload[0:4])
        five_tuple = FiveTuple(pkt_protocol, pkt_src_addr, pkt_src_port, pkt_dst_addr, pkt_dst_port)

        if pkt_protocol == IPPROTO_UDP:
            if len(ip_payload) < 8:
                # Truncated UDP header.
                continue
            udp_header = ip_payload[0:8]
            udp_payload = ip_payload[8:]
            udp_segment = UdpSegment(five_tuple, udp_header, udp_payload)
            process_udp_segment(builder, udp_segment)
        elif pkt_protocol == IPPROTO_TCP:
            if len(ip_payload) < 20:
                # Truncated TCP header.
                continue
            # High nibble of byte 12 is the TCP header length in 32-bit words.
            tcp_hdr_len = (struct.unpack('B', ip_payload[12:13])[0] >> 4) * 4
            if tcp_hdr_len < 20:
                # Data offset below the fixed header size: malformed segment.
                continue
            tcp_header = ip_payload[0:tcp_hdr_len]
            tcp_payload = ip_payload[tcp_hdr_len:]
            segment = TcpSegment(five_tuple, tcp_header, tcp_payload)
            process_tcp_segment(builder, segment)

    #
    # Having read the contents of the pcap, we fill the database with any
    # remaining TCP and UDP segments
    #
    for tcp_stream in tcp_streams.values():
        db_add_tcp_stream_segments(builder, tcp_stream)

    for udp_stream in udp_streams.values():
        db_add_udp_stream_segments(builder, udp_stream)

    #
    # We've finished with the database
    #
    builder.finish()
if __name__ == '__main__' :
    # Script entry point: both -i <pcap-file> and -o <sqlite-file> are
    # required; anything else prints the usage text and exits.
    exeName = os.path.basename(sys.argv[0])
    try:
        args = getopt.getopt(sys.argv[1:], 'i:o:')
    except getopt.GetoptError:
        # Unknown option or missing option argument.
        usage(exeName)
    args = dict(args[0])

    requiredKeys = [ '-i', '-o']
    for k in requiredKeys :
        if k not in args :
            usage(exeName)

    fnArgs = tuple([ args[k] for k in requiredKeys ])
    enchunk_pcap(*fnArgs)

View File

@ -0,0 +1,71 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \file
* \brief Simple thread barrier.
*/
#ifndef TOOLS_THREAD_BARRIER_H
#define TOOLS_THREAD_BARRIER_H
#include <condition_variable>
#include <mutex>
#include <stdexcept>
/**
 * \brief Simple reusable thread barrier class.
 *
 * Each call to wait() blocks until wait() has been called N times for the
 * current round. The barrier then resets itself and can be used again.
 */
class thread_barrier {
public:
    /**
     * \param n Number of wait() calls needed to release the barrier.
     * \throws std::runtime_error if \p n is zero.
     */
    explicit thread_barrier(unsigned int n) : max(n) {
        if (max == 0) {
            throw std::runtime_error("invalid barrier");
        }
    }

    /** \brief Block until all N participants have arrived. */
    void wait() {
        std::unique_lock<std::mutex> lock(mtx);
        // Remember which round this caller belongs to. Waiting on a change
        // of round (rather than on a bare notification) means a spurious
        // wakeup cannot let a thread through the barrier early.
        const unsigned int my_generation = generation;
        count++;
        if (count >= max) {
            // Last arrival: start a new round and release everyone.
            count = 0;
            generation++;
            condvar.notify_all();
        } else {
            condvar.wait(lock, [this, my_generation] {
                return generation != my_generation;
            });
        }
    }

private:
    std::mutex mtx;
    std::condition_variable condvar;
    unsigned int count = 0;      // arrivals in the current round
    unsigned int generation = 0; // incremented each time the barrier opens
    unsigned int max;            // arrivals required to open the barrier
};
#endif // TOOLS_THREAD_BARRIER_H

59
tools/hsbench/timer.h Normal file
View File

@ -0,0 +1,59 @@
/*
* Copyright (c) 2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TIMER_H
#define TIMER_H
#include "ue2common.h"
#include <chrono>
/**
 * Stopwatch based on std::chrono::steady_clock: call start(), then
 * complete(), then read the elapsed interval with seconds().
 */
class Timer {
protected:
    using Clock = std::chrono::steady_clock;

public:
    Timer() = default;

    /** Record the start of the interval. */
    void start() { clock_start = Clock::now(); }

    /** Record the end of the interval. */
    void complete() { clock_end = Clock::now(); }

    /** Time between the recorded start and end points, in seconds. */
    double seconds() const {
        return std::chrono::duration<double>(clock_end - clock_start).count();
    }

protected:
    std::chrono::time_point<Clock> clock_start;
    std::chrono::time_point<Clock> clock_end;
};
#endif // TIMER_H

View File

@ -1,7 +1,10 @@
# utility libs
CHECK_FUNCTION_EXISTS(mmap HAVE_MMAP)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}
${PROJECT_SOURCE_DIR})
set_source_files_properties(
${CMAKE_BINARY_DIR}/tools/ExpressionParser.cpp
@ -31,3 +34,14 @@ SET(corpusomatic_SRCS
)
add_library(corpusomatic STATIC ${corpusomatic_SRCS})
# Static utility library built from database_util.cpp/.h (the
# saveDatabase()/loadDatabase() helpers).
set(databaseutil_SRCS
database_util.cpp
database_util.h
)
add_library(databaseutil STATIC ${databaseutil_SRCS})
# Static utility library built from cross_compile.cpp/.h (the
# xcompileReadMode()/xcompileUsage() helpers).
set(crosscompileutil_SRCS
cross_compile.cpp
cross_compile.h
)
add_library(crosscompileutil STATIC ${crosscompileutil_SRCS})

115
util/cross_compile.cpp Normal file
View File

@ -0,0 +1,115 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "cross_compile.h"
#include "src/ue2common.h"
#include "src/hs_compile.h"
#include "src/util/make_unique.h"
#include <sstream>
#include <string>
using namespace std;
/** Pairs an instruction-set option name with the CPU feature mask it selects. */
struct XcompileMode {
const char *name; // option text matched against the user's input
unsigned long long cpu_features; // value stored into hs_platform_info::cpu_features
};
// Options recognised by xcompileReadMode() and listed by xcompileUsage().
static const XcompileMode xcompile_options[] = {
{ "avx2", HS_CPU_FEATURES_AVX2 },
{ "base", 0 },
};
/**
 * Build a platform description from a cross-compile option string.
 *
 * The platform is first populated for the current host; its cpu_features
 * field is then overridden with the mask of the named instruction-set option.
 * The option name is whatever follows the first ':' in \p s, or all of \p s
 * when it contains no ':'.
 *
 * \return The platform info, or nullptr if \p s is null, the option name is
 *         empty or unknown, or the host platform could not be queried.
 */
unique_ptr<hs_platform_info> xcompileReadMode(const char *s) {
    if (!s) {
        return nullptr;
    }

    hs_platform_info rv;
    hs_error_t err = hs_populate_platform(&rv);
    assert(!err);
    if (err != HS_SUCCESS) {
        // With asserts compiled out, rv would otherwise be handed back
        // holding indeterminate values.
        return nullptr;
    }

    const string str(s);
    // find() yields npos when there is no ':'; npos + 1 wraps to 0, so the
    // whole string is then treated as the option name.
    const string opt = str.substr(str.find(":") + 1);
    if (opt.empty()) {
        return nullptr;
    }

    const size_t numOpts = ARRAY_LENGTH(xcompile_options);
    for (size_t i = 0; i < numOpts; i++) {
        if (opt == xcompile_options[i].name) {
            DEBUG_PRINTF("found opt %zu:%llu\n", i,
                         xcompile_options[i].cpu_features);
            rv.cpu_features = xcompile_options[i].cpu_features;
            DEBUG_PRINTF("cpu_features %llx\n", rv.cpu_features);
            return ue2::make_unique<hs_platform_info>(rv);
        }
    }

    return nullptr;
}
/**
 * Render a platform description as text: the tune value (when non-zero)
 * followed by a space-prefixed token for each CPU feature. Feature bits
 * other than AVX2 are printed numerically after a "?cpu_features?:" marker.
 */
string to_string(const hs_platform_info &p) {
    ostringstream out;

    if (p.tune) {
        out << p.tune;
    }

    u64a remaining = p.cpu_features;
    if (remaining & HS_CPU_FEATURES_AVX2) {
        out << " avx2";
        remaining &= ~HS_CPU_FEATURES_AVX2;
    }

    // Whatever is left has no symbolic name here.
    if (remaining) {
        out << " " << "?cpu_features?:" << remaining;
    }

    return out.str();
}
/**
 * Build the help text listing every instruction-set option name in
 * xcompile_options, comma-separated.
 */
string xcompileUsage(void) {
    string variants = "Instruction set options: ";
    const char *separator = "";
    for (const XcompileMode &mode : xcompile_options) {
        variants += separator;
        variants += mode.name;
        separator = ", "; // precedes every name but the first
    }
    return variants;
}

42
util/cross_compile.h Normal file
View File

@ -0,0 +1,42 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CROSS_COMPILE_H
#define CROSS_COMPILE_H
#include <memory>
#include <string>
struct hs_platform_info;
std::unique_ptr<hs_platform_info> xcompileReadMode(const char *s);
std::string xcompileUsage(void);
std::string to_string(const hs_platform_info &p);
#endif /* CROSS_COMPILE_H */

155
util/database_util.cpp Normal file
View File

@ -0,0 +1,155 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "database_util.h"
#include "hs_common.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#if defined(HAVE_MMAP)
#include <sys/mman.h> // for mmap
#include <unistd.h> // for close
#include <sys/fcntl.h>
#include <sys/stat.h>
#endif
using namespace std;
/**
 * Serialize a compiled database and write it to a file.
 *
 * \param db        Database to serialize; must not be null.
 * \param filename  Destination path; must not be null.
 * \param verbose   When true, announce the destination on stdout.
 * \return true only if serialization succeeded and the bytes were written
 *         to the file without a stream error.
 */
bool saveDatabase(const hs_database_t *db, const char *filename, bool verbose) {
    assert(db);
    assert(filename);

    if (verbose) {
        cout << "Saving database to: " << filename << endl;
    }

    char *bytes = nullptr;
    size_t length = 0;
    hs_error_t err = hs_serialize_database(db, &bytes, &length);
    if (err != HS_SUCCESS) {
        return false;
    }

    assert(bytes);
    assert(length > 0);

    // Report failure if the file cannot be opened or the write/flush fails,
    // instead of claiming success unconditionally.
    bool ok = false;
    ofstream out(filename, ios::binary);
    if (out.is_open()) {
        out.write(bytes, length);
        out.close();
        ok = !out.fail();
    }

    // The serialized buffer is released on every path.
    ::free(bytes);

    return ok;
}
/**
 * Load a serialized database from a file and deserialize it.
 *
 * The file is read via mmap when HAVE_MMAP is defined, and via stream I/O
 * otherwise.
 *
 * \param filename  Path of the serialized database; must not be null.
 * \param verbose   When true, print progress and the serialized database
 *                  info string to stdout.
 * \return The deserialized database, or nullptr if the file could not be
 *         read or deserialization failed.
 */
hs_database_t * loadDatabase(const char *filename, bool verbose) {
    assert(filename);

    if (verbose) {
        cout << "Loading database from: " << filename << endl;
    }

    char *bytes = nullptr;

#if defined(HAVE_MMAP)
    // Use mmap to read the file
    int fd = open(filename, O_RDONLY);
    if (fd < 0) {
        return nullptr;
    }
    struct stat st;
    if (fstat(fd, &st) < 0) {
        close(fd);
        return nullptr;
    }
    size_t len = st.st_size;

    bytes = (char *)mmap(nullptr, len, PROT_READ, MAP_SHARED, fd, 0);
    if (bytes == MAP_FAILED) {
        cout << "mmap failed" << endl;
        close(fd);
        return nullptr;
    }
#else
    // Fall back on stream IO
    ifstream is;
    is.open(filename, ios::in | ios::binary);
    if (!is.is_open()) {
        return nullptr;
    }
    is.seekg(0, ios::end);
    // tellg() signals failure with -1; converting that straight to size_t
    // would request an enormous allocation below.
    const std::streamoff end_pos = is.tellg();
    if (end_pos < 0) {
        return nullptr;
    }
    size_t len = static_cast<size_t>(end_pos);
    if (verbose) {
        cout << "Reading " << len << " bytes" << endl;
    }
    is.seekg(0, ios::beg);
    bytes = new char[len];
    is.read(bytes, len);
    if (!is) {
        // Short or failed read: the buffer is not fully initialised, so do
        // not hand it to the deserializer.
        delete [] bytes;
        return nullptr;
    }
    is.close();
#endif

    assert(bytes);

    if (verbose) {
        char *info = nullptr;
        hs_error_t err = hs_serialized_database_info(bytes, len, &info);
        if (err) {
            cout << "Unable to decode serialized database info: " << err
                 << endl;
        } else if (info) {
            cout << "Serialized database info: " << info << endl;
            std::free(info);
        } else {
            cout << "Unable to decode serialized database info." << endl;
        }
    }

    hs_database_t *db = nullptr;
    hs_error_t err = hs_deserialize_database(bytes, len, &db);

    // The raw bytes are no longer needed once deserialization has run.
#if defined(HAVE_MMAP)
    munmap(bytes, len);
    close(fd);
#else
    delete [] bytes;
#endif

    if (err != HS_SUCCESS) {
        cout << "hs_deserialize_database call failed: " << err << endl;
        return nullptr;
    }

    assert(db);

    return db;
}

39
util/database_util.h Normal file
View File

@ -0,0 +1,39 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DATABASE_UTIL_H
#define DATABASE_UTIL_H
struct hs_database;
bool saveDatabase(const hs_database *db, const char *filename,
bool verbose = false);
hs_database *loadDatabase(const char *filename, bool verbose = false);
#endif /* DATABASE_UTIL_H */

107
util/expression_path.h Normal file
View File

@ -0,0 +1,107 @@
/*
* Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef EXPRESSION_PATH_H
#define EXPRESSION_PATH_H
#include "ue2common.h"
#include <cerrno>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
#include <sys/stat.h>
#if !defined(_WIN32)
#include <unistd.h>
#include <libgen.h>
#endif
//
// Utility functions
//
/**
 * Derive the pcre directory path from the path of a signature file: the
 * file's directory component with "/../pcre" appended.
 *
 * On Windows an empty string is returned when the input is _MAX_DIR
 * characters or longer.
 */
static inline
std::string inferExpressionPath(const std::string &sigFile) {
#ifndef _WIN32
    // POSIX variant.
    // dirname() is allowed to write to its argument, so hand it a private,
    // null-terminated copy of the path.
    std::vector<char> scratch(sigFile.begin(), sigFile.end());
    scratch.push_back('\0');
    std::string dir(dirname(scratch.data()));
#else
    // Windows variant.
    if (sigFile.size() >= _MAX_DIR) {
        return std::string();
    }
    char dir_buf[_MAX_DIR];
    _splitpath(sigFile.c_str(), nullptr, dir_buf, nullptr, nullptr);
    std::string dir(dir_buf);
#endif

    return dir + "/../pcre";
}
#if defined(_WIN32)
#define stat _stat
#define S_IFREG _S_IFREG
#endif
/**
 * Return true if \p filename names a directory.
 *
 * If stat() fails, the error is reported on stderr and false is returned.
 */
static inline
bool isDir(const std::string &filename) {
    struct stat s;

    if (stat(filename.c_str(), &s) == -1) {
        std::cerr << "stat: " << strerror(errno) << std::endl;
        return false;
    }

    // The file type is a multi-bit field, so it must be masked out and
    // compared for equality: a bare "S_IFDIR & st_mode" is also non-zero
    // for block devices, whose type value contains the S_IFDIR bit.
#if defined(_WIN32)
    return (s.st_mode & _S_IFMT) == _S_IFDIR;
#else
    return (s.st_mode & S_IFMT) == S_IFDIR;
#endif
}
/**
 * Return true if \p filename names a regular file.
 *
 * If stat() fails, the error is reported on stderr and false is returned.
 */
static inline
bool isFile(const std::string &filename) {
    struct stat s;

    if (stat(filename.c_str(), &s) == -1) {
        std::cerr << "stat: " << strerror(errno) << std::endl;
        return false;
    }

    // The file type is a multi-bit field, so it must be masked out and
    // compared for equality: a bare "S_IFREG & st_mode" is also non-zero
    // for sockets, whose type value contains the S_IFREG bit.
#if defined(_WIN32)
    return (s.st_mode & _S_IFMT) == _S_IFREG;
#else
    return (s.st_mode & S_IFMT) == S_IFREG;
#endif
}
#endif /* EXPRESSION_PATH_H */