mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
hsbench: add Hyperscan benchmarker
The hsbench tool provides an easy way to measure Hyperscan's performance for a particular set of patterns and corpus of data to be scanned.
This commit is contained in:
parent
06cde4c94d
commit
f626276271
53
cmake/sqlite3.cmake
Normal file
53
cmake/sqlite3.cmake
Normal file
@ -0,0 +1,53 @@
|
||||
#
# Locate sqlite3.
#
# A system installation found through pkg-config is preferred (unless on
# Windows or SQLITE_PREFER_STATIC is ON); otherwise the amalgamation
# (sqlite3.h + sqlite3.c) is expected in ${PROJECT_SOURCE_DIR}/sqlite3 and is
# built as the static library target `sqlite3_static`.
#
# Variables set for the including scope:
#   SQLITE3_FOUND         TRUE once a usable sqlite3 was located (configuration
#                         stops with FATAL_ERROR otherwise)
#   SQLITE3_INCLUDE_DIRS  include directories providing sqlite3.h
#   SQLITE3_LDFLAGS       value to hand to target_link_libraries()
#   SQLITE3_BUILD_SOURCE  TRUE when the in-tree copy is being used
#

include(CheckCSourceCompiles)
include(CheckSymbolExists)
include(CMakePushCheckState)

option(SQLITE_PREFER_STATIC "Build sqlite3 statically instead of using an installed lib" OFF)

if(NOT WIN32 AND NOT SQLITE_PREFER_STATIC)
    find_package(PkgConfig QUIET)

    # first check for sqlite on the system
    pkg_check_modules(SQLITE3 sqlite3)
endif()

if(NOT SQLITE3_FOUND)
    message(STATUS "looking for sqlite3 in source tree")
    # look in the source tree
    if(EXISTS "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.h" AND
       EXISTS "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.c")
        message(STATUS "  found sqlite3 in source tree")
        set(SQLITE3_FOUND TRUE)
        set(SQLITE3_BUILD_SOURCE TRUE)
        set(SQLITE3_INCLUDE_DIRS "${PROJECT_SOURCE_DIR}/sqlite3")
        set(SQLITE3_LDFLAGS sqlite3_static)
    else()
        message(FATAL_ERROR "  no sqlite3 in source tree")
    endif()
endif()

# now do version checks
if(SQLITE3_FOUND)
    # Scope every CMAKE_REQUIRED_* modification to the checks below so that
    # later configure checks in the including directory are not affected,
    # whichever branch is taken.
    cmake_push_check_state()

    list(INSERT CMAKE_REQUIRED_INCLUDES 0 "${SQLITE3_INCLUDE_DIRS}")
    check_c_source_compiles("#include <sqlite3.h>\n#if SQLITE_VERSION_NUMBER >= 3008007 && SQLITE_VERSION_NUMBER < 3008010\n#error broken sqlite\n#endif\nint main() {return 0;}" SQLITE_VERSION_OK)
    if(NOT SQLITE_VERSION_OK)
        message(FATAL_ERROR "sqlite3 is broken from 3.8.7 to 3.8.10 - please find a working version")
    endif()

    if(NOT SQLITE3_BUILD_SOURCE)
        # Only an installed library can be probed for symbols at configure
        # time; the in-tree copy has not been built yet.
        list(INSERT CMAKE_REQUIRED_LIBRARIES 0 ${SQLITE3_LDFLAGS})
        check_symbol_exists(sqlite3_open_v2 sqlite3.h HAVE_SQLITE3_OPEN_V2)
    endif()

    cmake_pop_check_state()

    if(SQLITE3_BUILD_SOURCE AND NOT TARGET sqlite3_static)
        # build sqlite as a static lib to compile into our test programs
        add_library(sqlite3_static STATIC "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.c")
        if(NOT WIN32)
            set_target_properties(sqlite3_static PROPERTIES
                COMPILE_FLAGS "-Wno-unused -Wno-cast-qual -DSQLITE_OMIT_LOAD_EXTENSION")
        endif()
    endif()
endif()

# that's enough about sqlite
|
19
tools/CMakeLists.txt
Normal file
19
tools/CMakeLists.txt
Normal file
@ -0,0 +1,19 @@
|
||||
# Build configuration shared by everything under tools/.

find_package(Threads)

# remove some warnings
if(CMAKE_CXX_FLAGS MATCHES "-Wmissing-declarations")
    string(REPLACE "-Wmissing-declarations" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()

include_directories("${PROJECT_SOURCE_DIR}")
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src")
include_directories("${PROJECT_SOURCE_DIR}/util")

# add any subdir with a cmake file
# (paths are quoted so that a source directory containing ';' or other list
# separators is still handled as one path)
file(GLOB dirents
     RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
     "${CMAKE_CURRENT_SOURCE_DIR}/*")
foreach(e ${dirents})
    if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/${e}" AND
       EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${e}/CMakeLists.txt")
        add_subdirectory("${e}")
    endif()
endforeach()
|
36
tools/hsbench/CMakeLists.txt
Normal file
36
tools/hsbench/CMakeLists.txt
Normal file
@ -0,0 +1,36 @@
|
||||
# Build rules for the hsbench benchmarking tool.

include(CheckFunctionExists)

# Resolved through CMAKE_MODULE_PATH, so this keeps working when that variable
# holds more than one directory.
include(sqlite3)

if(NOT XCODE)
    include_directories(SYSTEM ${SQLITE3_INCLUDE_DIRS})
else()
    # cmake doesn't think Xcode supports isystem
    # Emit one -isystem per directory: SQLITE3_INCLUDE_DIRS is a list and may
    # be empty, in which case no dangling "-isystem" must be produced.
    foreach(sqlite3_include_dir ${SQLITE3_INCLUDE_DIRS})
        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${sqlite3_include_dir}")
    endforeach()
endif()

check_function_exists(malloc_info HAVE_MALLOC_INFO)
check_function_exists(shmget HAVE_SHMGET)
set(HAVE_SHMGET ${HAVE_SHMGET} CACHE BOOL "shmget()")

# only set these after all tests are done
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")

set(hsbench_SOURCES
    common.h
    data_corpus.cpp
    data_corpus.h
    engine_hyperscan.cpp
    engine_hyperscan.h
    heapstats.cpp
    heapstats.h
    huge.cpp
    huge.h
    main.cpp
    thread_barrier.h
    timer.h
)

add_executable(hsbench ${hsbench_SOURCES})
target_link_libraries(hsbench hs databaseutil expressionutil ${SQLITE3_LDFLAGS}
    ${CMAKE_THREAD_LIBS_INIT})
|
8
tools/hsbench/README.md
Normal file
8
tools/hsbench/README.md
Normal file
@ -0,0 +1,8 @@
|
||||
Hyperscan Benchmarker: hsbench
|
||||
==============================
|
||||
|
||||
The `hsbench` tool provides an easy way to measure Hyperscan's performance
|
||||
for a particular set of patterns and corpus of data to be scanned.
|
||||
|
||||
Documentation describing its operation is available in the Tools section of the
|
||||
[Developer Reference Guide](http://01org.github.io/hyperscan/dev-reference/).
|
42
tools/hsbench/common.h
Normal file
42
tools/hsbench/common.h
Normal file
@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef COMMON_H
#define COMMON_H

#include <string>

/** How the corpus is presented to the engine. */
enum class ScanMode {
    BLOCK,
    STREAMING,
    VECTORED
};

/*
 * Settings shared between hsbench translation units. They are only declared
 * here; the definitions live in another source file.
 */

/** Selects the match callback that prints every match. */
extern bool echo_matches;

/** When set, the built database is written to a file under serializePath. */
extern bool saveDatabases;

/** When set, the database is read from serializePath instead of compiled. */
extern bool loadDatabases;

/** Directory used for saved/loaded database files. */
extern std::string serializePath;

/** Extra mode bits OR-ed into the compile mode for streaming databases. */
extern unsigned int somPrecisionMode;

#endif // COMMON_H
|
133
tools/hsbench/data_corpus.cpp
Normal file
133
tools/hsbench/data_corpus.cpp
Normal file
@ -0,0 +1,133 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "data_corpus.h"
|
||||
|
||||
#include "util/container.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <sqlite3.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace ue2;
|
||||
|
||||
static
|
||||
void readRow(sqlite3_stmt *statement, vector<DataBlock> &blocks,
|
||||
map<unsigned int, unsigned int> &stream_indices) {
|
||||
unsigned int id = sqlite3_column_int(statement, 0);
|
||||
unsigned int stream_id = sqlite3_column_int(statement, 1);
|
||||
const char *blob = (const char *)sqlite3_column_blob(statement, 2);
|
||||
unsigned int bytes = sqlite3_column_bytes(statement, 2);
|
||||
|
||||
if (!contains(stream_indices, stream_id)) {
|
||||
unsigned int internal_stream_index = stream_indices.size();
|
||||
stream_indices[stream_id] = internal_stream_index;
|
||||
}
|
||||
auto internal_stream_index = stream_indices[stream_id];
|
||||
|
||||
assert(blob || bytes > 0);
|
||||
blocks.emplace_back(id, stream_id, internal_stream_index,
|
||||
string(blob, blob + bytes));
|
||||
}
|
||||
|
||||
vector<DataBlock> readCorpus(const string &filename) {
|
||||
int status;
|
||||
sqlite3 *db = nullptr;
|
||||
|
||||
status = sqlite3_open_v2(filename.c_str(), &db, SQLITE_OPEN_READONLY,
|
||||
nullptr);
|
||||
|
||||
assert(db);
|
||||
if (status != SQLITE_OK) {
|
||||
ostringstream err;
|
||||
err << "Unable to open database '" << filename << "': "
|
||||
<< sqlite3_errmsg(db);
|
||||
status = sqlite3_close(db);
|
||||
assert(status == SQLITE_OK);
|
||||
throw DataCorpusError(err.str());
|
||||
}
|
||||
|
||||
static const string query("SELECT id, stream_id, data "
|
||||
"FROM chunk ORDER BY id;");
|
||||
|
||||
sqlite3_stmt *statement = nullptr;
|
||||
|
||||
status = sqlite3_prepare_v2(db, query.c_str(), query.size(), &statement,
|
||||
nullptr);
|
||||
if (status != SQLITE_OK) {
|
||||
status = sqlite3_finalize(statement);
|
||||
assert(status == SQLITE_OK);
|
||||
status = sqlite3_close(db);
|
||||
assert(status == SQLITE_OK);
|
||||
|
||||
ostringstream oss;
|
||||
oss << "Query failed: " << query;
|
||||
throw DataCorpusError(oss.str());
|
||||
}
|
||||
|
||||
vector<DataBlock> blocks;
|
||||
map<unsigned int, unsigned int> stream_indices;
|
||||
|
||||
status = sqlite3_step(statement);
|
||||
while (status == SQLITE_ROW) {
|
||||
readRow(statement, blocks, stream_indices);
|
||||
status = sqlite3_step(statement);
|
||||
}
|
||||
|
||||
if (status != SQLITE_DONE) {
|
||||
ostringstream oss;
|
||||
oss << "Error retrieving blocks from corpus: "
|
||||
<< sqlite3_errstr(status);
|
||||
|
||||
status = sqlite3_finalize(statement);
|
||||
assert(status == SQLITE_OK);
|
||||
status = sqlite3_close(db);
|
||||
assert(status == SQLITE_OK);
|
||||
|
||||
throw DataCorpusError(oss.str());
|
||||
}
|
||||
|
||||
status = sqlite3_finalize(statement);
|
||||
assert(status == SQLITE_OK);
|
||||
status = sqlite3_close(db);
|
||||
assert(status == SQLITE_OK);
|
||||
|
||||
if (blocks.empty()) {
|
||||
throw DataCorpusError("Database contains no blocks.");
|
||||
}
|
||||
|
||||
return blocks;
|
||||
}
|
63
tools/hsbench/data_corpus.h
Normal file
63
tools/hsbench/data_corpus.h
Normal file
@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DATACORPUS_H
|
||||
#define DATACORPUS_H
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
/** One block of corpus data together with the identifiers it was read with. */
class DataBlock {
public:
    /** Takes ownership of \p in_data by moving it into the payload. */
    DataBlock(unsigned int in_id, unsigned int in_stream,
              unsigned int int_stream_index_in, std::string in_data)
        : id(in_id),
          stream_id(in_stream),
          internal_stream_index(int_stream_index_in),
          payload(std::move(in_data)) {}

    /** Unique block identifier. */
    unsigned int id;

    /** Unique stream identifier (from corpus file). */
    unsigned int stream_id;

    /** Dense index for this stream (allocated by hsbench). */
    unsigned int internal_stream_index;

    /** Actual block payload. */
    std::string payload;
};
|
||||
|
||||
/**
 * Exception type thrown when reading a corpus fails; carries a human-readable
 * description in \ref msg.
 */
class DataCorpusError {
public:
    explicit DataCorpusError(std::string msg_in)
        : msg(std::move(msg_in)) {}

    /** Description of what went wrong. */
    std::string msg;
};
|
||||
|
||||
/**
|
||||
* Interface to a corpus database. Any error will produce a DataCorpusError
|
||||
* and should be considered fatal.
|
||||
*/
|
||||
std::vector<DataBlock> readCorpus(const std::string &filename);
|
||||
|
||||
#endif // DATACORPUS_H
|
411
tools/hsbench/engine_hyperscan.cpp
Normal file
411
tools/hsbench/engine_hyperscan.cpp
Normal file
@ -0,0 +1,411 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "ExpressionParser.h"
|
||||
#include "common.h"
|
||||
#include "engine_hyperscan.h"
|
||||
#include "expressions.h"
|
||||
#include "heapstats.h"
|
||||
#include "huge.h"
|
||||
#include "timer.h"
|
||||
|
||||
#include "crc32.h"
|
||||
#include "database.h"
|
||||
#include "hs_compile.h"
|
||||
#include "hs_internal.h"
|
||||
#include "hs_runtime.h"
|
||||
#include "util/database_util.h"
|
||||
#include "util/make_unique.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Allocate the scratch space used for scanning against \p db.
 *
 * A constructor cannot report failure to the caller and every scan needs the
 * scratch, so an allocation error is treated as fatal, in the same way the
 * scan functions treat scan errors.
 */
EngineContext::EngineContext(const hs_database_t *db) {
    hs_error_t rv = hs_alloc_scratch(db, &scratch);
    if (rv != HS_SUCCESS) {
        printf("Fatal error: hs_alloc_scratch returned error %d\n", rv);
        abort();
    }
    assert(scratch);
}

/** Release the scratch space allocated by the constructor. */
EngineContext::~EngineContext() {
    hs_free_scratch(scratch);
}
|
||||
|
||||
namespace /* anonymous */ {
|
||||
|
||||
/** Scan context structure passed to the onMatch callback function. */
|
||||
struct ScanContext {
|
||||
ScanContext(unsigned id_in, ResultEntry &result_in,
|
||||
const EngineStream *stream_in)
|
||||
: id(id_in), result(result_in), stream(stream_in) {}
|
||||
unsigned id;
|
||||
ResultEntry &result;
|
||||
const EngineStream *stream; // nullptr except in streaming mode.
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
|
||||
* Callback function called for every match that Hyperscan produces, used when
|
||||
* "echo matches" is off.
|
||||
*/
|
||||
static
|
||||
int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int,
|
||||
void *ctx) {
|
||||
ScanContext *sc = static_cast<ScanContext *>(ctx);
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback function called for every match that Hyperscan produces when "echo
|
||||
* matches" is enabled.
|
||||
*/
|
||||
static
|
||||
int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
|
||||
unsigned int, void *ctx) {
|
||||
ScanContext *sc = static_cast<ScanContext *>(ctx);
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
if (sc->stream) {
|
||||
printf("Match @%u:%u:%llu for %u\n", sc->stream->sn, sc->id, to, id);
|
||||
} else {
|
||||
printf("Match @%u:%llu for %u\n", sc->id, to, id);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
EngineHyperscan::EngineHyperscan(hs_database_t *db_in) : db(db_in) {
|
||||
assert(db);
|
||||
}
|
||||
|
||||
EngineHyperscan::~EngineHyperscan() {
|
||||
release_huge(db);
|
||||
}
|
||||
|
||||
unique_ptr<EngineContext> EngineHyperscan::makeContext() const {
|
||||
return ue2::make_unique<EngineContext>(db);
|
||||
}
|
||||
|
||||
void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ctx) const {
|
||||
assert(data);
|
||||
|
||||
ScanContext sc(id, result, nullptr);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
hs_error_t rv = hs_scan(db, data, len, 0, ctx.scratch, callback, &sc);
|
||||
|
||||
if (rv != HS_SUCCESS) {
|
||||
printf("Fatal error: hs_scan returned error %d\n", rv);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
void EngineHyperscan::scan_vectored(const char *const *data,
|
||||
const unsigned int *len, unsigned int count,
|
||||
unsigned streamId, ResultEntry &result,
|
||||
EngineContext &ctx) const {
|
||||
assert(data);
|
||||
assert(len);
|
||||
|
||||
ScanContext sc(streamId, result, nullptr);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
hs_error_t rv =
|
||||
hs_scan_vector(db, data, len, count, 0, ctx.scratch, callback, &sc);
|
||||
|
||||
if (rv != HS_SUCCESS) {
|
||||
printf("Fatal error: hs_scan_vector returned error %d\n", rv);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Open a new stream on this engine's database.
 *
 * \param ctx      context whose scratch later scans on the stream will use
 * \param streamId number recorded in the stream and echoed with its matches
 * \return the new stream, or nullptr if hs_open_stream() failed
 */
unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ctx,
                                                     unsigned streamId) const {
    auto stream = ue2::make_unique<EngineStream>();
    stream->ctx = &ctx;
    stream->sn = streamId;
    // Start from a known value so the pointer test below is meaningful even
    // if the call fails without writing to it.
    stream->id = nullptr;

    hs_error_t rv = hs_open_stream(db, 0, &stream->id);
    if (rv != HS_SUCCESS || !stream->id) {
        // an error occurred, propagate to caller
        return nullptr;
    }
    return stream;
}
|
||||
|
||||
/**
 * Close \p stream, counting any matches delivered during the close in
 * \p result. As with the scan functions, an error from Hyperscan is reported
 * on stdout and aborts the process rather than being silently dropped.
 */
void EngineHyperscan::streamClose(unique_ptr<EngineStream> stream,
                                  ResultEntry &result) const {
    assert(stream);

    EngineStream &s = *stream;
    EngineContext &ctx = *s.ctx;

    ScanContext sc(0, result, &s);
    auto callback = echo_matches ? onMatchEcho : onMatch;

    assert(s.id);
    hs_error_t rv = hs_close_stream(s.id, ctx.scratch, callback, &sc);
    s.id = nullptr;

    if (rv != HS_SUCCESS) {
        printf("Fatal error: hs_close_stream returned error %d\n", rv);
        abort();
    }
}
|
||||
|
||||
/**
 * Feed one block of data to an open stream with hs_scan_stream(), using the
 * scratch of the context the stream was opened with. Any error is reported on
 * stdout and aborts the process.
 */
void EngineHyperscan::streamScan(EngineStream &stream, const char *data,
                                 unsigned len, unsigned id,
                                 ResultEntry &result) const {
    assert(data);

    EngineContext &context = *stream.ctx;
    ScanContext scan_ctx(id, result, &stream);
    auto handler = echo_matches ? onMatchEcho : onMatch;

    const hs_error_t status = hs_scan_stream(stream.id, data, len, 0,
                                             context.scratch, handler,
                                             &scan_ctx);
    if (status == HS_SUCCESS) {
        return;
    }

    printf("Fatal error: hs_scan_stream returned error %d\n", status);
    abort();
}
|
||||
|
||||
static
|
||||
unsigned makeModeFlags(ScanMode scan_mode) {
|
||||
switch (scan_mode) {
|
||||
case ScanMode::BLOCK:
|
||||
return HS_MODE_BLOCK;
|
||||
case ScanMode::STREAMING:
|
||||
return HS_MODE_STREAM;
|
||||
case ScanMode::VECTORED:
|
||||
return HS_MODE_VECTORED;
|
||||
}
|
||||
assert(0);
|
||||
return HS_MODE_STREAM;
|
||||
}
|
||||
|
||||
/**
|
||||
* Hash the settings used to compile a database, returning a string that can be
|
||||
* used as a filename.
|
||||
*/
|
||||
static
|
||||
string dbSettingsHash(const string &filename, u32 mode) {
|
||||
ostringstream info_oss;
|
||||
|
||||
info_oss << filename.c_str() << ' ';
|
||||
info_oss << mode << ' ';
|
||||
|
||||
string info = info_oss.str();
|
||||
|
||||
u32 crc = Crc32c_ComputeBuf(0, info.data(), info.size());
|
||||
|
||||
// return STL string with printable version of digest
|
||||
ostringstream oss;
|
||||
oss << hex << setw(8) << setfill('0') << crc << dec;
|
||||
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
static
|
||||
string dbFilename(const std::string &name, unsigned mode) {
|
||||
ostringstream oss;
|
||||
oss << serializePath << '/' << dbSettingsHash(name, mode) << ".db";
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
std::unique_ptr<EngineHyperscan>
|
||||
buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
|
||||
const std::string &name, UNUSED const ue2::Grey &grey) {
|
||||
if (expressions.empty()) {
|
||||
assert(0);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
long double compileSecs = 0.0;
|
||||
size_t compiledSize = 0.0;
|
||||
size_t streamSize = 0;
|
||||
size_t scratchSize = 0;
|
||||
unsigned int peakMemorySize = 0;
|
||||
unsigned int crc = 0;
|
||||
std::string db_info;
|
||||
|
||||
unsigned int mode = makeModeFlags(scan_mode);
|
||||
|
||||
hs_database_t *db;
|
||||
hs_error_t err;
|
||||
|
||||
if (loadDatabases) {
|
||||
db = loadDatabase(dbFilename(name, mode).c_str());
|
||||
if (!db) {
|
||||
return nullptr;
|
||||
}
|
||||
} else {
|
||||
const unsigned int count = expressions.size();
|
||||
|
||||
vector<string> exprs;
|
||||
vector<unsigned int> flags, ids;
|
||||
vector<hs_expr_ext> ext;
|
||||
|
||||
for (const auto &m : expressions) {
|
||||
string expr;
|
||||
unsigned int f = 0;
|
||||
hs_expr_ext extparam;
|
||||
extparam.flags = 0;
|
||||
if (!readExpression(m.second, expr, &f, &extparam)) {
|
||||
printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(),
|
||||
m.first);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
exprs.push_back(expr);
|
||||
ids.push_back(m.first);
|
||||
flags.push_back(f);
|
||||
ext.push_back(extparam);
|
||||
}
|
||||
|
||||
unsigned full_mode = mode;
|
||||
if (mode == HS_MODE_STREAM) {
|
||||
full_mode |= somPrecisionMode;
|
||||
}
|
||||
|
||||
// Our compiler takes an array of plain ol' C strings.
|
||||
vector<const char *> patterns(count);
|
||||
for (unsigned int i = 0; i < count; i++) {
|
||||
patterns[i] = exprs[i].c_str();
|
||||
}
|
||||
|
||||
// Extended parameters are passed as pointers to hs_expr_ext structures.
|
||||
vector<const hs_expr_ext *> ext_ptr(count);
|
||||
for (unsigned int i = 0; i < count; i++) {
|
||||
ext_ptr[i] = &ext[i];
|
||||
}
|
||||
|
||||
Timer timer;
|
||||
timer.start();
|
||||
|
||||
hs_compile_error_t *compile_err;
|
||||
|
||||
#ifndef RELEASE_BUILD
|
||||
err = hs_compile_multi_int(patterns.data(), flags.data(), ids.data(),
|
||||
ext_ptr.data(), count, full_mode, nullptr,
|
||||
&db, &compile_err, grey);
|
||||
#else
|
||||
err = hs_compile_ext_multi(patterns.data(), flags.data(), ids.data(),
|
||||
ext_ptr.data(), count, full_mode, nullptr,
|
||||
&db, &compile_err);
|
||||
#endif
|
||||
|
||||
timer.complete();
|
||||
compileSecs = timer.seconds();
|
||||
peakMemorySize = getPeakHeap();
|
||||
|
||||
if (err == HS_COMPILER_ERROR) {
|
||||
if (compile_err->expression >= 0) {
|
||||
printf("Compile error for signature #%u: %s\n",
|
||||
compile_err->expression, compile_err->message);
|
||||
} else {
|
||||
printf("Compile error: %s\n", compile_err->message);
|
||||
}
|
||||
hs_free_compile_error(compile_err);
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// copy the db into huge pages (where available) to reduce TLB pressure
|
||||
db = get_huge(db);
|
||||
if (!db) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
err = hs_database_size(db, &compiledSize);
|
||||
if (err != HS_SUCCESS) {
|
||||
return nullptr;
|
||||
}
|
||||
assert(compiledSize > 0);
|
||||
|
||||
crc = db->crc32;
|
||||
|
||||
if (saveDatabases) {
|
||||
saveDatabase(db, dbFilename(name, mode).c_str());
|
||||
}
|
||||
|
||||
if (mode & HS_MODE_STREAM) {
|
||||
err = hs_stream_size(db, &streamSize);
|
||||
if (err != HS_SUCCESS) {
|
||||
return nullptr;
|
||||
}
|
||||
} else {
|
||||
streamSize = 0;
|
||||
}
|
||||
|
||||
char *info;
|
||||
err = hs_database_info(db, &info);
|
||||
if (err != HS_SUCCESS) {
|
||||
return nullptr;
|
||||
} else {
|
||||
db_info = string(info);
|
||||
free(info);
|
||||
}
|
||||
|
||||
// Allocate scratch temporarily to find its size: this is a good test
|
||||
// anyway.
|
||||
hs_scratch_t *scratch = nullptr;
|
||||
err = hs_alloc_scratch(db, &scratch);
|
||||
if (err != HS_SUCCESS) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
err = hs_scratch_size(scratch, &scratchSize);
|
||||
if (err != HS_SUCCESS) {
|
||||
return nullptr;
|
||||
}
|
||||
hs_free_scratch(scratch);
|
||||
|
||||
// Output summary information.
|
||||
printf("Signatures: %s\n", name.c_str());
|
||||
printf("Hyperscan info: %s\n", db_info.c_str());
|
||||
printf("Expression count: %'zu\n", expressions.size());
|
||||
printf("Bytecode size: %'zu bytes\n", compiledSize);
|
||||
printf("Database CRC: 0x%x\n", crc);
|
||||
if (mode & HS_MODE_STREAM) {
|
||||
printf("Stream state size: %'zu bytes\n", streamSize);
|
||||
}
|
||||
printf("Scratch size: %'zu bytes\n", scratchSize);
|
||||
printf("Compile time: %'0.3Lf seconds\n", compileSecs);
|
||||
printf("Peak heap usage: %'u bytes\n", peakMemorySize);
|
||||
|
||||
return ue2::make_unique<EngineHyperscan>(db);
|
||||
}
|
97
tools/hsbench/engine_hyperscan.h
Normal file
97
tools/hsbench/engine_hyperscan.h
Normal file
@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ENGINEHYPERSCAN_H
|
||||
#define ENGINEHYPERSCAN_H
|
||||
|
||||
#include "expressions.h"
|
||||
#include "common.h"
|
||||
#include "hs_runtime.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
/** Outcome of a single complete scan. */
struct ResultEntry {
    /** Time taken for scan. */
    double seconds = 0.0;

    /** Count of matches found. */
    unsigned int matches = 0u;
};
|
||||
|
||||
/** Engine context which is allocated on a per-thread basis. */
|
||||
class EngineContext {
|
||||
public:
|
||||
explicit EngineContext(const hs_database_t *db);
|
||||
~EngineContext();
|
||||
|
||||
hs_scratch_t *scratch = nullptr;
|
||||
};
|
||||
|
||||
/** Streaming mode scans have persistent stream state associated with them. */
|
||||
class EngineStream {
|
||||
public:
|
||||
hs_stream_t *id;
|
||||
unsigned int sn;
|
||||
EngineContext *ctx;
|
||||
};
|
||||
|
||||
/** Hyperscan Engine for scanning data. */
|
||||
class EngineHyperscan {
|
||||
public:
|
||||
explicit EngineHyperscan(hs_database_t *db);
|
||||
~EngineHyperscan();
|
||||
|
||||
std::unique_ptr<EngineContext> makeContext() const;
|
||||
|
||||
void scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ctx) const;
|
||||
|
||||
void scan_vectored(const char *const *data, const unsigned int *len,
|
||||
unsigned int count, unsigned int streamId,
|
||||
ResultEntry &result, EngineContext &ctx) const;
|
||||
|
||||
std::unique_ptr<EngineStream> streamOpen(EngineContext &ctx,
|
||||
unsigned id) const;
|
||||
|
||||
void streamClose(std::unique_ptr<EngineStream> stream,
|
||||
ResultEntry &result) const;
|
||||
|
||||
void streamScan(EngineStream &stream, const char *data, unsigned int len,
|
||||
unsigned int id, ResultEntry &result) const;
|
||||
|
||||
private:
|
||||
hs_database_t *db;
|
||||
};
|
||||
|
||||
namespace ue2 {
|
||||
struct Grey;
|
||||
}
|
||||
|
||||
std::unique_ptr<EngineHyperscan>
|
||||
buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
|
||||
const std::string &name, const ue2::Grey &grey);
|
||||
|
||||
#endif // ENGINEHYPERSCAN_H
|
146
tools/hsbench/heapstats.cpp
Normal file
146
tools/hsbench/heapstats.cpp
Normal file
@ -0,0 +1,146 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Peak heap usage code.
|
||||
*
|
||||
* At present, we prefer glibc's malloc_info() call where it is available and
|
||||
* otherwise, on Linux, read VmPeak from /proc/$PID/status. We return zero elsewhere.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "heapstats.h"
|
||||
|
||||
#if defined HAVE_MALLOC_INFO
|
||||
|
||||
#include <cerrno>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include <malloc.h>
|
||||
|
||||
/**
 * Peak heap usage taken from glibc's malloc_info() report.
 *
 * The XML report is written to a temporary file and scanned line by line; the
 * value from the last line that starts with the "max" system tag is returned.
 * Returns 0 if the temporary file cannot be created or malloc_info() fails.
 */
size_t getPeakHeap(void) {
    FILE *report = tmpfile();
    if (report == nullptr) {
        return 0;
    }

    if (malloc_info(0, report) != 0) {
        fclose(report);
        return 0;
    }

    rewind(report);

    // We don't want to depend on a real XML parser. This is ugly and brittle
    // and hopefully good enough for the time being. We look for the last
    // system tag with type max, which should be the malloc-wide one.
    static const char prefix[] = "<system type=\"max\" size=\"";
    const size_t prefix_len = sizeof(prefix) - 1; // exclude the trailing NUL

    char *row = nullptr;
    size_t row_cap = 0;
    size_t peak = 0;

    while (getline(&row, &row_cap, report) != -1) {
        if (strncmp(row, prefix, prefix_len) != 0) {
            continue;
        }

        errno = 0;
        peak = (size_t)strtoull(row + prefix_len, nullptr, 10);
        if (errno != 0) {
            // Conversion failed: stop scanning and return what strtoull gave.
            break;
        }
    }

    free(row);
    fclose(report);
    return peak;
}
|
||||
|
||||
#elif defined __linux
|
||||
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Approximate peak memory usage from the VmPeak line of /proc/$PID/status.
 *
 * The figure on that line is multiplied by 1024 before being returned.
 * Returns 0 if the status file cannot be opened or has no VmPeak line.
 */
size_t getPeakHeap(void) {
    // Modern Linux kernels write a 'VmPeak' value into /proc/$PID/status. This
    // is a reasonable approximation, though it likely includes shared libs and
    // the like as well...
    std::ostringstream status_path;
    status_path << "/proc/" << getpid() << "/status";

    std::ifstream status(status_path.str().c_str());
    if (!status.good()) {
        return 0;
    }

    const std::string key("VmPeak:");

    for (std::string row; std::getline(status, row);) {
        std::istringstream fields(row, std::istringstream::in);
        std::string label;
        fields >> label;
        if (label == key) {
            // Skip everything up to the first digit of the value.
            while (fields.good() && !isdigit(fields.peek())) {
                fields.ignore();
            }

            size_t kilobytes = 0;
            fields >> kilobytes;
            return kilobytes * 1024;
        }
    }

    status.close();
    return 0;
}
|
||||
|
||||
#else
|
||||
|
||||
// Fallback for platforms where neither of the measurements above is compiled
// in: always reports zero.
size_t getPeakHeap(void) { return 0; }
|
||||
|
||||
#endif
|
36
tools/hsbench/heapstats.h
Normal file
36
tools/hsbench/heapstats.h
Normal file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef HEAPSTATS_H
|
||||
#define HEAPSTATS_H
|
||||
|
||||
#include <cstddef> // for size_t
|
||||
|
||||
/**
 * \brief Report the peak heap usage of the current process.
 *
 * Depending on how heapstats.cpp was built this is parsed from glibc's
 * malloc_info() report, derived from VmPeak in /proc/$PID/status (scaled by
 * 1024), or a stub. Returns 0 when no figure is available.
 */
size_t getPeakHeap(void);
|
||||
|
||||
#endif
|
201
tools/hsbench/huge.cpp
Normal file
201
tools/hsbench/huge.cpp
Normal file
@ -0,0 +1,201 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "hs.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#include "common.h"
|
||||
#include "huge.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#if defined(HAVE_SHMGET)
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/shm.h>
|
||||
#endif
|
||||
|
||||
// Id of the shared memory segment holding the huge-page copy of the database,
// or -1 when no segment is in use. It starts at -1 so that release_huge()
// takes the hs_free_database() path if get_huge() was never called.
UNUSED static int hsdb_shmid = -1;
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Forward declaration: defined further down in this file, used by get_huge().
// Returns the huge page size in bytes, or -1 if it cannot be determined.
long gethugepagesize(void);
|
||||
|
||||
/**
 * Try to move a compiled database into huge-page-backed shared memory.
 *
 * On success the original \p db is freed and a pointer to the relocated
 * database is returned; hsdb_shmid then holds the segment id. If huge pages
 * are unavailable (page size unknown, shmget/shmat refused, deserialisation
 * into the segment failed) the original \p db is returned untouched and
 * hsdb_shmid is set to -1. Returns nullptr only when the database cannot be
 * serialised or sized at all.
 *
 * Without HAVE_SHMGET / SHM_HUGETLB this is a no-op returning \p db.
 */
hs_database_t *get_huge(hs_database_t *db) {
#if defined(HAVE_SHMGET) && defined(SHM_HUGETLB)
    /* move the database to huge pages where possible, but fail politely */
    const long hpage_size = gethugepagesize();
    if (hpage_size <= 0) {
        printf("Couldn't determine huge page size\n");
        hsdb_shmid = -1;
        return db;
    }

    char *bytes = nullptr;
    size_t len = 0;
    hs_error_t err = hs_serialize_database(db, &bytes, &len);
    if (err != HS_SUCCESS) {
        printf("Failed to serialize database for copy: %d\n", err);
        // this is weird - don't fail gracefully this time
        return nullptr;
    }

    // Shared "fail politely" exit: drop the serialised copy and hand the
    // caller back the database it gave us.
    auto keep_original = [&]() -> hs_database_t * {
        free(bytes);
        hsdb_shmid = -1;
        return db;
    };

    size_t size = 0;
    err = hs_serialized_database_size(bytes, len, &size);
    if (err != HS_SUCCESS) {
        printf("Failed to get database size: %d\n", err);
        free(bytes); // don't leak the serialised copy on this path
        // this is weird - don't fail gracefully this time
        return nullptr;
    }

    // Reuse the page size read above rather than parsing /proc/meminfo again.
    hsdb_shmid = shmget(IPC_PRIVATE, ROUNDUP_N(size, hpage_size),
                        SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
    if (hsdb_shmid < 0) {
        // This could fail if the user doesn't have permission to shmget(),
        // which is OK.
        return keep_original();
    }

    void *shmaddr = shmat(hsdb_shmid, nullptr, SHM_RND);
    if (shmaddr == (void *)-1) {
        perror("Shared memory attach failure");
        // The segment exists but nobody is attached: remove it now, otherwise
        // it would outlive this process.
        shmctl(hsdb_shmid, IPC_RMID, nullptr);
        return keep_original();
    }

    // Mark this segment to be destroyed after this process detaches.
    shmctl(hsdb_shmid, IPC_RMID, nullptr);

    err = hs_deserialize_database_at(bytes, len, (hs_database_t *)shmaddr);
    if (err != HS_SUCCESS) {
        printf("Failed to deserialize database into shm: %d\n", err);
        shmdt((const void *)shmaddr);
        return keep_original();
    }

    free(bytes);
    hs_free_database(db);
    return (hs_database_t *)shmaddr;
#else
    return db;
#endif
}
|
||||
|
||||
/**
 * Release a database previously passed through get_huge().
 *
 * If a shared memory segment is in use (hsdb_shmid != -1) the database is
 * detached from it; otherwise it is freed with hs_free_database().
 */
void release_huge(hs_database_t *db) {
#if defined(HAVE_SHMGET) && defined(SHM_HUGETLB)
    if (hsdb_shmid == -1) {
        // fallback
        hs_free_database(db);
        return;
    }

    const int rc = shmdt((const void *)db);
    if (rc != 0) {
        perror("Detach failure");
    }
#else
    hs_free_database(db);
#endif
}
|
||||
|
||||
#define BUF_SIZE 4096

/**
 * Look up a numeric field in /proc/meminfo.
 *
 * \param tag  Field label including the trailing colon, e.g. "Hugepagesize:".
 * \return The number following the first occurrence of \p tag, or -1 if the
 *         file cannot be opened or read, does not fit in the local buffer,
 *         does not contain \p tag, or the value is not a number followed by
 *         whitespace.
 */
static long read_meminfo(const char *tag) {
    char buf[BUF_SIZE];

    const int fd = open("/proc/meminfo", O_RDONLY);
    if (fd < 0) {
        perror("Couldn't open /proc/meminfo");
        return -1;
    }

    const ssize_t len = read(fd, buf, sizeof(buf));
    if (len < 0) {
        // Report before close(), which could overwrite errno.
        perror("Error reading /proc/meminfo");
        close(fd);
        return -1;
    }
    close(fd);

    if ((size_t)len == sizeof(buf)) {
        // No room left for the terminator: the file may have been truncated.
        printf("/proc/meminfo is too large\n");
        return -1;
    }
    buf[len] = '\0';

    const char *p = strstr(buf, tag);
    if (!p) {
        return -1;
    }
    p += strlen(tag);

    char *end = nullptr;
    errno = 0;
    const long val = strtol(p, &end, 0);
    // Reject "no digits consumed" and out-of-range values as well as trailing
    // garbage; isspace() needs an unsigned char value to be well defined.
    if (end == p || errno != 0 || !isspace((unsigned char)*end)) {
        printf("Couldn't parse /proc/meminfo value\n");
        return -1;
    }

    return val;
}
|
||||
|
||||
/**
 * Huge page size of this system in bytes, taken from the "Hugepagesize:"
 * entry of /proc/meminfo, or -1 if that entry cannot be read.
 */
long gethugepagesize(void) {
    // Keep the value as long throughout: narrowing to int and multiplying in
    // int overflows for very large page sizes.
    const long hpage_kb = read_meminfo("Hugepagesize:");
    if (hpage_kb < 0) {
        return -1;
    }

    /* convert from kb to bytes */
    return hpage_kb * 1024L;
}
|
||||
|
||||
#else
|
||||
|
||||
/* No huge page support on WIN32. */
|
||||
|
||||
// WIN32 stub: the database is returned as-is.
hs_database_t *get_huge(hs_database_t *db) {
    return db;
}
|
||||
|
||||
// WIN32 stub: the database was never relocated, so simply free it.
void release_huge(hs_database_t *db) {
    hs_free_database(db);
}
|
||||
|
||||
#endif
|
37
tools/hsbench/huge.h
Normal file
37
tools/hsbench/huge.h
Normal file
@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef HUGE_H
|
||||
#define HUGE_H
|
||||
|
||||
#include "hs.h"
|
||||
|
||||
/**
 * Try to move \p db into huge-page-backed shared memory. Returns the relocated
 * database (the original is freed), or \p db itself when huge pages cannot be
 * used or are not supported by the build. May return nullptr if the database
 * cannot be serialised.
 */
hs_database_t *get_huge(hs_database_t *db);
|
||||
/**
 * Release a database obtained from get_huge(): detaches it from shared memory
 * if it was relocated there, otherwise frees it with hs_free_database().
 */
void release_huge(hs_database_t *db);
|
||||
|
||||
#endif /* HUGE_H */
|
780
tools/hsbench/main.cpp
Normal file
780
tools/hsbench/main.cpp
Normal file
@ -0,0 +1,780 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "common.h"
|
||||
#include "data_corpus.h"
|
||||
#include "engine_hyperscan.h"
|
||||
#include "expressions.h"
|
||||
#include "thread_barrier.h"
|
||||
#include "timer.h"
|
||||
#include "util/expression_path.h"
|
||||
#include "util/string_util.h"
|
||||
|
||||
#include "grey.h"
|
||||
#include "hs.h"
|
||||
#include "ue2common.h"
|
||||
#include "util/make_unique.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <clocale>
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <numeric>
|
||||
#include <sstream>
|
||||
#include <set>
|
||||
#include <thread>
|
||||
|
||||
#include <getopt.h>
|
||||
#ifndef _WIN32
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
|
||||
using namespace std;
|
||||
using namespace ue2;
|
||||
using boost::adaptors::map_keys;
|
||||
|
||||
// Globals common to all files.
|
||||
bool echo_matches = false;
|
||||
bool saveDatabases = false;
|
||||
bool loadDatabases = false;
|
||||
string serializePath("");
|
||||
unsigned int somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE;
|
||||
|
||||
namespace /* anonymous */ {
|
||||
|
||||
// Globals local to this file.
|
||||
bool display_per_scan = false;
|
||||
ScanMode scan_mode = ScanMode::STREAMING;
|
||||
unsigned repeats = 20;
|
||||
string exprPath("");
|
||||
string corpusFile("");
|
||||
vector<unsigned int> threadCores;
|
||||
Timer totalTimer;
|
||||
double totalSecs = 0;
|
||||
|
||||
typedef void (*thread_func_t)(void *context);
|
||||
|
||||
class ThreadContext : boost::noncopyable {
|
||||
public:
|
||||
ThreadContext(unsigned num_in, const EngineHyperscan &db_in,
|
||||
thread_barrier &tb_in, thread_func_t function_in,
|
||||
vector<DataBlock> corpus_data_in)
|
||||
: num(num_in), results(repeats), engine(db_in),
|
||||
enginectx(db_in.makeContext()), corpus_data(move(corpus_data_in)),
|
||||
tb(tb_in), function(function_in) {}
|
||||
|
||||
// Start the thread.
|
||||
bool start(int cpu) {
|
||||
thr = thread(function, this);
|
||||
|
||||
// affine if it's asked for
|
||||
if (cpu >= 0) {
|
||||
return affine(cpu);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Wait for the thread to exit.
|
||||
void join() {
|
||||
thr.join();
|
||||
}
|
||||
|
||||
// Serialise all threads on a global barrier.
|
||||
void barrier() {
|
||||
tb.wait();
|
||||
}
|
||||
|
||||
// Apply processor affinity (if available) to this thread.
|
||||
bool affine(UNUSED int cpu) {
|
||||
#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
|
||||
cpu_set_t cpuset;
|
||||
CPU_ZERO(&cpuset);
|
||||
assert(cpu >= 0 && cpu < CPU_SETSIZE);
|
||||
|
||||
// The 'clang' compiler complains about an unused result here, so we
|
||||
// silence it.
|
||||
(void)CPU_SET(cpu, &cpuset);
|
||||
|
||||
int rv = pthread_setaffinity_np(thr.native_handle(), sizeof(cpuset),
|
||||
&cpuset);
|
||||
return (rv == 0);
|
||||
#endif
|
||||
return false; // not available
|
||||
}
|
||||
|
||||
unsigned num;
|
||||
Timer timer;
|
||||
vector<ResultEntry> results;
|
||||
const EngineHyperscan &engine;
|
||||
unique_ptr<EngineContext> enginectx;
|
||||
vector<DataBlock> corpus_data;
|
||||
|
||||
protected:
|
||||
thread_barrier &tb; // shared barrier for time sync
|
||||
thread_func_t function;
|
||||
thread thr;
|
||||
};
|
||||
|
||||
/**
 * Print the option summary to stdout, followed by "Error: <error>" when
 * \p error is non-null.
 */
static
void usage(const char *error) {
    static const char help_text[] =
        "Usage: hsbench [OPTIONS...]\n\n"
        "Options:\n\n"
        " -h Display help and exit.\n"
        " -G OVERRIDES Overrides for the grey box.\n"
        " -e PATH Path to expression directory.\n"
        " -s FILE Signature file to use.\n"
        " -z NUM Signature ID to use.\n"
        " -c FILE File to use as corpus.\n"
        " -n NUMBER Repeat scan NUMBER times (default 20).\n"
        " -N Benchmark in block mode"
        " (default: streaming).\n"
        " -V Benchmark in vectored mode"
        " (default: streaming).\n"
        " -T CPU,CPU,... Benchmark with threads on these CPUs.\n"
        " -i DIR Don't compile, load from files in DIR"
        " instead.\n"
        " -w DIR After compiling, save to files in DIR.\n"
        " -d NUMBER Set SOM precision mode (default: 8 (large)).\n"
        "\n"
        " --per-scan Display per-scan Mbit/sec results.\n"
        " --echo-matches Display all matches that occur during scan.\n"
        "\n\n";

    printf("%s", help_text);

    if (error != nullptr) {
        printf("Error: %s\n", error);
    }
}
|
||||
|
||||
/** Wraps up a name and the set of signature IDs it refers to. */
|
||||
struct BenchmarkSigs {
|
||||
BenchmarkSigs(string name_in, SignatureSet sigs_in)
|
||||
: name(move(name_in)), sigs(move(sigs_in)) {}
|
||||
string name;
|
||||
SignatureSet sigs;
|
||||
};
|
||||
|
||||
/** Process command-line arguments. Prints usage and exits on error. */
|
||||
static
|
||||
void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
|
||||
UNUSED Grey &grey) {
|
||||
const char options[] = "-b:c:Cd:e:G:hi:n:No:p:sT:Vw:z:";
|
||||
int in_sigfile = 0;
|
||||
int do_per_scan = 0;
|
||||
int do_echo_matches = 0;
|
||||
vector<string> sigFiles;
|
||||
|
||||
static struct option longopts[] = {
|
||||
{"per-scan", 0, &do_per_scan, 1},
|
||||
{"echo-matches", 0, &do_echo_matches, 1},
|
||||
{nullptr, 0, nullptr, 0}
|
||||
};
|
||||
|
||||
for (;;) {
|
||||
int c = getopt_long(argc, argv, options, longopts, nullptr);
|
||||
if (c < 0) {
|
||||
break;
|
||||
}
|
||||
switch (c) {
|
||||
case 'c':
|
||||
corpusFile.assign(optarg);
|
||||
break;
|
||||
case 'd': {
|
||||
unsigned dist;
|
||||
if (!fromString(optarg, dist)) {
|
||||
usage("Must provide an integer argument to '-d' flag");
|
||||
exit(1);
|
||||
}
|
||||
switch (dist) {
|
||||
case 2:
|
||||
somPrecisionMode = HS_MODE_SOM_HORIZON_SMALL;
|
||||
break;
|
||||
case 4:
|
||||
somPrecisionMode = HS_MODE_SOM_HORIZON_MEDIUM;
|
||||
break;
|
||||
case 8:
|
||||
somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE;
|
||||
break;
|
||||
default:
|
||||
usage("SOM precision must be 2, 4 or 8");
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'e':
|
||||
exprPath.assign(optarg);
|
||||
break;
|
||||
#ifndef RELEASE_BUILD
|
||||
case 'G':
|
||||
applyGreyOverrides(&grey, string(optarg));
|
||||
break;
|
||||
#endif
|
||||
case 'h':
|
||||
usage(nullptr);
|
||||
exit(0);
|
||||
break;
|
||||
case 'n':
|
||||
if (!fromString(optarg, repeats) || repeats == 0) {
|
||||
usage("Couldn't parse argument to -n flag, should be"
|
||||
" a positive integer.");
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
in_sigfile = 2;
|
||||
break;
|
||||
case 'N':
|
||||
scan_mode = ScanMode::BLOCK;
|
||||
break;
|
||||
case 'V':
|
||||
scan_mode = ScanMode::VECTORED;
|
||||
break;
|
||||
case 'T':
|
||||
if (!strToList(optarg, threadCores)) {
|
||||
usage("Couldn't parse argument to -T flag, should be"
|
||||
" a list of positive integers.");
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
case 'z': {
|
||||
unsigned int sinumber;
|
||||
if (!fromString(optarg, sinumber)) {
|
||||
usage("Argument to '-z' flag must be an integer");
|
||||
exit(1);
|
||||
}
|
||||
SignatureSet sigs = {sinumber};
|
||||
sigSets.emplace_back(string("-z ") + optarg, sigs);
|
||||
break;
|
||||
}
|
||||
case 'i':
|
||||
loadDatabases = true;
|
||||
serializePath = optarg;
|
||||
break;
|
||||
case 'w':
|
||||
saveDatabases = true;
|
||||
serializePath = optarg;
|
||||
break;
|
||||
case 1:
|
||||
if (in_sigfile) {
|
||||
sigFiles.push_back(optarg);
|
||||
in_sigfile = 2;
|
||||
break;
|
||||
}
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
usage("Unrecognised command line argument.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (in_sigfile) {
|
||||
in_sigfile--;
|
||||
}
|
||||
}
|
||||
|
||||
if (do_echo_matches) {
|
||||
echo_matches = true;
|
||||
}
|
||||
if (do_per_scan) {
|
||||
display_per_scan = true;
|
||||
}
|
||||
|
||||
if (exprPath.empty() && !sigFiles.empty()) {
|
||||
/* attempt to infer an expression directory */
|
||||
auto si = sigFiles.begin();
|
||||
exprPath = inferExpressionPath(*si);
|
||||
for (++si; si != sigFiles.end(); ++si) {
|
||||
if (exprPath != inferExpressionPath(*si)) {
|
||||
usage("Unable to infer consistent expression directory");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Must have a valid expression path
|
||||
if (exprPath.empty()) {
|
||||
usage("Must specify an expression path with the -e option.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Must have valid database to scan
|
||||
if (corpusFile.empty()) {
|
||||
usage("Must specify a corpus file with the -c option.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Cannot ask for both loading and saving
|
||||
if (loadDatabases && saveDatabases) {
|
||||
usage("You cannot both load and save databases.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Read in any -s signature sets.
|
||||
for (const auto &file : sigFiles) {
|
||||
SignatureSet sigs;
|
||||
loadSignatureList(file, sigs);
|
||||
sigSets.emplace_back(file, move(sigs));
|
||||
}
|
||||
}
|
||||
|
||||
/** Start the global timer. */
|
||||
static
|
||||
void startTotalTimer(ThreadContext *ctx) {
|
||||
if (ctx->num != 0) {
|
||||
return; // only runs in the first thread
|
||||
}
|
||||
totalTimer.start();
|
||||
}
|
||||
|
||||
/** Stop the global timer and calculate totals. */
|
||||
static
|
||||
void stopTotalTimer(ThreadContext *ctx) {
|
||||
if (ctx->num != 0) {
|
||||
return; // only runs in the first thread
|
||||
}
|
||||
totalTimer.complete();
|
||||
totalSecs = totalTimer.seconds();
|
||||
}
|
||||
|
||||
/** Run a benchmark over a given engine and corpus in block mode. */
|
||||
static
|
||||
void benchBlock(void *context) {
|
||||
ThreadContext *ctx = (ThreadContext *)context;
|
||||
|
||||
// Synchronization point
|
||||
ctx->barrier();
|
||||
|
||||
startTotalTimer(ctx);
|
||||
|
||||
for (ResultEntry &r : ctx->results) {
|
||||
ctx->timer.start();
|
||||
|
||||
for (const DataBlock &block : ctx->corpus_data) {
|
||||
ctx->engine.scan(block.payload.c_str(), block.payload.size(),
|
||||
block.id, r, *ctx->enginectx);
|
||||
}
|
||||
|
||||
ctx->timer.complete();
|
||||
r.seconds = ctx->timer.seconds();
|
||||
}
|
||||
|
||||
// Synchronization point
|
||||
ctx->barrier();
|
||||
|
||||
// Now that all threads are finished, we can stop the clock.
|
||||
stopTotalTimer(ctx);
|
||||
}
|
||||
|
||||
/** Structure used to represent a stream. */
|
||||
struct StreamInfo {
|
||||
unsigned int stream_id = ~0U;
|
||||
unsigned int first_block_id = ~0U;
|
||||
unsigned int last_block_id = 0;
|
||||
unique_ptr<EngineStream> eng_handle;
|
||||
};
|
||||
|
||||
static
|
||||
u64a count_streams(const vector<DataBlock> &corpus_blocks) {
|
||||
set<unsigned int> streams;
|
||||
for (const DataBlock &block : corpus_blocks) {
|
||||
streams.insert(block.stream_id);
|
||||
}
|
||||
|
||||
return (u64a)streams.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Take a ThreadContext and prepare a vector<StreamDataBlock> for streaming mode
|
||||
* scanning from it.
|
||||
*/
|
||||
static
|
||||
vector<StreamInfo> prepStreamingData(const ThreadContext *ctx) {
|
||||
vector<StreamInfo> info(count_streams(ctx->corpus_data));
|
||||
for (const DataBlock &block : ctx->corpus_data) {
|
||||
assert(block.internal_stream_index < info.size());
|
||||
StreamInfo &si = info[block.internal_stream_index];
|
||||
|
||||
/* check if this is the first time we have encountered this stream */
|
||||
if (si.first_block_id > si.last_block_id) {
|
||||
si.stream_id = block.stream_id;
|
||||
si.first_block_id = block.id;
|
||||
si.last_block_id = block.id;
|
||||
} else {
|
||||
assert(block.stream_id == si.stream_id);
|
||||
assert(block.id > si.last_block_id);
|
||||
assert(block.id > si.first_block_id);
|
||||
si.last_block_id = block.id;
|
||||
}
|
||||
}
|
||||
return info;
|
||||
}
|
||||
|
||||
static
|
||||
void benchStreamingInternal(ThreadContext *ctx, vector<StreamInfo> &streams) {
|
||||
assert(ctx);
|
||||
const EngineHyperscan &e = ctx->engine;
|
||||
const vector<DataBlock> &blocks = ctx->corpus_data;
|
||||
|
||||
for (ResultEntry &r : ctx->results) {
|
||||
ctx->timer.start();
|
||||
|
||||
for (const auto &b : blocks) {
|
||||
StreamInfo &stream = streams[b.internal_stream_index];
|
||||
assert(stream.stream_id == b.stream_id);
|
||||
|
||||
// If this is the first block in the stream, open the stream
|
||||
// handle.
|
||||
if (b.id == stream.first_block_id) {
|
||||
assert(!stream.eng_handle);
|
||||
stream.eng_handle = e.streamOpen(*ctx->enginectx, b.stream_id);
|
||||
if (!stream.eng_handle) {
|
||||
printf("Fatal error: stream open failed!\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
assert(stream.eng_handle);
|
||||
|
||||
e.streamScan(*stream.eng_handle, b.payload.c_str(),
|
||||
b.payload.size(), b.id, r);
|
||||
|
||||
// if this was the last block in the stream, close the stream handle
|
||||
if (b.id == stream.last_block_id) {
|
||||
e.streamClose(move(stream.eng_handle), r);
|
||||
stream.eng_handle = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
ctx->timer.complete();
|
||||
r.seconds = ctx->timer.seconds();
|
||||
}
|
||||
}
|
||||
|
||||
/** Run a benchmark over a given engine and corpus in streaming mode. */
|
||||
static
|
||||
void benchStreaming(void *context) {
|
||||
ThreadContext *ctx = (ThreadContext *)context;
|
||||
vector<StreamInfo> streams = prepStreamingData(ctx);
|
||||
|
||||
// Synchronization point
|
||||
ctx->barrier();
|
||||
|
||||
startTotalTimer(ctx);
|
||||
|
||||
benchStreamingInternal(ctx, streams);
|
||||
|
||||
// Synchronization point
|
||||
ctx->barrier();
|
||||
|
||||
// Now that all threads are finished, we can stop the clock.
|
||||
stopTotalTimer(ctx);
|
||||
}
|
||||
|
||||
/** In-memory structure for a data block to be scanned in vectored mode. */
|
||||
struct VectoredInfo {
|
||||
vector<const char *> data;
|
||||
vector<unsigned int> len;
|
||||
unsigned int stream_id;
|
||||
};
|
||||
|
||||
/**
|
||||
* Take a ThreadContext and prepare a vector<VectoredInfo> for vectored mode
|
||||
* scanning from it.
|
||||
*/
|
||||
static
|
||||
vector<VectoredInfo> prepVectorData(const ThreadContext *ctx) {
|
||||
vector<VectoredInfo> out(count_streams(ctx->corpus_data));
|
||||
for (const DataBlock &block : ctx->corpus_data) {
|
||||
VectoredInfo &vi = out[block.internal_stream_index];
|
||||
if (vi.data.empty()) {
|
||||
vi.stream_id = block.stream_id;
|
||||
} else {
|
||||
assert(vi.stream_id == block.stream_id);
|
||||
}
|
||||
vi.data.push_back(block.payload.c_str());
|
||||
vi.len.push_back(block.payload.size());
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
/** Run a benchmark over a given engine and corpus in vectored mode. */
|
||||
static
|
||||
void benchVectored(void *context) {
|
||||
ThreadContext *ctx = (ThreadContext *)context;
|
||||
|
||||
vector<VectoredInfo> v_plans = prepVectorData(ctx);
|
||||
|
||||
// Synchronization point
|
||||
ctx->barrier();
|
||||
|
||||
startTotalTimer(ctx);
|
||||
|
||||
for (ResultEntry &r : ctx->results) {
|
||||
ctx->timer.start();
|
||||
|
||||
for (const VectoredInfo &v_plan : v_plans) {
|
||||
ctx->engine.scan_vectored(&v_plan.data[0], &v_plan.len[0],
|
||||
v_plan.data.size(), v_plan.stream_id, r,
|
||||
*ctx->enginectx);
|
||||
}
|
||||
|
||||
ctx->timer.complete();
|
||||
r.seconds = ctx->timer.seconds();
|
||||
}
|
||||
|
||||
// Synchronization point
|
||||
ctx->barrier();
|
||||
|
||||
// Now that all threads are finished, we can stop the clock.
|
||||
stopTotalTimer(ctx);
|
||||
}
|
||||
|
||||
/** Given a time and a size, compute the throughput in megabits/sec. */
|
||||
static
|
||||
long double calc_mbps(double seconds, u64a bytes) {
|
||||
assert(seconds > 0);
|
||||
return (long double)bytes / ((long double)seconds * 125000);
|
||||
}
|
||||
|
||||
/** Dump per-scan throughput data to screen. */
|
||||
static
|
||||
void displayPerScanResults(const vector<unique_ptr<ThreadContext>> &threads,
|
||||
u64a bytesPerRun) {
|
||||
for (const auto &t : threads) {
|
||||
const auto &results = t->results;
|
||||
for (size_t j = 0; j != results.size(); j++) {
|
||||
const auto &r = results[j];
|
||||
double mbps = calc_mbps(r.seconds, bytesPerRun);
|
||||
printf("T %2u Scan %2zu: %'0.2f Mbit/sec\n", t->num, j, mbps);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static
|
||||
u64a byte_size(const vector<DataBlock> &corpus_blocks) {
|
||||
u64a total = 0;
|
||||
for (const DataBlock &block : corpus_blocks) {
|
||||
total += block.payload.size();
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
/** Dump benchmark results to screen. */
|
||||
static
|
||||
void displayResults(const vector<unique_ptr<ThreadContext>> &threads,
|
||||
const vector<DataBlock> &corpus_blocks) {
|
||||
u64a bytesPerRun = byte_size(corpus_blocks);
|
||||
u64a matchesPerRun = threads[0]->results[0].matches;
|
||||
|
||||
// Sanity check: all of our results should have the same match count.
|
||||
for (const auto &t : threads) {
|
||||
if (!all_of(begin(t->results), end(t->results),
|
||||
[&matchesPerRun](const ResultEntry &e) {
|
||||
return e.matches == matchesPerRun;
|
||||
})) {
|
||||
printf("\nWARNING: PER-SCAN MATCH COUNTS ARE INCONSISTENT!\n\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
printf("Time spent scanning: %'0.3f seconds\n", totalSecs);
|
||||
printf("Corpus size: %'llu bytes ", bytesPerRun);
|
||||
switch (scan_mode) {
|
||||
case ScanMode::STREAMING:
|
||||
printf("(%'zu blocks in %'llu streams)\n", corpus_blocks.size(),
|
||||
count_streams(corpus_blocks));
|
||||
break;
|
||||
case ScanMode::VECTORED:
|
||||
printf("(%'zu blocks in %'llu vectors)\n", corpus_blocks.size(),
|
||||
count_streams(corpus_blocks));
|
||||
break;
|
||||
case ScanMode::BLOCK:
|
||||
printf("(%'zu blocks)\n", corpus_blocks.size());
|
||||
break;
|
||||
}
|
||||
|
||||
u64a totalBytes = bytesPerRun * repeats * threads.size();
|
||||
u64a totalBlocks = corpus_blocks.size() * repeats * threads.size();
|
||||
|
||||
double matchRate = ((double)matchesPerRun * 1024) / bytesPerRun;
|
||||
printf("Matches per iteration: %'llu (%'0.3f matches/kilobyte)\n",
|
||||
matchesPerRun, matchRate);
|
||||
|
||||
double blockRate = (double)totalBlocks / (double)totalSecs;
|
||||
printf("Overall block rate: %'0.2f blocks/sec\n", blockRate);
|
||||
printf("Overall throughput: %'0.2Lf Mbit/sec\n",
|
||||
calc_mbps(totalSecs, totalBytes));
|
||||
printf("\n");
|
||||
|
||||
if (display_per_scan) {
|
||||
displayPerScanResults(threads, bytesPerRun);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a thread context for this scanning mode.
|
||||
*
|
||||
* Note: does not take blocks by reference. This is to give every thread their
|
||||
* own copy of the data. It would be unrealistic for every thread to be scanning
|
||||
* the same copy of the data.
|
||||
*/
|
||||
static
|
||||
unique_ptr<ThreadContext> makeThreadContext(const EngineHyperscan &db,
|
||||
const vector<DataBlock> &blocks,
|
||||
unsigned id,
|
||||
thread_barrier &sync_barrier) {
|
||||
thread_func_t fn = nullptr;
|
||||
switch (scan_mode) {
|
||||
case ScanMode::STREAMING:
|
||||
fn = benchStreaming;
|
||||
break;
|
||||
case ScanMode::VECTORED:
|
||||
fn = benchVectored;
|
||||
break;
|
||||
case ScanMode::BLOCK:
|
||||
fn = benchBlock;
|
||||
break;
|
||||
}
|
||||
assert(fn);
|
||||
|
||||
return ue2::make_unique<ThreadContext>(id, db, sync_barrier, fn, blocks);
|
||||
}
|
||||
|
||||
/** Run the given benchmark. */
|
||||
static
|
||||
void runBenchmark(const EngineHyperscan &db,
|
||||
const vector<DataBlock> &corpus_blocks) {
|
||||
size_t numThreads;
|
||||
bool useAffinity = false;
|
||||
|
||||
if (threadCores.empty()) {
|
||||
numThreads = 1;
|
||||
} else {
|
||||
numThreads = threadCores.size();
|
||||
#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
|
||||
useAffinity = true;
|
||||
#else
|
||||
useAffinity = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Initialise a barrier that will let us sync threads before/after scanning
|
||||
// for timer measurements.
|
||||
thread_barrier sync_barrier(numThreads);
|
||||
|
||||
vector<unique_ptr<ThreadContext>> threads;
|
||||
|
||||
for (unsigned i = 0; i < numThreads; i++) {
|
||||
auto t = makeThreadContext(db, corpus_blocks, i, sync_barrier);
|
||||
int core = useAffinity ? (int)threadCores[i] : -1;
|
||||
if (!t->start(core)) {
|
||||
printf("Unable to start processing thread %u\n", i);
|
||||
exit(1);
|
||||
}
|
||||
threads.push_back(move(t));
|
||||
}
|
||||
|
||||
// Reap threads.
|
||||
for (auto &t : threads) {
|
||||
t->join();
|
||||
}
|
||||
|
||||
// Display global results.
|
||||
displayResults(threads, corpus_blocks);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/** Main driver. */
|
||||
int main(int argc, char *argv[]) {
|
||||
Grey grey;
|
||||
|
||||
setlocale(LC_ALL, ""); // use the user's locale
|
||||
|
||||
#ifndef NDEBUG
|
||||
printf("\nWARNING: DO NOT BENCHMARK A HYPERSCAN BUILD WITH ASSERTIONS\n\n");
|
||||
#endif
|
||||
|
||||
vector<BenchmarkSigs> sigSets;
|
||||
processArgs(argc, argv, sigSets, grey);
|
||||
|
||||
// read in and process our expressions
|
||||
ExpressionMap exprMapTemplate;
|
||||
loadExpressions(exprPath, exprMapTemplate);
|
||||
|
||||
// If we have no signature sets, the user wants us to benchmark all the
|
||||
// known expressions together.
|
||||
if (sigSets.empty()) {
|
||||
SignatureSet sigs;
|
||||
for (auto i : exprMapTemplate | map_keys) {
|
||||
sigs.push_back(i);
|
||||
}
|
||||
sigSets.emplace_back(exprPath, move(sigs));
|
||||
}
|
||||
|
||||
// read in and process our corpus
|
||||
vector<DataBlock> corpus_blocks;
|
||||
try {
|
||||
corpus_blocks = readCorpus(corpusFile);
|
||||
} catch (const DataCorpusError &e) {
|
||||
printf("Corpus data error: %s\n", e.msg.c_str());
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (const auto &s : sigSets) {
|
||||
ExpressionMap exprMap = exprMapTemplate; // copy
|
||||
|
||||
limitBySignature(exprMap, s.sigs);
|
||||
if (exprMap.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto engine = buildEngineHyperscan(exprMap, scan_mode, s.name, grey);
|
||||
if (!engine) {
|
||||
printf("Error: expressions failed to compile.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
||||
runBenchmark(*engine, corpus_blocks);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
58
tools/hsbench/scripts/CorpusBuilder.py
Executable file
58
tools/hsbench/scripts/CorpusBuilder.py
Executable file
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
'''
|
||||
A module to construct corpora databases for the Hyperscan benchmarker
|
||||
(hsbench).
|
||||
|
||||
After construction, simply add blocks with the add_chunk() method, then call
|
||||
finish() when you're done.
|
||||
'''
|
||||
|
||||
import os.path
|
||||
|
||||
try:
|
||||
from sqlite3 import dbapi2 as sqlite
|
||||
except:
|
||||
from pysqlite2 import dbapi2 as sqlite
|
||||
|
||||
class CorpusBuilder:
    '''
    Writer for an hsbench corpus stored in a new SQLite database.

    The database holds a single table, ``chunk``, with one row per block:
    a sequential id, the id of the stream the block belongs to, and the
    block's bytes. Refuses to overwrite an existing file.
    '''

    SCHEMA = '''
CREATE TABLE chunk (
    id integer primary key,
    stream_id integer not null,
    data blob
);
'''

    def __init__(self, outfile):
        '''Create the database at @outfile; raises RuntimeError if it exists.'''
        if os.path.exists(outfile):
            raise RuntimeError("Database '%s' already exists" % outfile)
        self.outfile = outfile
        self.db = sqlite.connect(self.outfile)
        self.db.executescript(CorpusBuilder.SCHEMA)
        # Id that the next add_chunk() call will assign.
        self.current_chunk_id = 0

    def add_chunk(self, stream_id, data):
        '''Insert @data as the next chunk of stream @stream_id; return its id.'''
        assigned_id = self.current_chunk_id
        insert_sql = 'insert into chunk (id, stream_id, data) values (?, ?, ?)'
        row = (assigned_id, stream_id, sqlite.Binary(data))
        self.db.cursor().execute(insert_sql, row)
        self.current_chunk_id = assigned_id + 1
        return assigned_id

    def finish(self):
        '''Commit all chunks, then index, vacuum and analyze the database.'''
        self.db.commit()

        closing_statements = (
            'create index chunk_stream_id_idx on chunk(stream_id)',
            'vacuum',
            'analyze',
        )
        for statement in closing_statements:
            self.db.cursor().execute(statement)

        self.db.commit()
|
68
tools/hsbench/scripts/gutenbergCorpus.py
Executable file
68
tools/hsbench/scripts/gutenbergCorpus.py
Executable file
@ -0,0 +1,68 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
'''
|
||||
This script creates a Hyperscan benchmarking corpus database from a supplied
|
||||
group of Project Gutenberg texts.
|
||||
'''
|
||||
|
||||
import sys, getopt, os.path
|
||||
import gutenberg.acquire, gutenberg.cleanup, gutenberg.query
|
||||
from CorpusBuilder import CorpusBuilder
|
||||
|
||||
stream_id = 0
|
||||
stream_bytes = 0
|
||||
|
||||
def addBlocks(builder, block_size, stream_size, text_id, text):
    '''
    Cut @text into consecutive blocks of at most @block_size and add each
    one to @builder under the current stream id.

    The module-level counters stream_id and stream_bytes carry over between
    calls: once at least @stream_size bytes have been written to the current
    stream, the next block starts a new stream.
    '''
    global stream_id
    global stream_bytes

    # Single-argument print() emits the same text on Python 2 and Python 3.
    print("text %s len %s" % (text_id, len(text)))
    i = 0
    while i < len(text):
        chunk = text[i:min(len(text), i + block_size)]
        builder.add_chunk(stream_id, chunk)
        i += block_size
        stream_bytes += len(chunk)
        if stream_bytes >= stream_size:
            stream_id += 1
            stream_bytes = 0
    # i advanced by block_size once per block, so i // block_size is the
    # number of blocks added.
    print("Text %s : added %s blocks of %s bytes."
          % (text_id, i // block_size, block_size))
|
||||
|
||||
def buildCorpus(outFN, block_size, stream_size, text_ids):
    '''
    Download each Project Gutenberg text in @text_ids, strip its headers and
    write it as blocks to a new corpus database named @outFN.

    Exits with a non-zero status if @text_ids is empty.
    '''
    if len(text_ids) == 0:
        sys.stderr.write("Must provide at least one input ID\n")
        # This is a usage error, so report failure to the caller's shell.
        sys.exit(-1)

    builder = CorpusBuilder(outFN)

    # Stream bookkeeping lives in the module-level stream_id/stream_bytes
    # counters updated by addBlocks(); nothing to initialise locally.
    total_bytes = 0

    for text_id in text_ids:
        text_id = int(text_id)
        text = gutenberg.acquire.load_etext(text_id)
        text = gutenberg.cleanup.strip_headers(text).strip()
        addBlocks(builder, block_size, stream_size, text_id, text)
        total_bytes += len(text)

    builder.finish()

    print("Total: %s bytes." % total_bytes)
|
||||
|
||||
def usage(exeName):
    '''Write the command-line synopsis to stderr and exit with status -1.'''
    errmsg = "Usage: %s -o <output file> -b <block size> -s <max stream size> <gutenberg text id>..."
    errmsg = errmsg % exeName
    # sys.stderr.write works on both Python 2 and Python 3.
    sys.stderr.write(errmsg + "\n")
    sys.exit(-1)
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: -o, -b and -s are all mandatory; the remaining
    # positional arguments are the Gutenberg text ids.
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'o:b:s:')
    except getopt.GetoptError:
        # Unknown option or missing option argument: show the synopsis.
        usage(os.path.basename(sys.argv[0]))
    opts = dict(opts)

    requiredKeys = [ '-o', '-b', '-s' ]
    for k in requiredKeys:
        # "in" replaces dict.has_key(), which does not exist on Python 3.
        if k not in opts:
            usage(os.path.basename(sys.argv[0]))

    buildCorpus(opts['-o'], int(opts['-b']), int(opts['-s']), args)
|
53
tools/hsbench/scripts/linebasedCorpus.py
Executable file
53
tools/hsbench/scripts/linebasedCorpus.py
Executable file
@ -0,0 +1,53 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
'''
|
||||
Simple script to take a file full of lines of text and push them into a
|
||||
Hyperscan benchmarking corpus database, one block per line.
|
||||
'''
|
||||
|
||||
import sys, getopt, os.path
|
||||
from CorpusBuilder import CorpusBuilder
|
||||
|
||||
def lineCorpus(inFN, outFN):
    '''
    Read lines from file name @inFN and write them as blocks to a new db with
    name @outFN.

    Every line becomes one block in stream 0, with trailing whitespace
    (including the newline) removed. Exits with status -1 if @inFN does not
    exist and with status 0 if it contains no lines.
    '''

    if not os.path.exists(inFN):
        # Report the input path that was actually looked up.
        sys.stderr.write("Input file '%s' does not exist. Exiting.\n" % inFN)
        sys.exit(-1)

    # Close the input file as soon as it has been read.
    with open(inFN) as infile:
        lines = infile.readlines()

    if len(lines) == 0:
        sys.stderr.write("Input file contained no lines. Exiting.\n")
        sys.exit(0)

    builder = CorpusBuilder(outFN)

    # write a single stream to contain everything
    streamId = 0

    for l in lines:
        builder.add_chunk(streamId, l.rstrip())

    builder.finish()
|
||||
|
||||
def usage(exeName):
    '''Write the command-line synopsis to stderr and exit with status -1.'''
    errmsg = "Usage: %s -i <input file> -o <output file>"
    errmsg = errmsg % exeName
    # sys.stderr.write works on both Python 2 and Python 3.
    sys.stderr.write(errmsg + "\n")
    sys.exit(-1)
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: -i and -o are mandatory. -c is still accepted by
    # the option parser for compatibility, but its value is not used.
    try:
        args = getopt.getopt(sys.argv[1:], 'i:o:c:')
    except getopt.GetoptError:
        # Unknown option or missing option argument: show the synopsis.
        usage(os.path.basename(sys.argv[0]))
    args = dict(args[0])

    requiredKeys = [ '-i', '-o' ]
    for k in requiredKeys:
        # "in" replaces dict.has_key(), which does not exist on Python 3.
        if k not in args:
            usage(os.path.basename(sys.argv[0]))

    fnArgs = tuple([args[k] for k in requiredKeys])
    lineCorpus(*fnArgs)
|
301
tools/hsbench/scripts/pcapCorpus.py
Executable file
301
tools/hsbench/scripts/pcapCorpus.py
Executable file
@ -0,0 +1,301 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
'''
|
||||
Script to convert a pcap file containing UDP and TCP packets to a corpus file.
|
||||
'''
|
||||
|
||||
import sys, getopt, pprint, os
|
||||
from sqlite3 import dbapi2 as sqlite
|
||||
import pcap
|
||||
from optparse import OptionParser
|
||||
from socket import AF_INET, IPPROTO_UDP, IPPROTO_TCP, inet_ntop, ntohs, ntohl, inet_ntoa
|
||||
import struct
|
||||
from CorpusBuilder import CorpusBuilder
|
||||
|
||||
ETHERTYPE_IP = 0x0800 # IP protocol
|
||||
ETHERTYPE_ARP = 0x0806 # Addr. resolution protocol
|
||||
ETHERTYPE_REVARP = 0x8035 # reverse Addr. resolution protocol
|
||||
ETHERTYPE_VLAN = 0x8100 # IEEE 802.1Q VLAN tagging
|
||||
ETHERTYPE_IPV6 = 0x86dd # IPv6
|
||||
|
||||
#
|
||||
# A dictionary of active TCP streams
|
||||
#
|
||||
tcp_streams = {}
|
||||
|
||||
#
|
||||
# A dictionary of UDP streams
|
||||
#
|
||||
udp_streams = {}
|
||||
|
||||
#
|
||||
# Current stream id
|
||||
cur_stream_id = 0
|
||||
|
||||
def usage(exeName) :
    '''Write the command-line synopsis to stderr and exit with status -1.'''
    errmsg = "Usage: %s -i <pcap-file> -o <sqlite-file>"
    errmsg = errmsg % exeName
    # sys.stderr.write works on both Python 2 and Python 3.
    sys.stderr.write(errmsg + "\n")
    sys.exit(-1)
|
||||
|
||||
class FiveTuple(object):
    """Protocol number plus source and destination address/port of a flow.

    str() of an instance is the comma-separated field list; that string is
    what the stream dictionaries in this module use as their key.
    """
    def __init__(self, protocol, src_addr, src_port, dst_addr, dst_port):
        self.protocol = protocol
        self.src_addr, self.src_port = src_addr, src_port
        self.dst_addr, self.dst_port = dst_addr, dst_port

    def __str__(self):
        fields = (self.protocol,
                  self.src_addr, self.src_port,
                  self.dst_addr, self.dst_port)
        return "%d,%s,%d,%s,%d" % fields
|
||||
|
||||
class UdpSegment:
    """A single UDP datagram: its flow identity, raw header and payload."""
    def __init__(self, five_tuple, header, payload):
        self.five_tuple = five_tuple
        self.udp_header, self.udp_payload = header, payload
|
||||
|
||||
class TcpSegment:
    """A single TCP segment: flow identity, raw header and payload.

    The sequence and acknowledgement numbers are decoded from header bytes
    4..11 on construction. Segments order by sequence number, so a list of
    them can be passed straight to sorted().
    """
    def __init__(self, five_tuple, header, payload):
        self.five_tuple = five_tuple
        self.tcp_header = header
        self.tcp_payload = payload
        self.tcp_sequence_number, self.tcp_acknowledgement_number = struct.unpack('!LL', header[4:12])

    def _flag_bits(self):
        """Low six bits of header byte 13 (the TCP flag bits)."""
        # Slice-then-unpack yields an int on both Python 2 (str) and
        # Python 3 (bytes); ord() on an indexed byte only works on Python 2.
        return struct.unpack('B', self.tcp_header[13:14])[0] & 0x3F

    def opt_isset_FIN(self):
        """Non-zero if the FIN flag (0x01) is set."""
        return (self._flag_bits() & 0x01)

    def opt_isset_SYN(self):
        """Non-zero if the SYN flag (0x02) is set."""
        return (self._flag_bits() & 0x02)

    def get_sequence_number(self):
        return self.tcp_sequence_number

    def __lt__(self, other):
        # Python 3 sorting uses __lt__; Python 2 prefers it when present.
        return self.tcp_sequence_number < other.tcp_sequence_number

    def __cmp__(self, other):
        # Kept for Python 2 callers; written without the cmp() builtin,
        # which does not exist on Python 3.
        mine, theirs = self.tcp_sequence_number, other.tcp_sequence_number
        return (mine > theirs) - (mine < theirs)
|
||||
|
||||
class TcpStream:
    """Segments collected so far for one TCP five-tuple.

    initial_sequence_number is taken from the first segment appended while
    the segment list is empty.
    """
    TCP_STREAM_ACTIVE = 0x1
    TCP_STREAM_CLOSED = 0x02

    def __init__(self, five_tuple):
        self.five_tuple = five_tuple
        self.initial_sequence_number = 0
        self.segments = []

    def reset_stream(self):
        """Forget all segments and the initial sequence number."""
        self.segments, self.initial_sequence_number = [], 0

    def set_initial_sequence_number(self, sequence_number):
        self.initial_sequence_number = sequence_number

    def append_segment(self, tcp_segment):
        """Add a segment, recording its sequence number if it is the first."""
        if not self.segments:
            first_seq = tcp_segment.get_sequence_number()
            self.set_initial_sequence_number(first_seq)
        self.segments.append(tcp_segment)

    def get_segments_sorted(self):
        """Return a new list of the segments in their natural sort order."""
        ordered = list(self.segments)
        ordered.sort()
        return ordered
|
||||
|
||||
class UdpStream:
    """Datagrams collected, in arrival order, for one UDP five-tuple."""
    def __init__(self, five_tuple):
        self.five_tuple = five_tuple
        self.segments = list()

    def append_segment(self, udp_segment):
        self.segments += [udp_segment]
|
||||
|
||||
|
||||
def newStream(five_tuple):
    '''
    Allocate and return the next stream ID from the module-level counter.

    @five_tuple is accepted but not used by the allocation.
    '''
    global cur_stream_id
    allocated = cur_stream_id
    cur_stream_id = allocated + 1
    return allocated
|
||||
|
||||
def process_tcp_segment(builder, segment):
    """Add a TCP segment to the stream tracked for its five-tuple.

    A segment with SYN set empties the stream's segment list (after the
    segment itself was appended). A segment with FIN set flushes the stream
    to @builder and removes it from tcp_streams, so the same five-tuple can
    start a fresh stream later.
    """
    key = str(segment.five_tuple)

    stream = tcp_streams.get(key)
    if stream is None:
        # First segment seen for this five-tuple: start tracking it.
        stream = TcpStream(segment.five_tuple)
        tcp_streams[key] = stream
    stream.append_segment(segment)

    if segment.opt_isset_SYN():
        stream.segments = []

    if segment.opt_isset_FIN():
        # Finished with the stream - write its segments out and drop it.
        db_add_tcp_stream_segments(builder, stream)
        del tcp_streams[key]
|
||||
|
||||
def process_udp_segment(builder, segment):
    """Add a UDP segment to the stream tracked for its five-tuple.

    UDP is connectionless, so segments are simply accumulated in
    udp_streams; nothing is written to @builder here.
    """
    key = str(segment.five_tuple)

    stream = udp_streams.get(key)
    if stream is None:
        # First datagram seen for this five-tuple: start tracking it.
        stream = UdpStream(segment.five_tuple)
        udp_streams[key] = stream
    stream.append_segment(segment)
|
||||
|
||||
|
||||
def db_add_tcp_stream_segments(builder, tcp_stream):
    """Write the payload-carrying segments of a TCP stream to @builder.

    Segments are visited in sorted order. A segment is written only if it
    has a payload and its sequence number is greater than that of the last
    segment written (initially 0). The stream ID is allocated lazily, so a
    stream with nothing to write consumes no ID.
    """
    stream_id = None
    highest_written = 0

    for seg in tcp_stream.get_segments_sorted():
        if len(seg.tcp_payload) <= 0:
            continue  # nothing to store
        if seg.tcp_sequence_number <= highest_written:
            continue  # not beyond what was already written
        if stream_id is None:
            stream_id = newStream(tcp_stream.five_tuple)
        builder.add_chunk(stream_id, seg.tcp_payload)
        highest_written = seg.tcp_sequence_number
|
||||
|
||||
|
||||
def db_add_udp_stream_segments(builder, udp_stream):
    """Write every non-empty datagram of a UDP stream to @builder.

    A UDP "stream" here is just all packets seen for one five-tuple, in
    arrival order. The stream ID is allocated lazily, so a stream with only
    empty payloads consumes no ID.
    """
    stream_id = None
    for seg in udp_stream.segments:
        if len(seg.udp_payload) <= 0:
            continue  # nothing to store
        if stream_id is None:
            stream_id = newStream(udp_stream.five_tuple)
        builder.add_chunk(stream_id, seg.udp_payload)
|
||||
|
||||
def enchunk_pcap(pcapFN, sqliteFN):
    """Read the contents of a pcap file with name @pcapFN and produce
    a sqlite db with name @sqliteFN. It will contain chunks of data
    from TCP and UDP streams.

    Only frames whose two bytes immediately before the link-layer payload
    offset equal ETHERTYPE_IP are considered, and of those only UDP and TCP
    packets. TCP streams are written when their FIN is seen; everything
    still pending is written once the capture has been read completely.
    Exits with status -1 if @pcapFN does not exist.
    """

    if not os.path.exists(pcapFN):
        sys.stderr.write("Input file '%s' does not exist. Exiting.\n" % pcapFN)
        sys.exit(-1)

    builder = CorpusBuilder(sqliteFN)

    #
    # Read in the contents of the pcap file, adding stream segments as found
    #
    pcap_ref = pcap.pcap(pcapFN)

    while True:
        try:
            ts, packet = pcap_ref.next()
        except Exception:
            # End of capture (or a read error): stop reading and keep what
            # we have. KeyboardInterrupt/SystemExit are deliberately not
            # caught, so an interrupted run is not mistaken for a complete
            # one.
            break

        dloff = pcap_ref.dloff
        linkLayerType = struct.unpack('!H', packet[(dloff - 2):dloff])[0]
        if linkLayerType != ETHERTYPE_IP:
            # We're only interested in IP packets
            continue

        # Trim the frame to the length stated in the IP header, which drops
        # any link-layer padding.
        ip_pkt_total_len = struct.unpack('!H', packet[dloff + 2: dloff + 4])[0]
        ip_pkt = packet[dloff:dloff + ip_pkt_total_len]

        # One-byte slices unpack to an int on both Python 2 and Python 3.
        pkt_protocol = struct.unpack('B', ip_pkt[9:10])[0]

        if (pkt_protocol != IPPROTO_UDP) and (pkt_protocol != IPPROTO_TCP):
            # we're only interested in UDP and TCP packets at the moment
            continue

        pkt_src_addr = inet_ntoa(ip_pkt[12:16])
        pkt_dst_addr = inet_ntoa(ip_pkt[16:20])

        # Low nibble of the first byte is the header length in 32-bit words.
        ip_hdr_len_offset = (struct.unpack('B', ip_pkt[0:1])[0] & 0x0f) * 4
        ip_payload = ip_pkt[ip_hdr_len_offset:len(ip_pkt)]

        pkt_src_port, pkt_dst_port = struct.unpack('!HH', ip_payload[0:4])
        five_tuple = FiveTuple(pkt_protocol, pkt_src_addr, pkt_src_port, pkt_dst_addr, pkt_dst_port)

        if pkt_protocol == IPPROTO_UDP:
            # The UDP header is a fixed 8 bytes.
            udp_header = ip_payload[0:8]
            udp_payload = ip_payload[8:len(ip_payload)]
            udp_segment = UdpSegment(five_tuple, udp_header, udp_payload)
            process_udp_segment(builder, udp_segment)
        elif pkt_protocol == IPPROTO_TCP:
            # High nibble of byte 12 is the TCP header length in 32-bit words.
            tcp_hdr_len = (struct.unpack('B', ip_payload[12:13])[0] >> 4) * 4
            tcp_header = ip_payload[0:tcp_hdr_len]
            tcp_payload = ip_payload[tcp_hdr_len:len(ip_payload)]
            segment = TcpSegment(five_tuple, tcp_header, tcp_payload)
            process_tcp_segment(builder, segment)

    #
    # Having read the contents of the pcap, we fill the database with any
    # remaining TCP and UDP segments
    #
    for tcp_stream in tcp_streams.values():
        db_add_tcp_stream_segments(builder, tcp_stream)

    for udp_stream in udp_streams.values():
        db_add_udp_stream_segments(builder, udp_stream)

    #
    # We've finished with the database
    #
    builder.finish()
|
||||
|
||||
if __name__ == '__main__' :
    # Script entry point: -i (pcap input) and -o (sqlite output) are both
    # mandatory.
    try:
        args = getopt.getopt(sys.argv[1:], 'i:o:')
    except getopt.GetoptError:
        # Unknown option or missing option argument: show the synopsis.
        usage(os.path.basename(sys.argv[0]))
    args = dict(args[0])

    requiredKeys = [ '-i', '-o']
    for k in requiredKeys :
        # "in" replaces dict.has_key(), which does not exist on Python 3.
        if k not in args :
            usage(os.path.basename(sys.argv[0]))

    fnArgs = tuple([ args[k] for k in requiredKeys ])
    enchunk_pcap(*fnArgs)
|
71
tools/hsbench/thread_barrier.h
Normal file
71
tools/hsbench/thread_barrier.h
Normal file
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief Simple thread barrier.
|
||||
*/
|
||||
|
||||
#ifndef TOOLS_THREAD_BARRIER_H
|
||||
#define TOOLS_THREAD_BARRIER_H
|
||||
|
||||
#include <condition_variable>
#include <mutex>
#include <stdexcept>
|
||||
|
||||
/**
 * \brief Simple reusable thread barrier class.
 *
 * Each call to wait() blocks until wait() has been called N times in total
 * for the current round; the N-th caller releases the others and the
 * barrier resets itself for the next round.
 */
class thread_barrier {
public:
    /**
     * \param n Number of threads that must call wait() to release a round.
     * \throws std::runtime_error if \p n is zero.
     */
    explicit thread_barrier(unsigned int n) : max(n) {
        if (max == 0) {
            throw std::runtime_error("invalid barrier");
        }
    }

    /** Block until all \c n participants have arrived for this round. */
    void wait() {
        std::unique_lock<std::mutex> lock(mtx);
        // Remember which round this caller belongs to.
        const unsigned int my_generation = generation;
        count++;
        if (count >= max) {
            // Last arrival: start a new round and release everyone waiting
            // on the old one.
            count = 0;
            generation++;
            condvar.notify_all();
        } else {
            // Condition variables may wake spuriously, so only return once
            // the round has actually advanced.
            condvar.wait(lock, [this, my_generation] {
                return generation != my_generation;
            });
        }
    }

private:
    std::mutex mtx;
    std::condition_variable condvar;
    unsigned int count = 0;      //!< Arrivals so far in the current round.
    unsigned int generation = 0; //!< Incremented each time a round completes.
    unsigned int max;            //!< Arrivals needed to complete a round.
};
|
||||
|
||||
#endif // TOOLS_THREAD_BARRIER_H
|
59
tools/hsbench/timer.h
Normal file
59
tools/hsbench/timer.h
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef TIMER_H
|
||||
#define TIMER_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <chrono>
|
||||
|
||||
/**
 * Interval timer based on std::chrono::steady_clock.
 *
 * Call start(), then complete(); seconds() reports the time between the
 * two most recently recorded points.
 */
class Timer {
public:
    Timer() = default;

    /** Record the current time as the start of the interval. */
    void start() { clock_start = Clock::now(); }

    /** Record the current time as the end of the interval. */
    void complete() { clock_end = Clock::now(); }

    /** Length of the recorded interval in (fractional) seconds. */
    double seconds() const {
        return std::chrono::duration<double>(clock_end - clock_start).count();
    }

protected:
    using Clock = std::chrono::steady_clock;
    std::chrono::time_point<Clock> clock_start;
    std::chrono::time_point<Clock> clock_end;
};
|
||||
|
||||
#endif // TIMER_H
|
@ -1,7 +1,10 @@
|
||||
# utility libs
|
||||
|
||||
CHECK_FUNCTION_EXISTS(mmap HAVE_MMAP)
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}
|
||||
${PROJECT_SOURCE_DIR})
|
||||
|
||||
set_source_files_properties(
|
||||
${CMAKE_BINARY_DIR}/tools/ExpressionParser.cpp
|
||||
@ -31,3 +34,14 @@ SET(corpusomatic_SRCS
|
||||
)
|
||||
add_library(corpusomatic STATIC ${corpusomatic_SRCS})
|
||||
|
||||
# Static helper library built from the database_util sources.
set(databaseutil_SRCS database_util.cpp database_util.h)
add_library(databaseutil STATIC ${databaseutil_SRCS})
|
||||
|
||||
# Static helper library built from the cross_compile sources.
set(crosscompileutil_SRCS cross_compile.cpp cross_compile.h)
add_library(crosscompileutil STATIC ${crosscompileutil_SRCS})
|
||||
|
115
util/cross_compile.cpp
Normal file
115
util/cross_compile.cpp
Normal file
@ -0,0 +1,115 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "cross_compile.h"
|
||||
#include "src/ue2common.h"
|
||||
#include "src/hs_compile.h"
|
||||
#include "src/util/make_unique.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
/** \brief One named instruction-set option accepted on the command line. */
struct XcompileMode {
    /** Option text matched against the user's input. */
    const char *name;

    /** Value stored into hs_platform_info::cpu_features for this option. */
    unsigned long long cpu_features;
};
|
||||
|
||||
/**
 * \brief Table of recognised instruction-set options.
 *
 * Searched in order by xcompileReadMode() and listed in order by
 * xcompileUsage().
 */
static const XcompileMode xcompile_options[] = {
    { "avx2", HS_CPU_FEATURES_AVX2 },
    { "base", 0 },
};
|
||||
|
||||
/**
 * \brief Parse a cross-compile target specification.
 *
 * The option is the text following the first ':' in \a s, or the whole of
 * \a s when it contains no ':'. It is looked up by exact match in
 * xcompile_options. On a match, the result is the platform filled in by
 * hs_populate_platform() with its cpu_features field replaced by the matched
 * entry's value.
 *
 * \param s  NUL-terminated specification string. A null pointer is treated as
 *           an unrecognised specification.
 * \return The platform description, or nullptr if the option is empty or is
 *         not listed in xcompile_options.
 */
unique_ptr<hs_platform_info> xcompileReadMode(const char *s) {
    if (!s) {
        return nullptr;
    }

    hs_platform_info rv;
    UNUSED hs_error_t err;
    err = hs_populate_platform(&rv);
    assert(!err);

    // Locate the separator once. Without a ':' the entire string is the
    // option (this is what the previous "npos + 1 wraps to 0" trick did).
    const string str(s);
    const size_t colon = str.find(':');
    const string opt = (colon == string::npos) ? str : str.substr(colon + 1);

    if (opt.empty()) {
        return nullptr;
    }

    const size_t numOpts = ARRAY_LENGTH(xcompile_options);
    for (size_t i = 0; i < numOpts; i++) {
        if (opt.compare(xcompile_options[i].name) != 0) {
            continue;
        }

        DEBUG_PRINTF("found opt %zu:%llu\n", i,
                     xcompile_options[i].cpu_features);
        rv.cpu_features = xcompile_options[i].cpu_features;
        DEBUG_PRINTF("cpu_features %llx\n", rv.cpu_features);
        return ue2::make_unique<hs_platform_info>(rv);
    }

    // No table entry matched.
    return nullptr;
}
|
||||
|
||||
/**
 * \brief Render a platform description as text.
 *
 * Emits the tune value when it is non-zero, then " avx2" if that feature bit
 * is set, then any remaining unrecognised feature bits as a raw number.
 */
string to_string(const hs_platform_info &p) {
    ostringstream out;

    if (p.tune) {
        out << p.tune;
    }

    // Strip each known feature bit as it is reported; whatever is left over
    // is unknown to this function.
    u64a remaining = p.cpu_features;

    if (remaining & HS_CPU_FEATURES_AVX2) {
        out << " avx2";
        remaining &= ~HS_CPU_FEATURES_AVX2;
    }

    if (remaining) {
        out << " " << "?cpu_features?:" << remaining;
    }

    return out.str();
}
|
||||
|
||||
/**
 * \brief Build the usage text listing every entry of xcompile_options.
 *
 * \return "Instruction set options: " followed by the option names in table
 *         order, separated by ", ".
 */
string xcompileUsage(void) {
    string variants = "Instruction set options: ";

    // The separator is empty before the first name and ", " afterwards.
    const char *separator = "";
    for (const auto &entry : xcompile_options) {
        variants += separator;
        variants += entry.name;
        separator = ", ";
    }

    return variants;
}
|
42
util/cross_compile.h
Normal file
42
util/cross_compile.h
Normal file
@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CROSS_COMPILE_H
|
||||
#define CROSS_COMPILE_H
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
// Forward declaration only; this header does not need the full definition.
struct hs_platform_info;
|
||||
|
||||
// Parses a cross-compile specification string. Returns nullptr when the
// option it names is empty or is not one of the known instruction set options.
std::unique_ptr<hs_platform_info> xcompileReadMode(const char *s);
|
||||
// Returns a one-line description listing the known instruction set options.
std::string xcompileUsage(void);
|
||||
|
||||
// Renders the tune value and CPU feature flags of p as text.
std::string to_string(const hs_platform_info &p);
|
||||
|
||||
#endif /* CROSS_COMPILE_H */
|
155
util/database_util.cpp
Normal file
155
util/database_util.cpp
Normal file
@ -0,0 +1,155 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "database_util.h"
|
||||
|
||||
#include "hs_common.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#if defined(HAVE_MMAP)
|
||||
#include <sys/mman.h> // for mmap
|
||||
#include <unistd.h> // for close
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
bool saveDatabase(const hs_database_t *db, const char *filename, bool verbose) {
|
||||
assert(db);
|
||||
assert(filename);
|
||||
|
||||
if (verbose) {
|
||||
cout << "Saving database to: " << filename << endl;
|
||||
}
|
||||
|
||||
char *bytes = nullptr;
|
||||
size_t length = 0;
|
||||
hs_error_t err = hs_serialize_database(db, &bytes, &length);
|
||||
if (err != HS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(bytes);
|
||||
assert(length > 0);
|
||||
|
||||
ofstream out(filename, ios::binary);
|
||||
out.write(bytes, length);
|
||||
out.close();
|
||||
|
||||
::free(bytes);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * \brief Load a serialized database from a file and deserialize it.
 *
 * The file contents are obtained with mmap() when HAVE_MMAP is defined and
 * with stream I/O otherwise. The temporary mapping or buffer is released
 * before returning.
 *
 * \param filename  Path of the serialized database; must not be null.
 * \param verbose   When true, print progress and the serialized database
 *                  info string to stdout.
 * \return The deserialized database, or nullptr if the file could not be
 *         opened, sized, read or mapped, or if deserialization failed.
 */
hs_database_t * loadDatabase(const char *filename, bool verbose) {
    assert(filename);

    if (verbose) {
        cout << "Loading database from: " << filename << endl;
    }

    char *bytes = nullptr;

#if defined(HAVE_MMAP)
    // Use mmap to read the file
    int fd = open(filename, O_RDONLY);
    if (fd < 0) {
        return nullptr;
    }
    struct stat st;
    if (fstat(fd, &st) < 0) {
        close(fd);
        return nullptr;
    }
    size_t len = st.st_size;

    bytes = (char *)mmap(nullptr, len, PROT_READ, MAP_SHARED, fd, 0);
    if (bytes == MAP_FAILED) {
        cout << "mmap failed" << endl;
        close(fd);
        return nullptr;
    }
#else
    // Fall back on stream IO
    ifstream is;
    is.open(filename, ios::in | ios::binary);
    if (!is.is_open()) {
        return nullptr;
    }
    is.seekg(0, ios::end);
    // tellg() reports failure as -1; check it before converting to size_t so
    // a failed seek cannot turn into an enormous allocation.
    const streamoff end_pos = is.tellg();
    if (end_pos < 0) {
        cout << "Unable to determine database file size" << endl;
        return nullptr;
    }
    size_t len = (size_t)end_pos;
    if (verbose) {
        cout << "Reading " << len << " bytes" << endl;
    }
    is.seekg(0, ios::beg);
    bytes = new char[len];
    is.read(bytes, len);
    // A short or failed read would leave part of the buffer uninitialised.
    const bool read_ok = !is.fail();
    is.close();
    if (!read_ok) {
        cout << "Unable to read database file" << endl;
        delete [] bytes;
        return nullptr;
    }
#endif

    assert(bytes);

    if (verbose) {
        char *info = nullptr;
        hs_error_t err = hs_serialized_database_info(bytes, len, &info);
        if (err) {
            cout << "Unable to decode serialized database info: " << err
                 << endl;
        } else if (info) {
            cout << "Serialized database info: " << info << endl;
            std::free(info);
        } else {
            cout << "Unable to decode serialized database info." << endl;
        }
    }

    hs_database_t *db = nullptr;
    hs_error_t err = hs_deserialize_database(bytes, len, &db);

    // The serialized bytes are released here, before the result is checked.
#if defined(HAVE_MMAP)
    munmap(bytes, len);
    close(fd);
#else
    delete [] bytes;
#endif

    if (err != HS_SUCCESS) {
        cout << "hs_deserialize_database call failed: " << err << endl;
        return nullptr;
    }

    assert(db);

    return db;
}
|
39
util/database_util.h
Normal file
39
util/database_util.h
Normal file
@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DATABASE_UTIL_H
|
||||
#define DATABASE_UTIL_H
|
||||
|
||||
// Forward declaration only; this header does not need the full definition.
struct hs_database;
|
||||
|
||||
// Serializes db and writes the bytes to filename. Returns false if the
// database cannot be serialized. When verbose is true the destination is
// announced on stdout.
bool saveDatabase(const hs_database *db, const char *filename,
|
||||
bool verbose = false);
|
||||
|
||||
// Reads a serialized database from filename and deserializes it. Returns
// nullptr if the file cannot be opened or deserialization fails.
hs_database *loadDatabase(const char *filename, bool verbose = false);
|
||||
|
||||
#endif /* DATABASE_UTIL_H */
|
107
util/expression_path.h
Normal file
107
util/expression_path.h
Normal file
@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef EXPRESSION_PATH_H
|
||||
#define EXPRESSION_PATH_H
|
||||
|
||||
#include "ue2common.h"
|
||||
|
||||
#include <cerrno>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <sys/stat.h>
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#include <libgen.h>
|
||||
#endif
|
||||
|
||||
//
|
||||
// Utility functions
|
||||
//
|
||||
|
||||
/**
 * Given a path to a signature file, infer the path of the pcre directory:
 * the directory component of sigFile with "/../pcre" appended.
 *
 * On Windows an empty string is returned when sigFile is _MAX_DIR characters
 * or longer.
 */
static inline
std::string inferExpressionPath(const std::string &sigFile) {
    std::string dir;

#ifndef _WIN32
    // POSIX variant.

    // dirname() may modify its argument, so hand it a private,
    // NUL-terminated copy of the path.
    std::vector<char> scratch(sigFile.begin(), sigFile.end());
    scratch.push_back('\0');

    dir = dirname(scratch.data());
#else
    // Windows variant.
    if (sigFile.size() >= _MAX_DIR) {
        return std::string();
    }
    char scratch[_MAX_DIR];
    _splitpath(sigFile.c_str(), nullptr, scratch, nullptr, nullptr);
    dir = scratch;
#endif

    return dir + "/../pcre";
}
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define stat _stat
|
||||
#define S_IFREG _S_IFREG
|
||||
#endif
|
||||
|
||||
/**
 * Returns true if filename names a directory.
 *
 * If stat() fails, the error text is written to stderr and false is
 * returned.
 */
static inline
bool isDir(const std::string &filename) {
    struct stat s;

    if (stat(filename.c_str(), &s) == -1) {
        std::cerr << "stat: " << strerror(errno) << std::endl;
        return false;
    }

    // The file type is a multi-bit field: mask it out and compare. Testing
    // the S_IFDIR bit alone also matches block devices, whose type value
    // includes that bit.
    return (s.st_mode & S_IFMT) == S_IFDIR;
}
|
||||
|
||||
/**
 * Returns true if filename names a regular file.
 *
 * If stat() fails, the error text is written to stderr and false is
 * returned.
 */
static inline
bool isFile(const std::string &filename) {
    struct stat s;

    if (stat(filename.c_str(), &s) == -1) {
        std::cerr << "stat: " << strerror(errno) << std::endl;
        return false;
    }

    // The file type is a multi-bit field: mask it out and compare. Testing
    // the S_IFREG bit alone also matches other types (e.g. sockets) whose
    // type value includes that bit.
    return (s.st_mode & S_IFMT) == S_IFREG;
}
|
||||
|
||||
#endif /* EXPRESSION_PATH_H */
|
Loading…
x
Reference in New Issue
Block a user