mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
hsbench: add Hyperscan benchmarker
The hsbench tool provides an easy way to measure Hyperscan's performance for a particular set of patterns and corpus of data to be scanned.
This commit is contained in:
parent
06cde4c94d
commit
f626276271
53
cmake/sqlite3.cmake
Normal file
53
cmake/sqlite3.cmake
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
#
# a lot of noise to find sqlite
#
# Search order:
#   1. a system sqlite3 located through pkg-config (skipped on Windows, or
#      when SQLITE_PREFER_STATIC is ON);
#   2. the amalgamation (sqlite3.h + sqlite3.c) in
#      ${PROJECT_SOURCE_DIR}/sqlite3, compiled into the static library target
#      `sqlite3_static`.
#
# Sets SQLITE3_FOUND, SQLITE3_INCLUDE_DIRS and SQLITE3_LDFLAGS, plus
# SQLITE3_BUILD_SOURCE when the in-tree copy is used.
#

# Make this file self-contained: it uses these macros below.
include(CheckCSourceCompiles)
include(CheckSymbolExists)
include(CMakePushCheckState)

option(SQLITE_PREFER_STATIC "Build sqlite3 statically instead of using an installed lib" OFF)

if(NOT WIN32 AND NOT SQLITE_PREFER_STATIC)
    find_package(PkgConfig QUIET)

    # first check for sqlite on the system
    pkg_check_modules(SQLITE3 sqlite3)
endif()

if (NOT SQLITE3_FOUND)
    message(STATUS "looking for sqlite3 in source tree")
    # look in the source tree
    if (EXISTS "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.h" AND
        EXISTS "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.c")
        message(STATUS " found sqlite3 in source tree")
        set(SQLITE3_FOUND TRUE)
        set(SQLITE3_BUILD_SOURCE TRUE)
        set(SQLITE3_INCLUDE_DIRS "${PROJECT_SOURCE_DIR}/sqlite3")
        set(SQLITE3_LDFLAGS sqlite3_static)
    else()
        message(FATAL_ERROR " no sqlite3 in source tree")
    endif()
endif()

# now do version checks
if (SQLITE3_FOUND)
    # Scope every CMAKE_REQUIRED_* change to the checks in this file so that
    # nothing leaks into configure checks run later, whichever sqlite3 is used.
    cmake_push_check_state()

    # pkg-config reports no include dirs when the headers live on the default
    # search path; avoid inserting an empty element in that case.
    if (SQLITE3_INCLUDE_DIRS)
        list(INSERT CMAKE_REQUIRED_INCLUDES 0 ${SQLITE3_INCLUDE_DIRS})
    endif()

    check_c_source_compiles("#include <sqlite3.h>\n#if SQLITE_VERSION_NUMBER >= 3008007 && SQLITE_VERSION_NUMBER < 3008010\n#error broken sqlite\n#endif\nint main() {return 0;}" SQLITE_VERSION_OK)
    if (NOT SQLITE_VERSION_OK)
        message(FATAL_ERROR "sqlite3 is broken from 3.8.7 to 3.8.10 - please find a working version")
    endif()

    if (NOT SQLITE3_BUILD_SOURCE)
        # Only an installed library can be linked against at configure time;
        # the in-tree copy has not been built yet.
        if (SQLITE3_LDFLAGS)
            list(INSERT CMAKE_REQUIRED_LIBRARIES 0 ${SQLITE3_LDFLAGS})
        endif()
        check_symbol_exists(sqlite3_open_v2 sqlite3.h HAVE_SQLITE3_OPEN_V2)
    endif()

    cmake_pop_check_state()

    if (SQLITE3_BUILD_SOURCE AND NOT TARGET sqlite3_static)
        # build sqlite as a static lib to compile into our test programs
        add_library(sqlite3_static STATIC "${PROJECT_SOURCE_DIR}/sqlite3/sqlite3.c")
        if (NOT WIN32)
            set_target_properties(sqlite3_static PROPERTIES COMPILE_FLAGS "-Wno-unused -Wno-cast-qual -DSQLITE_OMIT_LOAD_EXTENSION")
        endif()
    endif()
endif()

# that's enough about sqlite
|
19
tools/CMakeLists.txt
Normal file
19
tools/CMakeLists.txt
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# Configuration shared by every tool below this directory.
find_package(Threads)

# remove some warnings: strip -Wmissing-declarations from the C++ flags used
# by everything configured from here down
if("${CMAKE_CXX_FLAGS}" MATCHES "-Wmissing-declarations")
    string(REPLACE "-Wmissing-declarations" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()

# header search path for the tools: project root, this directory's src/,
# and the shared util/ directory
include_directories(
    ${PROJECT_SOURCE_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/src
    ${PROJECT_SOURCE_DIR}/util
)

# add any subdir with a cmake file
file(GLOB dirents RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *)
foreach(e ${dirents})
    if(IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/${e})
        if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${e}/CMakeLists.txt)
            add_subdirectory(${e})
        endif()
    endif()
endforeach()
|
36
tools/hsbench/CMakeLists.txt
Normal file
36
tools/hsbench/CMakeLists.txt
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
# Build rules for hsbench, the Hyperscan benchmarking tool.

include(CheckFunctionExists)

# Locate sqlite3 (sets SQLITE3_INCLUDE_DIRS and SQLITE3_LDFLAGS).
# The module-name form searches each entry of CMAKE_MODULE_PATH, so it keeps
# working when that variable is a list rather than a single directory.
include(sqlite3)

if (NOT XCODE)
    include_directories(SYSTEM ${SQLITE3_INCLUDE_DIRS})
else()
    # cmake doesn't think Xcode supports isystem
    # One -isystem per directory: the list may hold several entries, or none
    # when sqlite3 lives on the default include path.
    foreach(sqlite_inc_dir ${SQLITE3_INCLUDE_DIRS})
        set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -isystem ${sqlite_inc_dir}")
    endforeach()
endif()

check_function_exists(malloc_info HAVE_MALLOC_INFO)
check_function_exists(shmget HAVE_SHMGET)
set(HAVE_SHMGET ${HAVE_SHMGET} CACHE BOOL "shmget()")

# only set these after all tests are done
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")

set(hsbench_SOURCES
    common.h
    data_corpus.cpp
    data_corpus.h
    engine_hyperscan.cpp
    engine_hyperscan.h
    heapstats.cpp
    heapstats.h
    huge.cpp
    huge.h
    main.cpp
    thread_barrier.h
    timer.h
)

add_executable(hsbench ${hsbench_SOURCES})
target_link_libraries(hsbench hs databaseutil expressionutil ${SQLITE3_LDFLAGS}
    ${CMAKE_THREAD_LIBS_INIT})
|
8
tools/hsbench/README.md
Normal file
8
tools/hsbench/README.md
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
Hyperscan Benchmarker: hsbench
|
||||||
|
==============================
|
||||||
|
|
||||||
|
The `hsbench` tool provides an easy way to measure Hyperscan's performance
|
||||||
|
for a particular set of patterns and corpus of data to be scanned.
|
||||||
|
|
||||||
|
Documentation describing its operation is available in the Tools section of the
|
||||||
|
[Developer Reference Guide](http://01org.github.io/hyperscan/dev-reference/).
|
42
tools/hsbench/common.h
Normal file
42
tools/hsbench/common.h
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef COMMON_H
|
||||||
|
#define COMMON_H
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
/** The scanning modes hsbench can build a database for. */
enum class ScanMode { BLOCK, STREAMING, VECTORED };

// Global hsbench settings; only declared here.

/** When true, the engine uses the match callback that prints each match. */
extern bool echo_matches;

/** When true, the built database is written out under serializePath. */
extern bool saveDatabases;

/** When true, a database is loaded from serializePath instead of compiled. */
extern bool loadDatabases;

/** Directory prefix used for saved/loaded database files. */
extern std::string serializePath;

/** Extra mode flags OR-ed into the compile mode for streaming databases. */
extern unsigned int somPrecisionMode;
|
||||||
|
|
||||||
|
#endif // COMMON_H
|
133
tools/hsbench/data_corpus.cpp
Normal file
133
tools/hsbench/data_corpus.cpp
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#include "data_corpus.h"
|
||||||
|
|
||||||
|
#include "util/container.h"
|
||||||
|
#include "ue2common.h"
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <map>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <sqlite3.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace ue2;
|
||||||
|
|
||||||
|
static
|
||||||
|
void readRow(sqlite3_stmt *statement, vector<DataBlock> &blocks,
|
||||||
|
map<unsigned int, unsigned int> &stream_indices) {
|
||||||
|
unsigned int id = sqlite3_column_int(statement, 0);
|
||||||
|
unsigned int stream_id = sqlite3_column_int(statement, 1);
|
||||||
|
const char *blob = (const char *)sqlite3_column_blob(statement, 2);
|
||||||
|
unsigned int bytes = sqlite3_column_bytes(statement, 2);
|
||||||
|
|
||||||
|
if (!contains(stream_indices, stream_id)) {
|
||||||
|
unsigned int internal_stream_index = stream_indices.size();
|
||||||
|
stream_indices[stream_id] = internal_stream_index;
|
||||||
|
}
|
||||||
|
auto internal_stream_index = stream_indices[stream_id];
|
||||||
|
|
||||||
|
assert(blob || bytes > 0);
|
||||||
|
blocks.emplace_back(id, stream_id, internal_stream_index,
|
||||||
|
string(blob, blob + bytes));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Read every block from the corpus database \p filename.
 *
 * The database is opened read-only and all rows of the `chunk` table are
 * loaded in id order.
 *
 * \return The blocks, in the order returned by the query.
 * \throw DataCorpusError if the database cannot be opened, the query cannot
 *        be prepared, stepping fails, or no blocks are found.
 */
vector<DataBlock> readCorpus(const string &filename) {
    int status;
    sqlite3 *db = nullptr;

    status = sqlite3_open_v2(filename.c_str(), &db, SQLITE_OPEN_READONLY,
                             nullptr);

    if (status != SQLITE_OK) {
        // A handle is normally returned even on failure and must be closed;
        // sqlite3_errmsg() and sqlite3_close() both accept a null handle.
        ostringstream err;
        err << "Unable to open database '" << filename << "': "
            << sqlite3_errmsg(db);
        sqlite3_close(db);
        throw DataCorpusError(err.str());
    }
    assert(db);

    static const string query("SELECT id, stream_id, data "
                              "FROM chunk ORDER BY id;");

    sqlite3_stmt *statement = nullptr;

    status = sqlite3_prepare_v2(db, query.c_str(), query.size(), &statement,
                                nullptr);
    if (status != SQLITE_OK) {
        // statement is null after a failed prepare; finalizing it is a no-op.
        sqlite3_finalize(statement);
        sqlite3_close(db);

        ostringstream oss;
        oss << "Query failed: " << query;
        throw DataCorpusError(oss.str());
    }

    vector<DataBlock> blocks;
    map<unsigned int, unsigned int> stream_indices;

    try {
        status = sqlite3_step(statement);
        while (status == SQLITE_ROW) {
            readRow(statement, blocks, stream_indices);
            status = sqlite3_step(statement);
        }
    } catch (...) {
        // Don't leak the SQLite handles if building a block throws.
        sqlite3_finalize(statement);
        sqlite3_close(db);
        throw;
    }

    if (status != SQLITE_DONE) {
        ostringstream oss;
        oss << "Error retrieving blocks from corpus: "
            << sqlite3_errstr(status);

        // After a failed step, sqlite3_finalize() returns that failure's
        // error code again, so its result must not be asserted to be OK.
        sqlite3_finalize(statement);
        sqlite3_close(db);

        throw DataCorpusError(oss.str());
    }

    status = sqlite3_finalize(statement);
    assert(status == SQLITE_OK);
    status = sqlite3_close(db);
    assert(status == SQLITE_OK);

    if (blocks.empty()) {
        throw DataCorpusError("Database contains no blocks.");
    }

    return blocks;
}
|
63
tools/hsbench/data_corpus.h
Normal file
63
tools/hsbench/data_corpus.h
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef DATACORPUS_H
|
||||||
|
#define DATACORPUS_H
|
||||||
|
|
||||||
|
#include <string>
#include <utility>
#include <vector>
|
||||||
|
|
||||||
|
/** A single block of corpus data and the identifiers that go with it. */
class DataBlock {
public:
    /** Takes ownership of \p in_data; the ids are stored as given. */
    DataBlock(unsigned int in_id, unsigned int in_stream,
              unsigned int int_stream_index_in, std::string in_data)
        : id(in_id),
          stream_id(in_stream),
          internal_stream_index(int_stream_index_in),
          payload(std::move(in_data)) {}

    /** Unique block identifier. */
    unsigned int id;

    /** Unique stream identifier (from corpus file). */
    unsigned int stream_id;

    /** Dense index for this stream (allocated by hsbench). */
    unsigned int internal_stream_index;

    /** Actual block payload. */
    std::string payload;
};
|
||||||
|
|
||||||
|
/**
 * Exception type thrown when reading a corpus fails; carries a
 * human-readable description of the problem in \ref msg.
 */
class DataCorpusError {
public:
    explicit DataCorpusError(std::string msg_in)
        : msg(std::move(msg_in)) {}

    /** Description of what went wrong. */
    std::string msg;
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Interface to a corpus database. Any error will produce a DataCorpusError
|
||||||
|
* and should be considered fatal.
|
||||||
|
*/
|
||||||
|
std::vector<DataBlock> readCorpus(const std::string &filename);
|
||||||
|
|
||||||
|
#endif // DATACORPUS_H
|
411
tools/hsbench/engine_hyperscan.cpp
Normal file
411
tools/hsbench/engine_hyperscan.cpp
Normal file
@ -0,0 +1,411 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#include "ExpressionParser.h"
|
||||||
|
#include "common.h"
|
||||||
|
#include "engine_hyperscan.h"
|
||||||
|
#include "expressions.h"
|
||||||
|
#include "heapstats.h"
|
||||||
|
#include "huge.h"
|
||||||
|
#include "timer.h"
|
||||||
|
|
||||||
|
#include "crc32.h"
|
||||||
|
#include "database.h"
|
||||||
|
#include "hs_compile.h"
|
||||||
|
#include "hs_internal.h"
|
||||||
|
#include "hs_runtime.h"
|
||||||
|
#include "util/database_util.h"
|
||||||
|
#include "util/make_unique.h"
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstring>
|
||||||
|
#include <iomanip>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
/**
 * Allocate the Hyperscan scratch space owned by this context for \p db.
 *
 * A failed allocation is fatal: it is reported and the process aborts, in
 * the same way the scan functions in this file treat Hyperscan errors.
 */
EngineContext::EngineContext(const hs_database_t *db) {
    // hs_alloc_scratch() expects the pointer to be null on first allocation.
    scratch = nullptr;

    hs_error_t rv = hs_alloc_scratch(db, &scratch);
    if (rv != HS_SUCCESS) {
        printf("Fatal error: hs_alloc_scratch returned error %d\n", rv);
        abort();
    }
    assert(scratch);
}
|
||||||
|
|
||||||
|
/** Release the scratch space allocated by the constructor. */
EngineContext::~EngineContext() {
    hs_free_scratch(scratch);
}
|
||||||
|
|
||||||
|
namespace /* anonymous */ {
|
||||||
|
|
||||||
|
/** Scan context structure passed to the onMatch callback function. */
|
||||||
|
struct ScanContext {
|
||||||
|
ScanContext(unsigned id_in, ResultEntry &result_in,
|
||||||
|
const EngineStream *stream_in)
|
||||||
|
: id(id_in), result(result_in), stream(stream_in) {}
|
||||||
|
unsigned id;
|
||||||
|
ResultEntry &result;
|
||||||
|
const EngineStream *stream; // nullptr except in streaming mode.
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback function called for every match that Hyperscan produces, used when
|
||||||
|
* "echo matches" is off.
|
||||||
|
*/
|
||||||
|
static
|
||||||
|
int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int,
|
||||||
|
void *ctx) {
|
||||||
|
ScanContext *sc = static_cast<ScanContext *>(ctx);
|
||||||
|
assert(sc);
|
||||||
|
sc->result.matches++;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback function called for every match that Hyperscan produces when "echo
|
||||||
|
* matches" is enabled.
|
||||||
|
*/
|
||||||
|
static
|
||||||
|
int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
|
||||||
|
unsigned int, void *ctx) {
|
||||||
|
ScanContext *sc = static_cast<ScanContext *>(ctx);
|
||||||
|
assert(sc);
|
||||||
|
sc->result.matches++;
|
||||||
|
|
||||||
|
if (sc->stream) {
|
||||||
|
printf("Match @%u:%u:%llu for %u\n", sc->stream->sn, sc->id, to, id);
|
||||||
|
} else {
|
||||||
|
printf("Match @%u:%llu for %u\n", sc->id, to, id);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Wrap the database \p db_in, which must not be null. */
EngineHyperscan::EngineHyperscan(hs_database_t *db_in)
    : db(db_in) {
    assert(db);
}
|
||||||
|
|
||||||
|
/** Hand the wrapped database back through release_huge(). */
EngineHyperscan::~EngineHyperscan() {
    release_huge(db);
}
|
||||||
|
|
||||||
|
/** Create a new EngineContext (and with it a scratch) for this database. */
unique_ptr<EngineContext> EngineHyperscan::makeContext() const {
    auto context = ue2::make_unique<EngineContext>(db);
    return context;
}
|
||||||
|
|
||||||
|
void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id,
|
||||||
|
ResultEntry &result, EngineContext &ctx) const {
|
||||||
|
assert(data);
|
||||||
|
|
||||||
|
ScanContext sc(id, result, nullptr);
|
||||||
|
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||||
|
hs_error_t rv = hs_scan(db, data, len, 0, ctx.scratch, callback, &sc);
|
||||||
|
|
||||||
|
if (rv != HS_SUCCESS) {
|
||||||
|
printf("Fatal error: hs_scan returned error %d\n", rv);
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void EngineHyperscan::scan_vectored(const char *const *data,
|
||||||
|
const unsigned int *len, unsigned int count,
|
||||||
|
unsigned streamId, ResultEntry &result,
|
||||||
|
EngineContext &ctx) const {
|
||||||
|
assert(data);
|
||||||
|
assert(len);
|
||||||
|
|
||||||
|
ScanContext sc(streamId, result, nullptr);
|
||||||
|
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||||
|
hs_error_t rv =
|
||||||
|
hs_scan_vector(db, data, len, count, 0, ctx.scratch, callback, &sc);
|
||||||
|
|
||||||
|
if (rv != HS_SUCCESS) {
|
||||||
|
printf("Fatal error: hs_scan_vector returned error %d\n", rv);
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Open a new Hyperscan stream on this database.
 *
 * \param ctx       Context whose scratch later scans on this stream use.
 * \param streamId  Serial number recorded on the stream (used when echoing
 *                  matches).
 * \return The new stream, or nullptr if Hyperscan could not open one.
 */
unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ctx,
                                                     unsigned streamId) const {
    auto stream = ue2::make_unique<EngineStream>();
    stream->ctx = &ctx;
    stream->sn = streamId;

    // Start from a known state and trust the return code, rather than
    // depending on how EngineStream initialises its members.
    stream->id = nullptr;
    hs_error_t rv = hs_open_stream(db, 0, &stream->id);
    if (rv != HS_SUCCESS || !stream->id) {
        // an error occurred, propagate to caller
        return nullptr;
    }

    return stream;
}
|
||||||
|
|
||||||
|
/**
 * Close \p stream, delivering any matches raised at close time to the match
 * callback (counted in \p result). The stream object is destroyed when this
 * function returns.
 */
void EngineHyperscan::streamClose(unique_ptr<EngineStream> stream,
                                  ResultEntry &result) const {
    assert(stream);

    EngineStream &s = *stream;
    EngineContext &ctx = *s.ctx;

    // Matches raised at close time are labelled with id 0.
    ScanContext sc(0, result, &s);

    assert(s.id);
    hs_close_stream(s.id, ctx.scratch, echo_matches ? onMatchEcho : onMatch,
                    &sc);
    s.id = nullptr;
}
|
||||||
|
|
||||||
|
/**
 * Streaming-mode scan: write \p len bytes at \p data to \p stream.
 *
 * Matches are counted in \p result (and printed when echo_matches is set);
 * \p id is only used to label echoed matches. Any Hyperscan error is fatal.
 */
void EngineHyperscan::streamScan(EngineStream &stream, const char *data,
                                 unsigned len, unsigned id,
                                 ResultEntry &result) const {
    assert(data);

    EngineContext &ctx = *stream.ctx;

    ScanContext sc(id, result, &stream);
    const hs_error_t rv =
        hs_scan_stream(stream.id, data, len, 0, ctx.scratch,
                       echo_matches ? onMatchEcho : onMatch, &sc);
    if (rv == HS_SUCCESS) {
        return;
    }

    printf("Fatal error: hs_scan_stream returned error %d\n", rv);
    abort();
}
|
||||||
|
|
||||||
|
static
|
||||||
|
unsigned makeModeFlags(ScanMode scan_mode) {
|
||||||
|
switch (scan_mode) {
|
||||||
|
case ScanMode::BLOCK:
|
||||||
|
return HS_MODE_BLOCK;
|
||||||
|
case ScanMode::STREAMING:
|
||||||
|
return HS_MODE_STREAM;
|
||||||
|
case ScanMode::VECTORED:
|
||||||
|
return HS_MODE_VECTORED;
|
||||||
|
}
|
||||||
|
assert(0);
|
||||||
|
return HS_MODE_STREAM;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hash the settings used to compile a database, returning a string that can be
|
||||||
|
* used as a filename.
|
||||||
|
*/
|
||||||
|
static
|
||||||
|
string dbSettingsHash(const string &filename, u32 mode) {
|
||||||
|
ostringstream info_oss;
|
||||||
|
|
||||||
|
info_oss << filename.c_str() << ' ';
|
||||||
|
info_oss << mode << ' ';
|
||||||
|
|
||||||
|
string info = info_oss.str();
|
||||||
|
|
||||||
|
u32 crc = Crc32c_ComputeBuf(0, info.data(), info.size());
|
||||||
|
|
||||||
|
// return STL string with printable version of digest
|
||||||
|
ostringstream oss;
|
||||||
|
oss << hex << setw(8) << setfill('0') << crc << dec;
|
||||||
|
|
||||||
|
return oss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
string dbFilename(const std::string &name, unsigned mode) {
|
||||||
|
ostringstream oss;
|
||||||
|
oss << serializePath << '/' << dbSettingsHash(name, mode) << ".db";
|
||||||
|
return oss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<EngineHyperscan>
|
||||||
|
buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
|
||||||
|
const std::string &name, UNUSED const ue2::Grey &grey) {
|
||||||
|
if (expressions.empty()) {
|
||||||
|
assert(0);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
long double compileSecs = 0.0;
|
||||||
|
size_t compiledSize = 0.0;
|
||||||
|
size_t streamSize = 0;
|
||||||
|
size_t scratchSize = 0;
|
||||||
|
unsigned int peakMemorySize = 0;
|
||||||
|
unsigned int crc = 0;
|
||||||
|
std::string db_info;
|
||||||
|
|
||||||
|
unsigned int mode = makeModeFlags(scan_mode);
|
||||||
|
|
||||||
|
hs_database_t *db;
|
||||||
|
hs_error_t err;
|
||||||
|
|
||||||
|
if (loadDatabases) {
|
||||||
|
db = loadDatabase(dbFilename(name, mode).c_str());
|
||||||
|
if (!db) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
const unsigned int count = expressions.size();
|
||||||
|
|
||||||
|
vector<string> exprs;
|
||||||
|
vector<unsigned int> flags, ids;
|
||||||
|
vector<hs_expr_ext> ext;
|
||||||
|
|
||||||
|
for (const auto &m : expressions) {
|
||||||
|
string expr;
|
||||||
|
unsigned int f = 0;
|
||||||
|
hs_expr_ext extparam;
|
||||||
|
extparam.flags = 0;
|
||||||
|
if (!readExpression(m.second, expr, &f, &extparam)) {
|
||||||
|
printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(),
|
||||||
|
m.first);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
exprs.push_back(expr);
|
||||||
|
ids.push_back(m.first);
|
||||||
|
flags.push_back(f);
|
||||||
|
ext.push_back(extparam);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned full_mode = mode;
|
||||||
|
if (mode == HS_MODE_STREAM) {
|
||||||
|
full_mode |= somPrecisionMode;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Our compiler takes an array of plain ol' C strings.
|
||||||
|
vector<const char *> patterns(count);
|
||||||
|
for (unsigned int i = 0; i < count; i++) {
|
||||||
|
patterns[i] = exprs[i].c_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extended parameters are passed as pointers to hs_expr_ext structures.
|
||||||
|
vector<const hs_expr_ext *> ext_ptr(count);
|
||||||
|
for (unsigned int i = 0; i < count; i++) {
|
||||||
|
ext_ptr[i] = &ext[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
Timer timer;
|
||||||
|
timer.start();
|
||||||
|
|
||||||
|
hs_compile_error_t *compile_err;
|
||||||
|
|
||||||
|
#ifndef RELEASE_BUILD
|
||||||
|
err = hs_compile_multi_int(patterns.data(), flags.data(), ids.data(),
|
||||||
|
ext_ptr.data(), count, full_mode, nullptr,
|
||||||
|
&db, &compile_err, grey);
|
||||||
|
#else
|
||||||
|
err = hs_compile_ext_multi(patterns.data(), flags.data(), ids.data(),
|
||||||
|
ext_ptr.data(), count, full_mode, nullptr,
|
||||||
|
&db, &compile_err);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
timer.complete();
|
||||||
|
compileSecs = timer.seconds();
|
||||||
|
peakMemorySize = getPeakHeap();
|
||||||
|
|
||||||
|
if (err == HS_COMPILER_ERROR) {
|
||||||
|
if (compile_err->expression >= 0) {
|
||||||
|
printf("Compile error for signature #%u: %s\n",
|
||||||
|
compile_err->expression, compile_err->message);
|
||||||
|
} else {
|
||||||
|
printf("Compile error: %s\n", compile_err->message);
|
||||||
|
}
|
||||||
|
hs_free_compile_error(compile_err);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// copy the db into huge pages (where available) to reduce TLB pressure
|
||||||
|
db = get_huge(db);
|
||||||
|
if (!db) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = hs_database_size(db, &compiledSize);
|
||||||
|
if (err != HS_SUCCESS) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
assert(compiledSize > 0);
|
||||||
|
|
||||||
|
crc = db->crc32;
|
||||||
|
|
||||||
|
if (saveDatabases) {
|
||||||
|
saveDatabase(db, dbFilename(name, mode).c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mode & HS_MODE_STREAM) {
|
||||||
|
err = hs_stream_size(db, &streamSize);
|
||||||
|
if (err != HS_SUCCESS) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
streamSize = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *info;
|
||||||
|
err = hs_database_info(db, &info);
|
||||||
|
if (err != HS_SUCCESS) {
|
||||||
|
return nullptr;
|
||||||
|
} else {
|
||||||
|
db_info = string(info);
|
||||||
|
free(info);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate scratch temporarily to find its size: this is a good test
|
||||||
|
// anyway.
|
||||||
|
hs_scratch_t *scratch = nullptr;
|
||||||
|
err = hs_alloc_scratch(db, &scratch);
|
||||||
|
if (err != HS_SUCCESS) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = hs_scratch_size(scratch, &scratchSize);
|
||||||
|
if (err != HS_SUCCESS) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
hs_free_scratch(scratch);
|
||||||
|
|
||||||
|
// Output summary information.
|
||||||
|
printf("Signatures: %s\n", name.c_str());
|
||||||
|
printf("Hyperscan info: %s\n", db_info.c_str());
|
||||||
|
printf("Expression count: %'zu\n", expressions.size());
|
||||||
|
printf("Bytecode size: %'zu bytes\n", compiledSize);
|
||||||
|
printf("Database CRC: 0x%x\n", crc);
|
||||||
|
if (mode & HS_MODE_STREAM) {
|
||||||
|
printf("Stream state size: %'zu bytes\n", streamSize);
|
||||||
|
}
|
||||||
|
printf("Scratch size: %'zu bytes\n", scratchSize);
|
||||||
|
printf("Compile time: %'0.3Lf seconds\n", compileSecs);
|
||||||
|
printf("Peak heap usage: %'u bytes\n", peakMemorySize);
|
||||||
|
|
||||||
|
return ue2::make_unique<EngineHyperscan>(db);
|
||||||
|
}
|
97
tools/hsbench/engine_hyperscan.h
Normal file
97
tools/hsbench/engine_hyperscan.h
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef ENGINEHYPERSCAN_H
|
||||||
|
#define ENGINEHYPERSCAN_H
|
||||||
|
|
||||||
|
#include "expressions.h"
|
||||||
|
#include "common.h"
|
||||||
|
#include "hs_runtime.h"
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
/**
 * Outcome of one complete scan: how long it took and how many matches were
 * reported. Both fields start at zero.
 */
struct ResultEntry {
    double seconds = 0.0;      //!< Time taken for the scan, in seconds.
    unsigned int matches = 0U; //!< Number of matches found during the scan.
};
|
||||||
|
|
||||||
|
/** Engine context which is allocated on a per-thread basis. */
|
||||||
|
class EngineContext {
|
||||||
|
public:
|
||||||
|
explicit EngineContext(const hs_database_t *db);
|
||||||
|
~EngineContext();
|
||||||
|
|
||||||
|
hs_scratch_t *scratch = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Streaming mode scans have persistent stream state associated with them. */
|
||||||
|
class EngineStream {
|
||||||
|
public:
|
||||||
|
hs_stream_t *id;
|
||||||
|
unsigned int sn;
|
||||||
|
EngineContext *ctx;
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Hyperscan Engine for scanning data. */
|
||||||
|
class EngineHyperscan {
|
||||||
|
public:
|
||||||
|
explicit EngineHyperscan(hs_database_t *db);
|
||||||
|
~EngineHyperscan();
|
||||||
|
|
||||||
|
std::unique_ptr<EngineContext> makeContext() const;
|
||||||
|
|
||||||
|
void scan(const char *data, unsigned int len, unsigned int id,
|
||||||
|
ResultEntry &result, EngineContext &ctx) const;
|
||||||
|
|
||||||
|
void scan_vectored(const char *const *data, const unsigned int *len,
|
||||||
|
unsigned int count, unsigned int streamId,
|
||||||
|
ResultEntry &result, EngineContext &ctx) const;
|
||||||
|
|
||||||
|
std::unique_ptr<EngineStream> streamOpen(EngineContext &ctx,
|
||||||
|
unsigned id) const;
|
||||||
|
|
||||||
|
void streamClose(std::unique_ptr<EngineStream> stream,
|
||||||
|
ResultEntry &result) const;
|
||||||
|
|
||||||
|
void streamScan(EngineStream &stream, const char *data, unsigned int len,
|
||||||
|
unsigned int id, ResultEntry &result) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
hs_database_t *db;
|
||||||
|
};
|
||||||
|
|
||||||
|
namespace ue2 {
|
||||||
|
struct Grey;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<EngineHyperscan>
|
||||||
|
buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
|
||||||
|
const std::string &name, const ue2::Grey &grey);
|
||||||
|
|
||||||
|
#endif // ENGINEHYPERSCAN_H
|
146
tools/hsbench/heapstats.cpp
Normal file
146
tools/hsbench/heapstats.cpp
Normal file
@ -0,0 +1,146 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** \file
|
||||||
|
* \brief Peak heap usage code.
|
||||||
|
*
|
||||||
|
* On glibc systems that provide malloc_info() we parse its report. On other
* Linux systems we fall back to the VmPeak value in /proc/<pid>/status. We
* return zero elsewhere.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#include "heapstats.h"
|
||||||
|
|
||||||
|
#if defined HAVE_MALLOC_INFO
|
||||||
|
|
||||||
|
#include <cerrno>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
#include <malloc.h>
|
||||||
|
|
||||||
|
/**
 * Peak heap usage in bytes, as reported by glibc's malloc_info().
 *
 * The malloc_info() XML report is written to a temporary file and scanned
 * line by line for `<system type="max" size="...">`; the value from the last
 * such line that parses cleanly is returned.
 *
 * \return Peak heap size in bytes, or 0 if the report could not be produced
 *         or contained no parseable "max" entry.
 */
size_t getPeakHeap(void) {
    FILE *tmpf = tmpfile();
    if (!tmpf) {
        return 0;
    }

    if (malloc_info(0, tmpf) != 0) {
        fclose(tmpf);
        return 0;
    }

    rewind(tmpf);

    // We don't want to depend on a real XML parser. This is ugly and brittle
    // and hopefully good enough for the time being. We look for the last
    // system tag with type max, which should be the malloc-wide one.

    static const char begin[] = "<system type=\"max\" size=\"";
    const size_t begin_len = sizeof(begin) - 1;

    char *line = nullptr;
    size_t capacity = 0;
    size_t maxheap = 0;

    while (getline(&line, &capacity, tmpf) != -1) {
        if (strncmp(line, begin, begin_len) != 0) {
            continue;
        }

        const char *digits = line + begin_len;
        char *end = nullptr;
        errno = 0;
        const unsigned long long value = strtoull(digits, &end, 10);
        if (errno != 0 || end == digits) {
            // Unparseable size: stop here and keep the last good value rather
            // than reporting strtoull's error sentinel as a heap size.
            break;
        }
        maxheap = (size_t)value;
    }

    free(line); // getline() allocates the buffer; free(nullptr) is a no-op.
    fclose(tmpf);
    return maxheap;
}
|
||||||
|
|
||||||
|
#elif defined __linux
|
||||||
|
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <fstream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
/**
 * Peak memory usage in bytes, taken from the VmPeak line of
 * /proc/<pid>/status.
 *
 * Modern Linux kernels write a 'VmPeak' value (in kB) into that file. It is
 * only an approximation of heap usage, as it likely includes shared libraries
 * and the like as well.
 *
 * \return VmPeak converted to bytes, or 0 if the file cannot be opened or has
 *         no VmPeak line.
 */
size_t getPeakHeap(void) {
    std::ostringstream status_path;
    status_path << "/proc/" << getpid() << "/status";

    std::ifstream status(status_path.str().c_str());
    if (!status.good()) {
        return 0;
    }

    const std::string wanted("VmPeak:");

    for (std::string row; std::getline(status, row);) {
        std::istringstream fields(row, std::istringstream::in);
        std::string key;
        fields >> key;

        if (key == wanted) {
            // Skip everything up to the first digit of the value.
            while (fields.good() && !isdigit(fields.peek())) {
                fields.ignore();
            }

            size_t kilobytes = 0;
            fields >> kilobytes;
            return kilobytes * 1024;
        }
    }

    // No VmPeak line found; the stream closes itself on destruction.
    return 0;
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
/** Stub for platforms with no peak-heap implementation: always reports 0. */
size_t getPeakHeap(void) { return 0; }
|
||||||
|
|
||||||
|
#endif
|
36
tools/hsbench/heapstats.h
Normal file
36
tools/hsbench/heapstats.h
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef HEAPSTATS_H
|
||||||
|
#define HEAPSTATS_H
|
||||||
|
|
||||||
|
#include <cstddef> // for size_t
|
||||||
|
|
||||||
|
/**
 * Peak heap usage of the current process in bytes, or 0 when it cannot be
 * determined on this platform.
 */
size_t getPeakHeap(void);
|
||||||
|
|
||||||
|
#endif
|
201
tools/hsbench/huge.cpp
Normal file
201
tools/hsbench/huge.cpp
Normal file
@ -0,0 +1,201 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#include "hs.h"
|
||||||
|
#include "ue2common.h"
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "huge.h"
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstring>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#if defined(HAVE_SHMGET)
|
||||||
|
#include <sys/ipc.h>
|
||||||
|
#include <sys/shm.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// SysV shared memory id of the huge-page segment holding the database, or -1
// when the database lives in ordinary heap memory. It starts at -1 so that
// release_huge() on a database that never went through get_huge() takes the
// hs_free_database() path instead of trying to shmdt() a heap pointer.
UNUSED static int hsdb_shmid = -1;

using namespace std;

// Huge page size in bytes, or -1 if it cannot be determined (defined below).
long gethugepagesize(void);
|
||||||
|
|
||||||
|
/**
 * Try to relocate a compiled database into huge-page shared memory.
 *
 * The database is serialized, a SysV segment rounded up to the huge page size
 * is created and attached, and the database is deserialized into it. The
 * segment is marked for removal as soon as it is attached, so it disappears
 * when the process detaches.
 *
 * \param db Database to move.
 * \return The relocated database (the original is freed) on success; the
 *         original \p db if huge pages are unavailable or any shared-memory
 *         step fails; nullptr if the database cannot be serialized or sized.
 *         Without HAVE_SHMGET/SHM_HUGETLB, \p db is returned unchanged.
 */
hs_database_t *get_huge(hs_database_t *db) {
#if defined(HAVE_SHMGET) && defined(SHM_HUGETLB)
    /* move the database to huge pages where possible, but fail politely */
    hs_error_t err;
    size_t len;
    char *bytes;

    const long hpage_size = gethugepagesize();
    if (hpage_size < 0) {
        printf("Couldn't determine huge page size\n");
        hsdb_shmid = -1;
        return db;
    }

    err = hs_serialize_database(db, &bytes, &len);
    if (err != HS_SUCCESS) {
        printf("Failed to serialize database for copy: %d\n", err);
        // this is weird - don't fail gracefully this time
        return nullptr;
    }

    size_t size;
    err = hs_serialized_database_size(bytes, len, &size);
    if (err != HS_SUCCESS) {
        printf("Failed to get database size: %d\n", err);
        // this is weird - don't fail gracefully this time, but don't leak the
        // serialized copy either
        free(bytes);
        return nullptr;
    }

    void *shmaddr;
    // Reuse the page size read above rather than parsing /proc/meminfo again.
    if ((hsdb_shmid = shmget(IPC_PRIVATE, ROUNDUP_N(size, hpage_size),
                             SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
        // This could fail if the user doesn't have permission to shmget(),
        // which is OK.
        goto fini;
    }

    shmaddr = shmat(hsdb_shmid, nullptr, SHM_RND);
    if (shmaddr == (void *)-1) {
        perror("Shared memory attach failure");
        // The segment was created but never attached: remove it explicitly,
        // otherwise it outlives the process.
        shmctl(hsdb_shmid, IPC_RMID, nullptr);
        goto fini;
    }

    // Mark this segment to be destroyed after this process detaches.
    shmctl(hsdb_shmid, IPC_RMID, nullptr);

    err = hs_deserialize_database_at(bytes, len, (hs_database_t *)shmaddr);
    if (err != HS_SUCCESS) {
        printf("Failed to deserialize database into shm: %d\n", err);
        shmdt((const void *)shmaddr);
        goto fini;
    }

    free(bytes);
    hs_free_database(db);
    return (hs_database_t *)shmaddr;

fini:
    // Fall back to the caller's heap-allocated database.
    free(bytes);
    hsdb_shmid = -1;
    return db;
#else
    return db;
#endif
}
|
||||||
|
|
||||||
|
/**
 * Release a database obtained from get_huge().
 *
 * When a shared-memory id is recorded, the database is detached with shmdt();
 * otherwise (or when huge-page support is compiled out) it is freed with
 * hs_free_database().
 */
void release_huge(hs_database_t *db) {
#if defined(HAVE_SHMGET) && defined(SHM_HUGETLB)
    if (hsdb_shmid == -1) {
        // fallback
        hs_free_database(db);
        return;
    }

    if (shmdt((const void *)db) != 0) {
        perror("Detach failure");
    }
#else
    hs_free_database(db);
#endif
}
|
||||||
|
|
||||||
|
#define BUF_SIZE 4096

/**
 * Look up a numeric field in /proc/meminfo.
 *
 * The file is read in a single read() of at most BUF_SIZE bytes, \p tag is
 * located with strstr(), and the number that follows it is parsed with
 * strtol() (base auto-detected).
 *
 * \param tag Field name including the trailing colon, e.g. "Hugepagesize:".
 * \return The parsed value, or -1 if the file cannot be read, is too large for
 *         the buffer, does not contain \p tag, or the value does not parse.
 */
static long read_meminfo(const char *tag) {
    char buf[BUF_SIZE];

    const int fd = open("/proc/meminfo", O_RDONLY);
    if (fd < 0) {
        perror("Couldn't open /proc/meminfo");
        return -1;
    }

    const ssize_t len = read(fd, buf, sizeof(buf));
    const int read_errno = errno; // close() may overwrite errno
    close(fd);
    if (len < 0) {
        errno = read_errno;
        perror("Error reading /proc/meminfo");
        return -1;
    }
    if ((size_t)len == sizeof(buf)) {
        // No room left for the terminator; the file may have been truncated.
        printf("/proc/meminfo is too large\n");
        return -1;
    }
    buf[len] = '\0';

    const char *p = strstr(buf, tag);
    if (!p) {
        return -1;
    }
    p += strlen(tag);

    char *q = nullptr;
    errno = 0;
    const long val = strtol(p, &q, 0);
    // Reject "no digits consumed" and out-of-range values as well as a number
    // that is not followed by whitespace. The cast keeps isspace() defined
    // for bytes with the high bit set.
    if (q == p || errno == ERANGE || !isspace((unsigned char)*q)) {
        printf("Couldn't parse /proc/meminfo value\n");
        return -1;
    }

    return val;
}
|
||||||
|
|
||||||
|
/**
 * Huge page size of the running system, in bytes.
 *
 * Reads the "Hugepagesize:" field (reported in kB) from /proc/meminfo.
 *
 * \return Size in bytes, or -1 if the field cannot be read or the byte count
 *         does not fit in a long.
 */
long gethugepagesize(void) {
    // Keep the value as long throughout: narrowing to int and multiplying in
    // int overflows for huge pages of 2 GiB and above.
    const long hpage_kb = read_meminfo("Hugepagesize:");
    if (hpage_kb < 0) {
        return -1;
    }

    // Largest kB count whose byte equivalent still fits in a long.
    const long max_kb = (long)(~0UL >> 1) / 1024;
    if (hpage_kb > max_kb) {
        return -1;
    }

    /* convert from kb to bytes */
    return hpage_kb * 1024;
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
/* No huge page support on WIN32. */
|
||||||
|
|
||||||
|
/** WIN32 stub: there is no huge page support, so the database is returned as-is. */
hs_database_t *get_huge(hs_database_t *db) {
    return db;
}
|
||||||
|
|
||||||
|
/** WIN32 stub: the database was never relocated, so simply free it. */
void release_huge(hs_database_t *db) {
    hs_free_database(db);
}
|
||||||
|
|
||||||
|
#endif
|
37
tools/hsbench/huge.h
Normal file
37
tools/hsbench/huge.h
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef HUGE_H
|
||||||
|
#define HUGE_H
|
||||||
|
|
||||||
|
#include "hs.h"
|
||||||
|
|
||||||
|
hs_database_t *get_huge(hs_database_t *db);
|
||||||
|
void release_huge(hs_database_t *db);
|
||||||
|
|
||||||
|
#endif /* HUGE_H */
|
780
tools/hsbench/main.cpp
Normal file
780
tools/hsbench/main.cpp
Normal file
@ -0,0 +1,780 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "data_corpus.h"
|
||||||
|
#include "engine_hyperscan.h"
|
||||||
|
#include "expressions.h"
|
||||||
|
#include "thread_barrier.h"
|
||||||
|
#include "timer.h"
|
||||||
|
#include "util/expression_path.h"
|
||||||
|
#include "util/string_util.h"
|
||||||
|
|
||||||
|
#include "grey.h"
|
||||||
|
#include "hs.h"
|
||||||
|
#include "ue2common.h"
|
||||||
|
#include "util/make_unique.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <clocale>
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <fstream>
|
||||||
|
#include <map>
|
||||||
|
#include <numeric>
|
||||||
|
#include <sstream>
|
||||||
|
#include <set>
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
|
#include <getopt.h>
|
||||||
|
#ifndef _WIN32
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <boost/core/noncopyable.hpp>
|
||||||
|
#include <boost/range/adaptor/map.hpp>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace ue2;
|
||||||
|
using boost::adaptors::map_keys;
|
||||||
|
|
||||||
|
// Globals common to all files.
|
||||||
|
bool echo_matches = false;
|
||||||
|
bool saveDatabases = false;
|
||||||
|
bool loadDatabases = false;
|
||||||
|
string serializePath("");
|
||||||
|
unsigned int somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE;
|
||||||
|
|
||||||
|
namespace /* anonymous */ {
|
||||||
|
|
||||||
|
// Globals local to this file.
|
||||||
|
bool display_per_scan = false;
|
||||||
|
ScanMode scan_mode = ScanMode::STREAMING;
|
||||||
|
unsigned repeats = 20;
|
||||||
|
string exprPath("");
|
||||||
|
string corpusFile("");
|
||||||
|
vector<unsigned int> threadCores;
|
||||||
|
Timer totalTimer;
|
||||||
|
double totalSecs = 0;
|
||||||
|
|
||||||
|
typedef void (*thread_func_t)(void *context);
|
||||||
|
|
||||||
|
class ThreadContext : boost::noncopyable {
|
||||||
|
public:
|
||||||
|
ThreadContext(unsigned num_in, const EngineHyperscan &db_in,
|
||||||
|
thread_barrier &tb_in, thread_func_t function_in,
|
||||||
|
vector<DataBlock> corpus_data_in)
|
||||||
|
: num(num_in), results(repeats), engine(db_in),
|
||||||
|
enginectx(db_in.makeContext()), corpus_data(move(corpus_data_in)),
|
||||||
|
tb(tb_in), function(function_in) {}
|
||||||
|
|
||||||
|
// Start the thread.
|
||||||
|
bool start(int cpu) {
|
||||||
|
thr = thread(function, this);
|
||||||
|
|
||||||
|
// affine if it's asked for
|
||||||
|
if (cpu >= 0) {
|
||||||
|
return affine(cpu);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for the thread to exit.
|
||||||
|
void join() {
|
||||||
|
thr.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Serialise all threads on a global barrier.
|
||||||
|
void barrier() {
|
||||||
|
tb.wait();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply processor affinity (if available) to this thread.
|
||||||
|
bool affine(UNUSED int cpu) {
|
||||||
|
#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
|
||||||
|
cpu_set_t cpuset;
|
||||||
|
CPU_ZERO(&cpuset);
|
||||||
|
assert(cpu >= 0 && cpu < CPU_SETSIZE);
|
||||||
|
|
||||||
|
// The 'clang' compiler complains about an unused result here, so we
|
||||||
|
// silence it.
|
||||||
|
(void)CPU_SET(cpu, &cpuset);
|
||||||
|
|
||||||
|
int rv = pthread_setaffinity_np(thr.native_handle(), sizeof(cpuset),
|
||||||
|
&cpuset);
|
||||||
|
return (rv == 0);
|
||||||
|
#endif
|
||||||
|
return false; // not available
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned num;
|
||||||
|
Timer timer;
|
||||||
|
vector<ResultEntry> results;
|
||||||
|
const EngineHyperscan &engine;
|
||||||
|
unique_ptr<EngineContext> enginectx;
|
||||||
|
vector<DataBlock> corpus_data;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
thread_barrier &tb; // shared barrier for time sync
|
||||||
|
thread_func_t function;
|
||||||
|
thread thr;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Print the command-line help to stdout, followed by an "Error: ..." line
 * when \p error is non-null.
 */
static
void usage(const char *error) {
    // None of these lines contains a conversion specifier, so they are written
    // out verbatim.
    static const char *const help_text[] = {
        "Usage: hsbench [OPTIONS...]\n\n",
        "Options:\n\n",
        " -h Display help and exit.\n",
        " -G OVERRIDES Overrides for the grey box.\n",
        " -e PATH Path to expression directory.\n",
        " -s FILE Signature file to use.\n",
        " -z NUM Signature ID to use.\n",
        " -c FILE File to use as corpus.\n",
        " -n NUMBER Repeat scan NUMBER times (default 20).\n",
        " -N Benchmark in block mode"
        " (default: streaming).\n",
        " -V Benchmark in vectored mode"
        " (default: streaming).\n",
        " -T CPU,CPU,... Benchmark with threads on these CPUs.\n",
        " -i DIR Don't compile, load from files in DIR"
        " instead.\n",
        " -w DIR After compiling, save to files in DIR.\n",
        " -d NUMBER Set SOM precision mode (default: 8 (large)).\n",
        "\n",
        " --per-scan Display per-scan Mbit/sec results.\n",
        " --echo-matches Display all matches that occur during scan.\n",
        "\n\n",
    };

    for (const char *text : help_text) {
        fputs(text, stdout);
    }

    if (error != nullptr) {
        printf("Error: %s\n", error);
    }
}
|
||||||
|
|
||||||
|
/** Wraps up a name and the set of signature IDs it refers to. */
|
||||||
|
struct BenchmarkSigs {
|
||||||
|
BenchmarkSigs(string name_in, SignatureSet sigs_in)
|
||||||
|
: name(move(name_in)), sigs(move(sigs_in)) {}
|
||||||
|
string name;
|
||||||
|
SignatureSet sigs;
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Process command-line arguments. Prints usage and exits on error. */
|
||||||
|
static
|
||||||
|
void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
|
||||||
|
UNUSED Grey &grey) {
|
||||||
|
const char options[] = "-b:c:Cd:e:G:hi:n:No:p:sT:Vw:z:";
|
||||||
|
int in_sigfile = 0;
|
||||||
|
int do_per_scan = 0;
|
||||||
|
int do_echo_matches = 0;
|
||||||
|
vector<string> sigFiles;
|
||||||
|
|
||||||
|
static struct option longopts[] = {
|
||||||
|
{"per-scan", 0, &do_per_scan, 1},
|
||||||
|
{"echo-matches", 0, &do_echo_matches, 1},
|
||||||
|
{nullptr, 0, nullptr, 0}
|
||||||
|
};
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
int c = getopt_long(argc, argv, options, longopts, nullptr);
|
||||||
|
if (c < 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
switch (c) {
|
||||||
|
case 'c':
|
||||||
|
corpusFile.assign(optarg);
|
||||||
|
break;
|
||||||
|
case 'd': {
|
||||||
|
unsigned dist;
|
||||||
|
if (!fromString(optarg, dist)) {
|
||||||
|
usage("Must provide an integer argument to '-d' flag");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
switch (dist) {
|
||||||
|
case 2:
|
||||||
|
somPrecisionMode = HS_MODE_SOM_HORIZON_SMALL;
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
somPrecisionMode = HS_MODE_SOM_HORIZON_MEDIUM;
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
usage("SOM precision must be 2, 4 or 8");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 'e':
|
||||||
|
exprPath.assign(optarg);
|
||||||
|
break;
|
||||||
|
#ifndef RELEASE_BUILD
|
||||||
|
case 'G':
|
||||||
|
applyGreyOverrides(&grey, string(optarg));
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
case 'h':
|
||||||
|
usage(nullptr);
|
||||||
|
exit(0);
|
||||||
|
break;
|
||||||
|
case 'n':
|
||||||
|
if (!fromString(optarg, repeats) || repeats == 0) {
|
||||||
|
usage("Couldn't parse argument to -n flag, should be"
|
||||||
|
" a positive integer.");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 's':
|
||||||
|
in_sigfile = 2;
|
||||||
|
break;
|
||||||
|
case 'N':
|
||||||
|
scan_mode = ScanMode::BLOCK;
|
||||||
|
break;
|
||||||
|
case 'V':
|
||||||
|
scan_mode = ScanMode::VECTORED;
|
||||||
|
break;
|
||||||
|
case 'T':
|
||||||
|
if (!strToList(optarg, threadCores)) {
|
||||||
|
usage("Couldn't parse argument to -T flag, should be"
|
||||||
|
" a list of positive integers.");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'z': {
|
||||||
|
unsigned int sinumber;
|
||||||
|
if (!fromString(optarg, sinumber)) {
|
||||||
|
usage("Argument to '-z' flag must be an integer");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
SignatureSet sigs = {sinumber};
|
||||||
|
sigSets.emplace_back(string("-z ") + optarg, sigs);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 'i':
|
||||||
|
loadDatabases = true;
|
||||||
|
serializePath = optarg;
|
||||||
|
break;
|
||||||
|
case 'w':
|
||||||
|
saveDatabases = true;
|
||||||
|
serializePath = optarg;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
if (in_sigfile) {
|
||||||
|
sigFiles.push_back(optarg);
|
||||||
|
in_sigfile = 2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 0:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
usage("Unrecognised command line argument.");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (in_sigfile) {
|
||||||
|
in_sigfile--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (do_echo_matches) {
|
||||||
|
echo_matches = true;
|
||||||
|
}
|
||||||
|
if (do_per_scan) {
|
||||||
|
display_per_scan = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (exprPath.empty() && !sigFiles.empty()) {
|
||||||
|
/* attempt to infer an expression directory */
|
||||||
|
auto si = sigFiles.begin();
|
||||||
|
exprPath = inferExpressionPath(*si);
|
||||||
|
for (++si; si != sigFiles.end(); ++si) {
|
||||||
|
if (exprPath != inferExpressionPath(*si)) {
|
||||||
|
usage("Unable to infer consistent expression directory");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Must have a valid expression path
|
||||||
|
if (exprPath.empty()) {
|
||||||
|
usage("Must specify an expression path with the -e option.");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Must have valid database to scan
|
||||||
|
if (corpusFile.empty()) {
|
||||||
|
usage("Must specify a corpus file with the -c option.");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cannot ask for both loading and saving
|
||||||
|
if (loadDatabases && saveDatabases) {
|
||||||
|
usage("You cannot both load and save databases.");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read in any -s signature sets.
|
||||||
|
for (const auto &file : sigFiles) {
|
||||||
|
SignatureSet sigs;
|
||||||
|
loadSignatureList(file, sigs);
|
||||||
|
sigSets.emplace_back(file, move(sigs));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Start the global timer. */
|
||||||
|
static
|
||||||
|
void startTotalTimer(ThreadContext *ctx) {
|
||||||
|
if (ctx->num != 0) {
|
||||||
|
return; // only runs in the first thread
|
||||||
|
}
|
||||||
|
totalTimer.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Stop the global timer and calculate totals. */
|
||||||
|
static
|
||||||
|
void stopTotalTimer(ThreadContext *ctx) {
|
||||||
|
if (ctx->num != 0) {
|
||||||
|
return; // only runs in the first thread
|
||||||
|
}
|
||||||
|
totalTimer.complete();
|
||||||
|
totalSecs = totalTimer.seconds();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Run a benchmark over a given engine and corpus in block mode. */
|
||||||
|
static
|
||||||
|
void benchBlock(void *context) {
|
||||||
|
ThreadContext *ctx = (ThreadContext *)context;
|
||||||
|
|
||||||
|
// Synchronization point
|
||||||
|
ctx->barrier();
|
||||||
|
|
||||||
|
startTotalTimer(ctx);
|
||||||
|
|
||||||
|
for (ResultEntry &r : ctx->results) {
|
||||||
|
ctx->timer.start();
|
||||||
|
|
||||||
|
for (const DataBlock &block : ctx->corpus_data) {
|
||||||
|
ctx->engine.scan(block.payload.c_str(), block.payload.size(),
|
||||||
|
block.id, r, *ctx->enginectx);
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx->timer.complete();
|
||||||
|
r.seconds = ctx->timer.seconds();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Synchronization point
|
||||||
|
ctx->barrier();
|
||||||
|
|
||||||
|
// Now that all threads are finished, we can stop the clock.
|
||||||
|
stopTotalTimer(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Structure used to represent a stream. */
|
||||||
|
struct StreamInfo {
|
||||||
|
unsigned int stream_id = ~0U;
|
||||||
|
unsigned int first_block_id = ~0U;
|
||||||
|
unsigned int last_block_id = 0;
|
||||||
|
unique_ptr<EngineStream> eng_handle;
|
||||||
|
};
|
||||||
|
|
||||||
|
static
|
||||||
|
u64a count_streams(const vector<DataBlock> &corpus_blocks) {
|
||||||
|
set<unsigned int> streams;
|
||||||
|
for (const DataBlock &block : corpus_blocks) {
|
||||||
|
streams.insert(block.stream_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (u64a)streams.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Take a ThreadContext and prepare a vector<StreamDataBlock> for streaming mode
|
||||||
|
* scanning from it.
|
||||||
|
*/
|
||||||
|
static
|
||||||
|
vector<StreamInfo> prepStreamingData(const ThreadContext *ctx) {
|
||||||
|
vector<StreamInfo> info(count_streams(ctx->corpus_data));
|
||||||
|
for (const DataBlock &block : ctx->corpus_data) {
|
||||||
|
assert(block.internal_stream_index < info.size());
|
||||||
|
StreamInfo &si = info[block.internal_stream_index];
|
||||||
|
|
||||||
|
/* check if this is the first time we have encountered this stream */
|
||||||
|
if (si.first_block_id > si.last_block_id) {
|
||||||
|
si.stream_id = block.stream_id;
|
||||||
|
si.first_block_id = block.id;
|
||||||
|
si.last_block_id = block.id;
|
||||||
|
} else {
|
||||||
|
assert(block.stream_id == si.stream_id);
|
||||||
|
assert(block.id > si.last_block_id);
|
||||||
|
assert(block.id > si.first_block_id);
|
||||||
|
si.last_block_id = block.id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return info;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void benchStreamingInternal(ThreadContext *ctx, vector<StreamInfo> &streams) {
|
||||||
|
assert(ctx);
|
||||||
|
const EngineHyperscan &e = ctx->engine;
|
||||||
|
const vector<DataBlock> &blocks = ctx->corpus_data;
|
||||||
|
|
||||||
|
for (ResultEntry &r : ctx->results) {
|
||||||
|
ctx->timer.start();
|
||||||
|
|
||||||
|
for (const auto &b : blocks) {
|
||||||
|
StreamInfo &stream = streams[b.internal_stream_index];
|
||||||
|
assert(stream.stream_id == b.stream_id);
|
||||||
|
|
||||||
|
// If this is the first block in the stream, open the stream
|
||||||
|
// handle.
|
||||||
|
if (b.id == stream.first_block_id) {
|
||||||
|
assert(!stream.eng_handle);
|
||||||
|
stream.eng_handle = e.streamOpen(*ctx->enginectx, b.stream_id);
|
||||||
|
if (!stream.eng_handle) {
|
||||||
|
printf("Fatal error: stream open failed!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(stream.eng_handle);
|
||||||
|
|
||||||
|
e.streamScan(*stream.eng_handle, b.payload.c_str(),
|
||||||
|
b.payload.size(), b.id, r);
|
||||||
|
|
||||||
|
// if this was the last block in the stream, close the stream handle
|
||||||
|
if (b.id == stream.last_block_id) {
|
||||||
|
e.streamClose(move(stream.eng_handle), r);
|
||||||
|
stream.eng_handle = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx->timer.complete();
|
||||||
|
r.seconds = ctx->timer.seconds();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Run a benchmark over a given engine and corpus in streaming mode. */
|
||||||
|
static
|
||||||
|
void benchStreaming(void *context) {
|
||||||
|
ThreadContext *ctx = (ThreadContext *)context;
|
||||||
|
vector<StreamInfo> streams = prepStreamingData(ctx);
|
||||||
|
|
||||||
|
// Synchronization point
|
||||||
|
ctx->barrier();
|
||||||
|
|
||||||
|
startTotalTimer(ctx);
|
||||||
|
|
||||||
|
benchStreamingInternal(ctx, streams);
|
||||||
|
|
||||||
|
// Synchronization point
|
||||||
|
ctx->barrier();
|
||||||
|
|
||||||
|
// Now that all threads are finished, we can stop the clock.
|
||||||
|
stopTotalTimer(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** In-memory structure for a data block to be scanned in vectored mode. */
|
||||||
|
struct VectoredInfo {
|
||||||
|
vector<const char *> data;
|
||||||
|
vector<unsigned int> len;
|
||||||
|
unsigned int stream_id;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Take a ThreadContext and prepare a vector<VectoredInfo> for vectored mode
|
||||||
|
* scanning from it.
|
||||||
|
*/
|
||||||
|
static
|
||||||
|
vector<VectoredInfo> prepVectorData(const ThreadContext *ctx) {
|
||||||
|
vector<VectoredInfo> out(count_streams(ctx->corpus_data));
|
||||||
|
for (const DataBlock &block : ctx->corpus_data) {
|
||||||
|
VectoredInfo &vi = out[block.internal_stream_index];
|
||||||
|
if (vi.data.empty()) {
|
||||||
|
vi.stream_id = block.stream_id;
|
||||||
|
} else {
|
||||||
|
assert(vi.stream_id == block.stream_id);
|
||||||
|
}
|
||||||
|
vi.data.push_back(block.payload.c_str());
|
||||||
|
vi.len.push_back(block.payload.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Run a benchmark over a given engine and corpus in vectored mode. */
|
||||||
|
static
|
||||||
|
void benchVectored(void *context) {
|
||||||
|
ThreadContext *ctx = (ThreadContext *)context;
|
||||||
|
|
||||||
|
vector<VectoredInfo> v_plans = prepVectorData(ctx);
|
||||||
|
|
||||||
|
// Synchronization point
|
||||||
|
ctx->barrier();
|
||||||
|
|
||||||
|
startTotalTimer(ctx);
|
||||||
|
|
||||||
|
for (ResultEntry &r : ctx->results) {
|
||||||
|
ctx->timer.start();
|
||||||
|
|
||||||
|
for (const VectoredInfo &v_plan : v_plans) {
|
||||||
|
ctx->engine.scan_vectored(&v_plan.data[0], &v_plan.len[0],
|
||||||
|
v_plan.data.size(), v_plan.stream_id, r,
|
||||||
|
*ctx->enginectx);
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx->timer.complete();
|
||||||
|
r.seconds = ctx->timer.seconds();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Synchronization point
|
||||||
|
ctx->barrier();
|
||||||
|
|
||||||
|
// Now that all threads are finished, we can stop the clock.
|
||||||
|
stopTotalTimer(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Given a time and a size, compute the throughput in megabits/sec. */
|
||||||
|
static
|
||||||
|
long double calc_mbps(double seconds, u64a bytes) {
|
||||||
|
assert(seconds > 0);
|
||||||
|
return (long double)bytes / ((long double)seconds * 125000);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Dump per-scan throughput data to screen. */
|
||||||
|
static
|
||||||
|
void displayPerScanResults(const vector<unique_ptr<ThreadContext>> &threads,
|
||||||
|
u64a bytesPerRun) {
|
||||||
|
for (const auto &t : threads) {
|
||||||
|
const auto &results = t->results;
|
||||||
|
for (size_t j = 0; j != results.size(); j++) {
|
||||||
|
const auto &r = results[j];
|
||||||
|
double mbps = calc_mbps(r.seconds, bytesPerRun);
|
||||||
|
printf("T %2u Scan %2zu: %'0.2f Mbit/sec\n", t->num, j, mbps);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
u64a byte_size(const vector<DataBlock> &corpus_blocks) {
|
||||||
|
u64a total = 0;
|
||||||
|
for (const DataBlock &block : corpus_blocks) {
|
||||||
|
total += block.payload.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
return total;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Dump benchmark results to screen. */
|
||||||
|
static
|
||||||
|
void displayResults(const vector<unique_ptr<ThreadContext>> &threads,
|
||||||
|
const vector<DataBlock> &corpus_blocks) {
|
||||||
|
u64a bytesPerRun = byte_size(corpus_blocks);
|
||||||
|
u64a matchesPerRun = threads[0]->results[0].matches;
|
||||||
|
|
||||||
|
// Sanity check: all of our results should have the same match count.
|
||||||
|
for (const auto &t : threads) {
|
||||||
|
if (!all_of(begin(t->results), end(t->results),
|
||||||
|
[&matchesPerRun](const ResultEntry &e) {
|
||||||
|
return e.matches == matchesPerRun;
|
||||||
|
})) {
|
||||||
|
printf("\nWARNING: PER-SCAN MATCH COUNTS ARE INCONSISTENT!\n\n");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Time spent scanning: %'0.3f seconds\n", totalSecs);
|
||||||
|
printf("Corpus size: %'llu bytes ", bytesPerRun);
|
||||||
|
switch (scan_mode) {
|
||||||
|
case ScanMode::STREAMING:
|
||||||
|
printf("(%'zu blocks in %'llu streams)\n", corpus_blocks.size(),
|
||||||
|
count_streams(corpus_blocks));
|
||||||
|
break;
|
||||||
|
case ScanMode::VECTORED:
|
||||||
|
printf("(%'zu blocks in %'llu vectors)\n", corpus_blocks.size(),
|
||||||
|
count_streams(corpus_blocks));
|
||||||
|
break;
|
||||||
|
case ScanMode::BLOCK:
|
||||||
|
printf("(%'zu blocks)\n", corpus_blocks.size());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64a totalBytes = bytesPerRun * repeats * threads.size();
|
||||||
|
u64a totalBlocks = corpus_blocks.size() * repeats * threads.size();
|
||||||
|
|
||||||
|
double matchRate = ((double)matchesPerRun * 1024) / bytesPerRun;
|
||||||
|
printf("Matches per iteration: %'llu (%'0.3f matches/kilobyte)\n",
|
||||||
|
matchesPerRun, matchRate);
|
||||||
|
|
||||||
|
double blockRate = (double)totalBlocks / (double)totalSecs;
|
||||||
|
printf("Overall block rate: %'0.2f blocks/sec\n", blockRate);
|
||||||
|
printf("Overall throughput: %'0.2Lf Mbit/sec\n",
|
||||||
|
calc_mbps(totalSecs, totalBytes));
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
|
if (display_per_scan) {
|
||||||
|
displayPerScanResults(threads, bytesPerRun);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct a thread context for this scanning mode.
|
||||||
|
*
|
||||||
|
* Note: does not take blocks by reference. This is to give every thread their
|
||||||
|
* own copy of the data. It would be unrealistic for every thread to be scanning
|
||||||
|
* the same copy of the data.
|
||||||
|
*/
|
||||||
|
static
|
||||||
|
unique_ptr<ThreadContext> makeThreadContext(const EngineHyperscan &db,
|
||||||
|
const vector<DataBlock> &blocks,
|
||||||
|
unsigned id,
|
||||||
|
thread_barrier &sync_barrier) {
|
||||||
|
thread_func_t fn = nullptr;
|
||||||
|
switch (scan_mode) {
|
||||||
|
case ScanMode::STREAMING:
|
||||||
|
fn = benchStreaming;
|
||||||
|
break;
|
||||||
|
case ScanMode::VECTORED:
|
||||||
|
fn = benchVectored;
|
||||||
|
break;
|
||||||
|
case ScanMode::BLOCK:
|
||||||
|
fn = benchBlock;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
assert(fn);
|
||||||
|
|
||||||
|
return ue2::make_unique<ThreadContext>(id, db, sync_barrier, fn, blocks);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Run the given benchmark. */
|
||||||
|
static
|
||||||
|
void runBenchmark(const EngineHyperscan &db,
|
||||||
|
const vector<DataBlock> &corpus_blocks) {
|
||||||
|
size_t numThreads;
|
||||||
|
bool useAffinity = false;
|
||||||
|
|
||||||
|
if (threadCores.empty()) {
|
||||||
|
numThreads = 1;
|
||||||
|
} else {
|
||||||
|
numThreads = threadCores.size();
|
||||||
|
#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
|
||||||
|
useAffinity = true;
|
||||||
|
#else
|
||||||
|
useAffinity = false;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialise a barrier that will let us sync threads before/after scanning
|
||||||
|
// for timer measurements.
|
||||||
|
thread_barrier sync_barrier(numThreads);
|
||||||
|
|
||||||
|
vector<unique_ptr<ThreadContext>> threads;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < numThreads; i++) {
|
||||||
|
auto t = makeThreadContext(db, corpus_blocks, i, sync_barrier);
|
||||||
|
int core = useAffinity ? (int)threadCores[i] : -1;
|
||||||
|
if (!t->start(core)) {
|
||||||
|
printf("Unable to start processing thread %u\n", i);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
threads.push_back(move(t));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reap threads.
|
||||||
|
for (auto &t : threads) {
|
||||||
|
t->join();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Display global results.
|
||||||
|
displayResults(threads, corpus_blocks);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
/** Main driver. */
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
Grey grey;
|
||||||
|
|
||||||
|
setlocale(LC_ALL, ""); // use the user's locale
|
||||||
|
|
||||||
|
#ifndef NDEBUG
|
||||||
|
printf("\nWARNING: DO NOT BENCHMARK A HYPERSCAN BUILD WITH ASSERTIONS\n\n");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
vector<BenchmarkSigs> sigSets;
|
||||||
|
processArgs(argc, argv, sigSets, grey);
|
||||||
|
|
||||||
|
// read in and process our expressions
|
||||||
|
ExpressionMap exprMapTemplate;
|
||||||
|
loadExpressions(exprPath, exprMapTemplate);
|
||||||
|
|
||||||
|
// If we have no signature sets, the user wants us to benchmark all the
|
||||||
|
// known expressions together.
|
||||||
|
if (sigSets.empty()) {
|
||||||
|
SignatureSet sigs;
|
||||||
|
for (auto i : exprMapTemplate | map_keys) {
|
||||||
|
sigs.push_back(i);
|
||||||
|
}
|
||||||
|
sigSets.emplace_back(exprPath, move(sigs));
|
||||||
|
}
|
||||||
|
|
||||||
|
// read in and process our corpus
|
||||||
|
vector<DataBlock> corpus_blocks;
|
||||||
|
try {
|
||||||
|
corpus_blocks = readCorpus(corpusFile);
|
||||||
|
} catch (const DataCorpusError &e) {
|
||||||
|
printf("Corpus data error: %s\n", e.msg.c_str());
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const auto &s : sigSets) {
|
||||||
|
ExpressionMap exprMap = exprMapTemplate; // copy
|
||||||
|
|
||||||
|
limitBySignature(exprMap, s.sigs);
|
||||||
|
if (exprMap.empty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto engine = buildEngineHyperscan(exprMap, scan_mode, s.name, grey);
|
||||||
|
if (!engine) {
|
||||||
|
printf("Error: expressions failed to compile.\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
|
runBenchmark(*engine, corpus_blocks);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
58
tools/hsbench/scripts/CorpusBuilder.py
Executable file
58
tools/hsbench/scripts/CorpusBuilder.py
Executable file
@ -0,0 +1,58 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
|
||||||
|
'''
|
||||||
|
A module to construct corpora databases for the Hyperscan benchmarker
|
||||||
|
(hsbench).
|
||||||
|
|
||||||
|
After construction, simply add blocks with the add_chunk() method, then call
|
||||||
|
finish() when you're done.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import os.path
|
||||||
|
|
||||||
|
try:
|
||||||
|
from sqlite3 import dbapi2 as sqlite
|
||||||
|
except:
|
||||||
|
from pysqlite2 import dbapi2 as sqlite
|
||||||
|
|
||||||
|
class CorpusBuilder:
    '''
    Writes a corpus database with a single table, `chunk`, holding one row per
    block of data (id, stream_id, data).

    Construct it with the path of a database file that does not exist yet,
    call add_chunk() for every block, then call finish().
    '''

    SCHEMA = '''
CREATE TABLE chunk (
id integer primary key,
stream_id integer not null,
data blob
);
'''

    def __init__(self, outfile):
        '''
        Create the database file @outfile and its schema. Raises RuntimeError
        if a file with that name already exists.
        '''
        if os.path.exists(outfile):
            raise RuntimeError("Database '%s' already exists" % outfile)
        self.outfile = outfile
        self.db = sqlite.connect(self.outfile)
        self.db.executescript(CorpusBuilder.SCHEMA)
        # Chunk ids are handed out sequentially, starting from zero.
        self.current_chunk_id = 0

    def add_chunk(self, stream_id, data):
        '''
        Insert @data as the next chunk of stream @stream_id and return the id
        that was assigned to the chunk.
        '''
        assigned_id = self.current_chunk_id
        row = (assigned_id, stream_id, sqlite.Binary(data))
        cursor = self.db.cursor()
        cursor.execute(
            'insert into chunk (id, stream_id, data) values (?, ?, ?)', row)
        self.current_chunk_id += 1
        return assigned_id

    def finish(self):
        '''
        Commit the inserted chunks, then index the table on stream_id and run
        vacuum and analyze on the database before committing again.
        '''
        self.db.commit()

        closing_statements = (
            'create index chunk_stream_id_idx on chunk(stream_id)',
            'vacuum',
            'analyze',
        )
        for statement in closing_statements:
            # A fresh cursor per statement, as each step is independent.
            self.db.cursor().execute(statement)

        self.db.commit()
|
68
tools/hsbench/scripts/gutenbergCorpus.py
Executable file
68
tools/hsbench/scripts/gutenbergCorpus.py
Executable file
@ -0,0 +1,68 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
|
||||||
|
'''
|
||||||
|
This script creates a Hyperscan benchmarking corpus database from a supplied
|
||||||
|
group of Project Gutenberg texts.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import sys, getopt, os.path
|
||||||
|
import gutenberg.acquire, gutenberg.cleanup, gutenberg.query
|
||||||
|
from CorpusBuilder import CorpusBuilder
|
||||||
|
|
||||||
|
stream_id = 0
|
||||||
|
stream_bytes = 0
|
||||||
|
|
||||||
|
def addBlocks(builder, block_size, stream_size, text_id, text):
|
||||||
|
global stream_id
|
||||||
|
global stream_bytes
|
||||||
|
|
||||||
|
print "text", text_id, "len", len(text)
|
||||||
|
i = 0
|
||||||
|
while i < len(text):
|
||||||
|
chunk = text[i:min(len(text), i + block_size)]
|
||||||
|
builder.add_chunk(stream_id, chunk)
|
||||||
|
i += block_size
|
||||||
|
stream_bytes += len(chunk)
|
||||||
|
if stream_bytes >= stream_size:
|
||||||
|
stream_id += 1
|
||||||
|
stream_bytes = 0
|
||||||
|
print "Text", text_id, ": added", i/block_size, "blocks of", block_size, "bytes."
|
||||||
|
|
||||||
|
def buildCorpus(outFN, block_size, stream_size, text_ids):
    '''
    Build a corpus database named @outFN from the Project Gutenberg texts
    whose ids are listed in @text_ids.

    Each text is loaded, has its headers stripped, and is added in blocks of
    at most @block_size; see addBlocks() for how @stream_size is used.
    Exits with a non-zero status if @text_ids is empty.
    '''
    if len(text_ids) == 0:
        sys.stderr.write("Must provide at least one input ID\n")
        # This is an error path, so report failure to the caller (the original
        # exited with status 0); -1 matches what usage() does.
        sys.exit(-1)

    builder = CorpusBuilder(outFN)

    # Stream bookkeeping lives in the module-level stream_id / stream_bytes
    # counters that addBlocks() updates; local variables of the same names
    # here would only shadow them and never be read.
    total_bytes = 0

    for text_id in text_ids:
        text_id = int(text_id)
        text = gutenberg.acquire.load_etext(text_id)
        text = gutenberg.cleanup.strip_headers(text).strip()
        addBlocks(builder, block_size, stream_size, text_id, text)
        total_bytes += len(text)

    builder.finish()

    sys.stdout.write("Total: %s bytes.\n" % total_bytes)
|
||||||
|
|
||||||
|
def usage(exeName):
    '''
    Print the command-line synopsis for @exeName to stderr and exit with
    status -1.
    '''
    print >> sys.stderr, "Usage: %s -o <output file> -b <block size> -s <max stream size> <gutenberg text id>..." % exeName
    sys.exit(-1)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Options -o, -b and -s all take a value and are all mandatory; whatever
    # is left over on the command line is the list of text ids.
    opts, args = getopt.getopt(sys.argv[1:], 'o:b:s:')
    opts = dict(opts)

    for flag in ('-o', '-b', '-s'):
        if flag not in opts:
            usage(os.path.basename(sys.argv[0]))

    out_name = opts['-o']
    block_size = int(opts['-b'])
    max_stream_size = int(opts['-s'])
    buildCorpus(out_name, block_size, max_stream_size, args)
|
53
tools/hsbench/scripts/linebasedCorpus.py
Executable file
53
tools/hsbench/scripts/linebasedCorpus.py
Executable file
@ -0,0 +1,53 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
|
||||||
|
'''
|
||||||
|
Simple script to take a file full of lines of text and push them into a
|
||||||
|
Hyperscan benchmarking corpus database, one block per line.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import sys, getopt, os.path
|
||||||
|
from CorpusBuilder import CorpusBuilder
|
||||||
|
|
||||||
|
def lineCorpus(inFN, outFN):
    '''
    Read lines from file name @inFN and write them as blocks to a new db with
    name @outFN.

    Every line becomes one chunk, with trailing whitespace removed, and all
    chunks are placed in a single stream (id 0). Exits with status -1 if
    @inFN does not exist, and with status 0 if it contains no lines.
    '''

    if not os.path.exists(inFN):
        # Report the path that is actually missing: the input file, not the
        # output database name.
        sys.stderr.write("Input file '%s' does not exist. Exiting.\n" % inFN)
        sys.exit(-1)

    # Close the input file as soon as its lines have been read.
    with open(inFN) as infile:
        lines = infile.readlines()

    if len(lines) == 0:
        sys.stderr.write("Input file contained no lines. Exiting.\n")
        sys.exit(0)

    builder = CorpusBuilder(outFN)

    # write a single stream to contain everything
    streamId = 0

    for l in lines:
        builder.add_chunk(streamId, l.rstrip())

    builder.finish()
|
||||||
|
|
||||||
|
def usage(exeName):
    '''
    Print the command-line synopsis for @exeName to stderr and exit with
    status -1.
    '''
    print >> sys.stderr, "Usage: %s -i <input file> -o <output file>" % exeName
    sys.exit(-1)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # getopt returns (option pairs, remaining arguments); only the option
    # pairs are used here.
    args = dict(getopt.getopt(sys.argv[1:], 'i:o:c:')[0])

    # Both the input and the output file names are mandatory.
    for flag in ('-i', '-o'):
        if flag not in args:
            usage(os.path.basename(sys.argv[0]))

    lineCorpus(args['-i'], args['-o'])
|
301
tools/hsbench/scripts/pcapCorpus.py
Executable file
301
tools/hsbench/scripts/pcapCorpus.py
Executable file
@ -0,0 +1,301 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
'''
|
||||||
|
Script to convert a pcap file containing UDP and TCP packets to a corpus file.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import sys, getopt, pprint, os
|
||||||
|
from sqlite3 import dbapi2 as sqlite
|
||||||
|
import pcap
|
||||||
|
from optparse import OptionParser
|
||||||
|
from socket import AF_INET, IPPROTO_UDP, IPPROTO_TCP, inet_ntop, ntohs, ntohl, inet_ntoa
|
||||||
|
import struct
|
||||||
|
from CorpusBuilder import CorpusBuilder
|
||||||
|
|
||||||
|
ETHERTYPE_IP = 0x0800 # IP protocol
|
||||||
|
ETHERTYPE_ARP = 0x0806 # Addr. resolution protocol
|
||||||
|
ETHERTYPE_REVARP = 0x8035 # reverse Addr. resolution protocol
|
||||||
|
ETHERTYPE_VLAN = 0x8100 # IEEE 802.1Q VLAN tagging
|
||||||
|
ETHERTYPE_IPV6 = 0x86dd # IPv6
|
||||||
|
|
||||||
|
#
|
||||||
|
# A dictionary of active TCP streams
|
||||||
|
#
|
||||||
|
tcp_streams = {}
|
||||||
|
|
||||||
|
#
|
||||||
|
# A dictionary of UDP streams
|
||||||
|
#
|
||||||
|
udp_streams = {}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Current stream id
|
||||||
|
cur_stream_id = 0
|
||||||
|
|
||||||
|
def usage(exeName):
    '''
    Print the command-line synopsis for @exeName to stderr and exit with
    status -1.
    '''
    print >> sys.stderr, "Usage: %s -i <pcap-file> -o <sqlite-file>" % exeName
    sys.exit(-1)
|
||||||
|
|
||||||
|
class FiveTuple(object):
    """Protocol number plus source and destination address/port of a packet.

    str() of an instance yields the five fields joined by commas, which the
    rest of this script uses as a dictionary key for streams.
    """
    def __init__(self, protocol, src_addr, src_port, dst_addr, dst_port):
        self.protocol = protocol
        self.src_addr, self.src_port = src_addr, src_port
        self.dst_addr, self.dst_port = dst_addr, dst_port

    def __str__(self):
        fields = (self.protocol,
                  self.src_addr, self.src_port,
                  self.dst_addr, self.dst_port)
        return "%d,%s,%d,%s,%d" % fields
|
||||||
|
|
||||||
|
class UdpSegment:
    """A single UDP datagram: the five-tuple it belongs to, its header and
    its payload, stored as given.
    """
    def __init__(self, five_tuple, header, payload):
        self.udp_payload = payload
        self.udp_header = header
        self.five_tuple = five_tuple
|
||||||
|
|
||||||
|
class TcpSegment:
    """A single TCP segment.

    Besides the five-tuple, header and payload passed in, the constructor
    decodes the sequence and acknowledgement numbers from bytes 4..11 of the
    header (two unsigned 32-bit values in network byte order). Segments
    compare by sequence number.
    """
    def __init__(self, five_tuple, header, payload):
        self.five_tuple = five_tuple
        self.tcp_header = header
        self.tcp_payload = payload
        seq_and_ack = struct.unpack('!LL', header[4:12])
        self.tcp_sequence_number = seq_and_ack[0]
        self.tcp_acknowledgement_number = seq_and_ack[1]

    def _control_bits(self):
        # Low six bits of header byte 13.
        return ord(self.tcp_header[13]) & 0x3F

    def opt_isset_FIN(self):
        """Return a non-zero value if bit 0x01 of the control bits is set."""
        return self._control_bits() & 0x01

    def opt_isset_SYN(self):
        """Return a non-zero value if bit 0x02 of the control bits is set."""
        return self._control_bits() & 0x02

    def get_sequence_number(self):
        """Return the sequence number decoded from the header."""
        return self.tcp_sequence_number

    def __cmp__(self, other):
        # Order segments by their sequence numbers.
        return cmp(self.tcp_sequence_number, other.tcp_sequence_number)
|
||||||
|
|
||||||
|
class TcpStream:
    """The segments collected so far for one TCP five-tuple.

    initial_sequence_number is set from the first segment appended while the
    segment list is empty.
    """
    TCP_STREAM_ACTIVE = 0x1
    TCP_STREAM_CLOSED = 0x02

    def __init__(self, five_tuple):
        self.five_tuple = five_tuple
        self.initial_sequence_number = 0
        self.segments = []

    def reset_stream(self):
        """Forget all collected segments and the initial sequence number."""
        self.initial_sequence_number = 0
        self.segments = []

    def set_initial_sequence_number(self, sequence_number):
        """Record @sequence_number as the stream's initial sequence number."""
        self.initial_sequence_number = sequence_number

    def append_segment(self, tcp_segment):
        """Add @tcp_segment to the stream; the first segment added to an
        empty stream also provides the initial sequence number.
        """
        if not self.segments:
            self.set_initial_sequence_number(tcp_segment.get_sequence_number())
        self.segments.append(tcp_segment)

    def get_segments_sorted(self):
        """Return a new list of the segments in ascending order."""
        ordered = list(self.segments)
        ordered.sort()
        return ordered
|
||||||
|
|
||||||
|
class UdpStream:
    """A container for UDP packets that share the same 5-tuple; segments are
    kept in the order in which they were appended.
    """
    def __init__(self, five_tuple):
        self.segments = []
        self.five_tuple = five_tuple

    def append_segment(self, udp_segment):
        """Add @udp_segment to the end of the stream."""
        self.segments += [udp_segment]
|
||||||
|
|
||||||
|
|
||||||
|
def newStream(five_tuple):
    '''
    Allocate a new stream and return its ID.

    IDs come from the module-level counter cur_stream_id, which is advanced
    by one on every call. @five_tuple is accepted but not used.
    '''
    global cur_stream_id
    cur_stream_id += 1
    return cur_stream_id - 1
|
||||||
|
|
||||||
|
def process_tcp_segment(builder, segment):
    """Process a TCP segment.

    The segment is appended to the stream tracked for its five-tuple in
    tcp_streams, creating that stream if necessary. A SYN segment then
    empties the stream's segment list; a FIN segment causes the stream's
    segments to be written out through @builder and the stream to be dropped
    from tcp_streams.
    """
    stream_key = str(segment.five_tuple)
    already_tracked = stream_key in tcp_streams

    if already_tracked:
        stream = tcp_streams[stream_key]
    else:
        stream = TcpStream(segment.five_tuple)

    stream.append_segment(segment)

    if not already_tracked:
        tcp_streams[stream_key] = stream

    if segment.opt_isset_SYN():
        stream.segments = []

    if segment.opt_isset_FIN():
        # Finished with the stream: add its segments to the db and remove it
        # from the table so that the five-tuple can be reused.
        db_add_tcp_stream_segments(builder, stream)
        del tcp_streams[stream_key]
|
||||||
|
|
||||||
|
def process_udp_segment(builder, segment):
    """Process a UDP segment.

    UDP is connectionless, so the segment is simply accumulated in the
    stream tracked for its five-tuple in udp_streams (created on first use)
    for later processing once all the packets have been read. @builder is
    not used here.
    """
    stream_key = str(segment.five_tuple)
    already_tracked = stream_key in udp_streams

    if already_tracked:
        stream = udp_streams[stream_key]
    else:
        stream = UdpStream(segment.five_tuple)

    stream.append_segment(segment)

    if not already_tracked:
        udp_streams[stream_key] = stream
|
||||||
|
|
||||||
|
|
||||||
|
def db_add_tcp_stream_segments(builder, tcp_stream):
    """Add the contents of a TCP stream to the database.

    Segments are visited in sorted order. A segment is written as a chunk
    only if it has a payload and its sequence number is greater than that of
    the last segment written (starting from 0); all other segments are
    skipped.
    """
    stream_key = None
    highest_written_seq = 0

    for seg in tcp_stream.get_segments_sorted():
        if len(seg.tcp_payload) == 0:
            continue
        if seg.tcp_sequence_number <= highest_written_seq:
            continue

        # Segment with an actual payload: add it to the stream's list of
        # chunks. Creating the stream is delayed until there is a chunk to
        # commit to it.
        if stream_key is None:
            stream_key = newStream(tcp_stream.five_tuple)
        builder.add_chunk(stream_key, seg.tcp_payload)
        highest_written_seq = seg.tcp_sequence_number
|
||||||
|
|
||||||
|
|
||||||
|
def db_add_udp_stream_segments(builder, udp_stream):
    """Add the contents of a UDP stream to the database.

    Since UDP is connection-less, a UDP stream object is really just an
    accumulation of all the packets associated with a given 5-tuple. Every
    segment with a non-empty payload is written as a chunk, in the order
    stored; the stream ID is only allocated once the first such segment is
    found.
    """
    stream_key = None
    for seg in udp_stream.segments:
        if len(seg.udp_payload) == 0:
            continue
        if stream_key is None:
            stream_key = newStream(udp_stream.five_tuple)
        builder.add_chunk(stream_key, seg.udp_payload)
|
||||||
|
|
||||||
|
def enchunk_pcap(pcapFN, sqliteFN):
    """Read the contents of a pcap file with name @pcapFN and produce
    a sqlite db with name @sqliteFN. It will contain chunks of data
    from TCP and UDP streams.

    Exits the process with status -1 if @pcapFN does not exist. Packets
    that are not IP, or whose IP protocol is neither TCP nor UDP, are
    ignored.
    """

    if not os.path.exists(pcapFN):
        print >> sys.stderr, "Input file '%s' does not exist. Exiting." % pcapFN
        sys.exit(-1)

    builder = CorpusBuilder(sqliteFN)

    #
    # Read in the contents of the pcap file, adding stream segments as found
    #
    pcap_ref = pcap.pcap(pcapFN)

    while True:
        try:
            ts, packet = pcap_ref.next()
        except Exception:
            # Any failure to fetch the next packet (including running out of
            # packets) ends the read loop. Unlike a bare "except:", this lets
            # KeyboardInterrupt and SystemExit propagate.
            break

        # The two bytes just before the link-layer payload offset are read as
        # the EtherType.
        linkLayerType = struct.unpack('!H', packet[(pcap_ref.dloff - 2):pcap_ref.dloff])[0]
        if linkLayerType != ETHERTYPE_IP:
            #
            # We're only interested in IP packets
            #
            continue

        # Trim the packet to the IP total length (header bytes 2-3) so that
        # any trailing link-layer bytes are not treated as payload.
        ip_pkt_total_len = struct.unpack('!H', packet[pcap_ref.dloff + 2: pcap_ref.dloff + 4])[0]
        ip_pkt = packet[pcap_ref.dloff:pcap_ref.dloff + ip_pkt_total_len]
        pkt_protocol = struct.unpack('B', ip_pkt[9])[0]

        if (pkt_protocol != IPPROTO_UDP) and (pkt_protocol != IPPROTO_TCP):
            #
            # we're only interested in UDP and TCP packets at the moment
            #
            continue

        pkt_src_addr = inet_ntoa(ip_pkt[12:16])
        pkt_dst_addr = inet_ntoa(ip_pkt[16:20])

        # Low nibble of the first byte is the IP header length in 32-bit words.
        ip_hdr_len_offset = (ord(ip_pkt[0]) & 0x0f) * 4
        ip_payload = ip_pkt[ip_hdr_len_offset:len(ip_pkt)]

        # TCP and UDP both start with source port then destination port.
        pkt_src_port, pkt_dst_port = struct.unpack('!HH', ip_payload[0:4])
        five_tuple = FiveTuple(pkt_protocol, pkt_src_addr, pkt_src_port, pkt_dst_addr, pkt_dst_port)

        if pkt_protocol == IPPROTO_UDP:
            # The UDP header is a fixed 8 bytes.
            udp_header = ip_payload[0:8]
            udp_payload = ip_payload[8:len(ip_payload)]
            udp_segment = UdpSegment(five_tuple, udp_header, udp_payload)
            process_udp_segment(builder, udp_segment)
        elif pkt_protocol == IPPROTO_TCP:
            # High nibble of byte 12 is the TCP header length in 32-bit words.
            tcp_hdr_len = (ord(ip_payload[12]) >> 4) * 4
            tcp_header = ip_payload[0:tcp_hdr_len]
            tcp_payload = ip_payload[tcp_hdr_len:len(ip_payload)]
            segment = TcpSegment(five_tuple, tcp_header, tcp_payload)
            process_tcp_segment(builder, segment)

    #
    # Having read the contents of the pcap, we fill the database with any
    # remaining TCP and UDP segments
    #
    for tcp_stream in tcp_streams.itervalues():
        db_add_tcp_stream_segments(builder, tcp_stream)

    for udp_stream in udp_streams.itervalues():
        db_add_udp_stream_segments(builder, udp_stream)

    #
    # We've finished with the database
    #
    builder.finish()
|
||||||
|
|
||||||
|
if __name__ == '__main__' :

    # Command line: -i <input pcap file> -o <output sqlite file>
    args = getopt.getopt(sys.argv[1:], 'i:o:')
    args = dict(args[0])

    requiredKeys = [ '-i', '-o']
    for k in requiredKeys :
        # "in" replaces the deprecated dict.has_key()
        if k not in args :
            usage(os.path.basename(sys.argv[0]))

    # Pass the option values positionally, in requiredKeys order:
    # (input pcap, output sqlite db).
    fnArgs = tuple([ args[k] for k in requiredKeys ])
    enchunk_pcap(*fnArgs)
|
71
tools/hsbench/thread_barrier.h
Normal file
71
tools/hsbench/thread_barrier.h
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \file
|
||||||
|
* \brief Simple thread barrier.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TOOLS_THREAD_BARRIER_H
|
||||||
|
#define TOOLS_THREAD_BARRIER_H
|
||||||
|
|
||||||
|
#include <condition_variable>
#include <mutex>
#include <stdexcept>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Simple thread barrier class.
|
||||||
|
*
|
||||||
|
* Blocks until wait() has been called N times.
|
||||||
|
*/
|
||||||
|
class thread_barrier {
public:
    /**
     * \brief Construct a barrier for \p n participating threads.
     *
     * \throws std::runtime_error if \p n is zero.
     */
    explicit thread_barrier(unsigned int n) : max(n) {
        if (max == 0) {
            throw std::runtime_error("invalid barrier");
        }
    }

    /**
     * \brief Block until wait() has been called by N threads.
     *
     * The last thread to arrive resets the arrival count, so the barrier can
     * be used again for another round.
     */
    void wait() {
        std::unique_lock<std::mutex> lock(mtx);
        // Remember which round this thread arrived in. Waiting on the round
        // number (rather than on a bare notification) means a spurious
        // wakeup cannot release a thread before everyone has arrived.
        const unsigned int arrival_generation = generation;
        count++;
        if (count >= max) {
            count = 0;
            generation++;
            condvar.notify_all();
        } else {
            condvar.wait(lock, [this, arrival_generation] {
                return generation != arrival_generation;
            });
        }
    }

private:
    std::mutex mtx;
    std::condition_variable condvar;
    unsigned int count = 0;      //!< threads that have arrived this round
    unsigned int generation = 0; //!< incremented each time the barrier opens
    unsigned int max;            //!< threads required to open the barrier
};
|
||||||
|
|
||||||
|
#endif // TOOLS_THREAD_BARRIER_H
|
59
tools/hsbench/timer.h
Normal file
59
tools/hsbench/timer.h
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef TIMER_H
|
||||||
|
#define TIMER_H
|
||||||
|
|
||||||
|
#include "ue2common.h"
|
||||||
|
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
|
/**
 * Minimal stopwatch built on std::chrono::steady_clock.
 *
 * Call start(), then complete(), then read the elapsed time with seconds().
 */
class Timer {
public:
    Timer() = default;

    /** Record the current time as the start of the measured interval. */
    void start() { clock_start = Clock::now(); }

    /** Record the current time as the end of the measured interval. */
    void complete() { clock_end = Clock::now(); }

    /** Time between the recorded start and end points, in seconds. */
    double seconds() const {
        const std::chrono::duration<double> elapsed(clock_end - clock_start);
        return elapsed.count();
    }

protected:
    using Clock = std::chrono::steady_clock;
    std::chrono::time_point<Clock> clock_start;
    std::chrono::time_point<Clock> clock_end;
};
|
||||||
|
|
||||||
|
#endif // TIMER_H
|
@ -1,7 +1,10 @@
|
|||||||
# utility libs
|
# utility libs
|
||||||
|
|
||||||
|
CHECK_FUNCTION_EXISTS(mmap HAVE_MMAP)
|
||||||
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
|
||||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
|
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}
|
||||||
|
${PROJECT_SOURCE_DIR})
|
||||||
|
|
||||||
set_source_files_properties(
|
set_source_files_properties(
|
||||||
${CMAKE_BINARY_DIR}/tools/ExpressionParser.cpp
|
${CMAKE_BINARY_DIR}/tools/ExpressionParser.cpp
|
||||||
@ -31,3 +34,14 @@ SET(corpusomatic_SRCS
|
|||||||
)
|
)
|
||||||
add_library(corpusomatic STATIC ${corpusomatic_SRCS})
|
add_library(corpusomatic STATIC ${corpusomatic_SRCS})
|
||||||
|
|
||||||
|
set(databaseutil_SRCS
|
||||||
|
database_util.cpp
|
||||||
|
database_util.h
|
||||||
|
)
|
||||||
|
# Static utility library built from database_util.cpp / database_util.h.
add_library(databaseutil STATIC ${databaseutil_SRCS})
|
||||||
|
|
||||||
|
set(crosscompileutil_SRCS
|
||||||
|
cross_compile.cpp
|
||||||
|
cross_compile.h
|
||||||
|
)
|
||||||
|
# Static utility library built from cross_compile.cpp / cross_compile.h.
add_library(crosscompileutil STATIC ${crosscompileutil_SRCS})
|
||||||
|
115
util/cross_compile.cpp
Normal file
115
util/cross_compile.cpp
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#include "cross_compile.h"
|
||||||
|
#include "src/ue2common.h"
|
||||||
|
#include "src/hs_compile.h"
|
||||||
|
#include "src/util/make_unique.h"
|
||||||
|
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
/** Associates a cross-compile option name with a CPU feature mask. */
struct XcompileMode {
    const char *name; //!< option name compared against the user's input
    unsigned long long cpu_features; //!< value stored into hs_platform_info::cpu_features
};

/** Table of the option names recognised by xcompileReadMode(). */
static const XcompileMode xcompile_options[] = {
    { "avx2", HS_CPU_FEATURES_AVX2 },
    { "base", 0 }, // selects a feature mask of zero
};
|
||||||
|
|
||||||
|
/**
 * Build a platform description for the instruction set option named in \p s.
 *
 * The option name is the text after the first ':' in \p s. When \p s has no
 * ':', find() yields npos and npos + 1 wraps around to 0, so the whole
 * string is used as the option name.
 *
 * \return The populated platform with cpu_features replaced by the value of
 * the matching entry in xcompile_options, or nullptr if the option name is
 * empty or not recognised.
 */
unique_ptr<hs_platform_info> xcompileReadMode(const char *s) {
    hs_platform_info rv;
    UNUSED hs_error_t err;
    err = hs_populate_platform(&rv);
    assert(!err);

    const string str(s);
    const string opt = str.substr(str.find(":") + 1, str.npos);
    if (opt.empty()) {
        return nullptr;
    }

    const size_t numOpts = ARRAY_LENGTH(xcompile_options);
    for (size_t i = 0; i < numOpts; i++) {
        if (opt.compare(xcompile_options[i].name) == 0) {
            DEBUG_PRINTF("found opt %zu:%llu\n", i,
                         xcompile_options[i].cpu_features);
            rv.cpu_features = xcompile_options[i].cpu_features;
            DEBUG_PRINTF("cpu_features %llx\n", rv.cpu_features);
            return ue2::make_unique<hs_platform_info>(rv);
        }
    }

    // No table entry matched the requested option.
    return nullptr;
}
|
||||||
|
|
||||||
|
/**
 * Render a platform description as text: the tune value (when non-zero),
 * then " avx2" if that feature bit is set, then any remaining feature bits
 * as a raw number.
 */
string to_string(const hs_platform_info &p) {
    ostringstream out;

    if (p.tune) {
        out << p.tune;
    }

    // Peel known feature bits off one at a time; whatever is left over is
    // reported numerically.
    u64a remaining = p.cpu_features;
    if (remaining & HS_CPU_FEATURES_AVX2) {
        out << " avx2";
        remaining &= ~HS_CPU_FEATURES_AVX2;
    }
    if (remaining) {
        out << " " << "?cpu_features?:" << remaining;
    }

    return out.str();
}
|
||||||
|
|
||||||
|
/**
 * Build a one-line usage string listing every option name in
 * xcompile_options, separated by ", ".
 */
string xcompileUsage(void) {
    string variants("Instruction set options: ");
    const char *separator = "";
    for (const XcompileMode &option : xcompile_options) {
        variants += separator;
        variants += option.name;
        separator = ", "; // every name after the first is preceded by this
    }
    return variants;
}
|
42
util/cross_compile.h
Normal file
42
util/cross_compile.h
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CROSS_COMPILE_H
|
||||||
|
#define CROSS_COMPILE_H
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
struct hs_platform_info;
|
||||||
|
|
||||||
|
/**
 * Build a platform description for the instruction set option named in s
 * (the text after the first ':' if one is present). Returns nullptr if the
 * option name is empty or not recognised.
 */
std::unique_ptr<hs_platform_info> xcompileReadMode(const char *s);

/** Returns a one-line string listing the recognised instruction set options. */
std::string xcompileUsage(void);

/** Renders the tune value and CPU feature flags of p as text. */
std::string to_string(const hs_platform_info &p);
|
||||||
|
|
||||||
|
#endif /* CROSS_COMPILE_H */
|
155
util/database_util.cpp
Normal file
155
util/database_util.cpp
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "database_util.h"
|
||||||
|
|
||||||
|
#include "hs_common.h"
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
#include <fstream>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#if defined(HAVE_MMAP)
|
||||||
|
#include <sys/mman.h> // for mmap
|
||||||
|
#include <unistd.h> // for close
|
||||||
|
#include <sys/fcntl.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
bool saveDatabase(const hs_database_t *db, const char *filename, bool verbose) {
|
||||||
|
assert(db);
|
||||||
|
assert(filename);
|
||||||
|
|
||||||
|
if (verbose) {
|
||||||
|
cout << "Saving database to: " << filename << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *bytes = nullptr;
|
||||||
|
size_t length = 0;
|
||||||
|
hs_error_t err = hs_serialize_database(db, &bytes, &length);
|
||||||
|
if (err != HS_SUCCESS) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(bytes);
|
||||||
|
assert(length > 0);
|
||||||
|
|
||||||
|
ofstream out(filename, ios::binary);
|
||||||
|
out.write(bytes, length);
|
||||||
|
out.close();
|
||||||
|
|
||||||
|
::free(bytes);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Read a serialized database from \p filename and deserialize it.
 *
 * The file is mapped with mmap when HAVE_MMAP is defined; otherwise it is
 * read into a heap buffer with stream I/O. In both cases the raw bytes are
 * released before returning.
 *
 * \param filename  path of the serialized database; must not be null.
 * \param verbose   when true, print progress and the serialized database
 *                  info string to stdout.
 * \return the deserialized database, or nullptr if the file could not be
 *         opened or read, or if deserialization failed.
 */
hs_database_t * loadDatabase(const char *filename, bool verbose) {
    assert(filename);

    if (verbose) {
        cout << "Loading database from: " << filename << endl;
    }

    char *bytes = nullptr;

#if defined(HAVE_MMAP)
    // Use mmap to read the file
    int fd = open(filename, O_RDONLY);
    if (fd < 0) {
        return nullptr;
    }
    struct stat st;
    if (fstat(fd, &st) < 0) {
        close(fd);
        return nullptr;
    }
    size_t len = st.st_size;

    bytes = (char *)mmap(nullptr, len, PROT_READ, MAP_SHARED, fd, 0);
    if (bytes == MAP_FAILED) {
        cout << "mmap failed" << endl;
        close(fd);
        return nullptr;
    }
#else
    // Fall back on stream IO
    ifstream is;
    is.open(filename, ios::in | ios::binary);
    if (!is.is_open()) {
        return nullptr;
    }
    is.seekg(0, ios::end);
    // tellg() reports failure as -1; converting that straight to size_t
    // would request an enormous allocation below.
    const streamoff end_pos = is.tellg();
    if (end_pos < 0) {
        cout << "Unable to determine size of database file" << endl;
        return nullptr;
    }
    size_t len = static_cast<size_t>(end_pos);
    if (verbose) {
        cout << "Reading " << len << " bytes" << endl;
    }
    is.seekg(0, ios::beg);
    bytes = new char[len];
    const bool read_ok = !is.read(bytes, len).fail();
    is.close();
    if (!read_ok) {
        // Do not hand a partially filled buffer to the deserializer.
        cout << "Unable to read database file" << endl;
        delete [] bytes;
        return nullptr;
    }
#endif

    assert(bytes);

    if (verbose) {
        char *info = nullptr;
        hs_error_t err = hs_serialized_database_info(bytes, len, &info);
        if (err) {
            cout << "Unable to decode serialized database info: " << err
                 << endl;
        } else if (info) {
            cout << "Serialized database info: " << info << endl;
            std::free(info);
        } else {
            cout << "Unable to decode serialized database info." << endl;
        }
    }

    hs_database_t *db = nullptr;
    hs_error_t err = hs_deserialize_database(bytes, len, &db);

    // The raw bytes are released here, before the result is inspected.
#if defined(HAVE_MMAP)
    munmap(bytes, len);
    close(fd);
#else
    delete [] bytes;
#endif

    if (err != HS_SUCCESS) {
        cout << "hs_deserialize_database call failed: " << err << endl;
        return nullptr;
    }

    assert(db);

    return db;
}
|
39
util/database_util.h
Normal file
39
util/database_util.h
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef DATABASE_UTIL_H
|
||||||
|
#define DATABASE_UTIL_H
|
||||||
|
|
||||||
|
struct hs_database;
|
||||||
|
|
||||||
|
/**
 * Serialize db and write it to filename. Returns false if the database
 * cannot be serialized. When verbose is true the destination is announced
 * on stdout.
 */
bool saveDatabase(const hs_database *db, const char *filename,
                  bool verbose = false);

/**
 * Read a serialized database from filename and deserialize it. Returns
 * nullptr if the file cannot be opened or deserialization fails. When
 * verbose is true, progress and database info are printed to stdout.
 */
hs_database *loadDatabase(const char *filename, bool verbose = false);
|
||||||
|
|
||||||
|
#endif /* DATABASE_UTIL_H */
|
107
util/expression_path.h
Normal file
107
util/expression_path.h
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015-2016, Intel Corporation
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef EXPRESSION_PATH_H
|
||||||
|
#define EXPRESSION_PATH_H
|
||||||
|
|
||||||
|
#include "ue2common.h"
|
||||||
|
|
||||||
|
#include <cerrno>
|
||||||
|
#include <cstring>
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#if !defined(_WIN32)
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <libgen.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// Utility functions
|
||||||
|
//
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a path to a signature file, infer the path of the pcre directory.
|
||||||
|
*/
|
||||||
|
static inline
std::string inferExpressionPath(const std::string &sigFile) {
#ifndef _WIN32
    // POSIX variant: dirname() is allowed to write into its argument, so
    // give it a private, NUL-terminated copy of the path.
    std::vector<char> scratch(sigFile.begin(), sigFile.end());
    scratch.push_back('\0');

    std::string rv(dirname(scratch.data()));
#else
    // Windows variant: _splitpath() extracts the directory component into a
    // fixed-size buffer, so over-long inputs yield an empty result.
    if (sigFile.size() >= _MAX_DIR) {
        return std::string();
    }
    char dir[_MAX_DIR];
    _splitpath(sigFile.c_str(), nullptr, dir, nullptr, nullptr);
    std::string rv(dir);
#endif

    // The pcre directory is taken to be a sibling of the signature file's
    // directory.
    return rv + "/../pcre";
}
|
||||||
|
|
||||||
|
#if defined(_WIN32)
|
||||||
|
#define stat _stat
|
||||||
|
#define S_IFREG _S_IFREG
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
 * Returns true if \p filename names a directory.
 *
 * If stat() fails, the error is reported on stderr and false is returned.
 */
static inline
bool isDir(const std::string &filename) {
    struct stat s;

    if (stat(filename.c_str(), &s) == -1) {
        std::cerr << "stat: " << strerror(errno) << std::endl;
        return false;
    }

    // The file type is a multi-bit field, not a set of independent flags:
    // testing S_IFDIR alone would also match block devices and sockets,
    // whose type codes share that bit. Mask with S_IFMT and compare.
    return (s.st_mode & S_IFMT) == S_IFDIR;
}
|
||||||
|
|
||||||
|
/**
 * Returns true if \p filename names a regular file.
 *
 * If stat() fails, the error is reported on stderr and false is returned.
 */
static inline
bool isFile(const std::string &filename) {
    struct stat s;

    if (stat(filename.c_str(), &s) == -1) {
        std::cerr << "stat: " << strerror(errno) << std::endl;
        return false;
    }

    // The file type is a multi-bit field, not a set of independent flags:
    // testing S_IFREG alone would also match other types (e.g. sockets)
    // whose type codes share that bit. Mask with S_IFMT and compare.
    return (s.st_mode & S_IFMT) == S_IFREG;
}
|
||||||
|
|
||||||
|
#endif /* EXPRESSION_PATH_H */
|
Loading…
x
Reference in New Issue
Block a user