chimera: hybrid of Hyperscan and PCRE

This commit is contained in:
Wang, Xiang W
2018-03-09 03:52:12 -05:00
parent 8a1c497f44
commit bf87f8c003
47 changed files with 6985 additions and 202 deletions

View File

@@ -31,6 +31,8 @@ SET(hsbench_SOURCES
common.h
data_corpus.cpp
data_corpus.h
engine.cpp
engine.h
engine_hyperscan.cpp
engine_hyperscan.h
heapstats.cpp
@@ -45,6 +47,23 @@ SET(hsbench_SOURCES
timer.h
)
# Chimera support is optional: when BUILD_CHIMERA is set, define HS_HYBRID for
# the sources and append the Chimera and PCRE benchmark engines to hsbench.
if (BUILD_CHIMERA)
add_definitions(-DHS_HYBRID)
SET(hsbench_SOURCES
${hsbench_SOURCES}
engine_chimera.cpp
engine_chimera.h
engine_pcre.cpp
engine_pcre.h
)
endif()
add_executable(hsbench ${hsbench_SOURCES})
target_link_libraries(hsbench hs databaseutil expressionutil ${SQLITE3_LDFLAGS}
${CMAKE_THREAD_LIBS_INIT})
# With Chimera enabled, hsbench additionally needs the PCRE include path and
# links against chimera and PCRE; otherwise it links the Hyperscan-only set.
if (BUILD_CHIMERA)
include_directories(${PCRE_INCLUDE_DIRS})
target_link_libraries(hsbench hs chimera ${PCRE_LDFLAGS} databaseutil
expressionutil ${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
else()
target_link_libraries(hsbench hs databaseutil expressionutil
${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
endif()

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2017, Intel Corporation
* Copyright (c) 2016-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -42,6 +42,12 @@ extern bool forceEditDistance;
extern unsigned editDistance;
extern bool printCompressSize;
/** Outcome of one complete scan: how long it took and how many matches. */
struct ResultEntry {
    double seconds = 0.0;      //!< Time taken for scan.
    unsigned int matches = 0u; //!< Count of matches found.
};
struct SqlFailure {
explicit SqlFailure(const std::string &s) : message(s) {}
std::string message;

35
tools/hsbench/engine.cpp Normal file
View File

@@ -0,0 +1,35 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "engine.h"
// Out-of-line definition of the virtual destructor; it has nothing to release.
EngineContext::~EngineContext() = default;
// Out-of-line definition of the virtual destructor; it has nothing to release.
EngineStream::~EngineStream() = default;
// Out-of-line definition of the virtual destructor; it has nothing to release.
Engine::~Engine() = default;

94
tools/hsbench/engine.h Normal file
View File

@@ -0,0 +1,94 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ENGINE_H
#define ENGINE_H
#include "common.h"
#include "sqldb.h"
#include <memory>
#include <string>
#include <vector>
#include <boost/core/noncopyable.hpp>
/**
 * Base class for engine contexts. Engines have an engine context which is
 * allocated on a per-thread basis; each engine derives its own context type
 * from this class. The virtual destructor lets a context be destroyed through
 * a pointer to this base.
 */
class EngineContext : boost::noncopyable {
public:
virtual ~EngineContext();
};
/**
 * Streaming mode scans have persistent stream state associated with them.
 * This is the engine-independent base; the virtual destructor lets a stream
 * be destroyed through a pointer to this base.
 */
class EngineStream : boost::noncopyable {
public:
    virtual ~EngineStream();

    /** Stream number. Defaults to 0 so it is never read uninitialized. */
    unsigned int sn = 0;
};
/**
 * Benchmarking engine: the abstract interface every matcher backend
 * implements. All operations are const; per-thread state is passed in
 * explicitly as an EngineContext and per-stream state as an EngineStream.
 */
class Engine : boost::noncopyable {
public:
virtual ~Engine();
/** Allocate an EngineContext suitable for use with this engine. */
virtual std::unique_ptr<EngineContext> makeContext() const = 0;
/** Non-streaming scan of a single buffer of \a len bytes. */
virtual void scan(const char *data, unsigned len, unsigned blockId,
ResultEntry &results, EngineContext &ectx) const = 0;
/** Vectoring scan over \a count buffers described by \a data and \a len. */
virtual void scan_vectored(const char *const *data,
const unsigned int *len, unsigned int count,
unsigned int streamId, ResultEntry &result,
EngineContext &ectx) const = 0;
/** Stream open: returns the state object for a new stream. */
virtual std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
unsigned id) const = 0;
/** Stream close: takes ownership of \a stream. */
virtual void streamClose(std::unique_ptr<EngineStream> stream,
ResultEntry &result) const = 0;
/** Stream compress and expand, using \a temp as the working buffer. */
virtual void streamCompressExpand(EngineStream &stream,
std::vector<char> &temp) const = 0;
/** Streaming scan of \a len bytes of \a data on an open stream. */
virtual void streamScan(EngineStream &stream, const char *data,
unsigned int len, unsigned int id,
ResultEntry &result) const = 0;
/** Print the engine's summary statistics. */
virtual void printStats() const = 0;
/** Record the engine's summary statistics in \a db. */
virtual void sqlStats(SqlDB &db) const = 0;
};
#endif // ENGINE_H

View File

@@ -0,0 +1,314 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "ExpressionParser.h"
#include "common.h"
#include "engine_chimera.h"
#include "expressions.h"
#include "heapstats.h"
#include "sqldb.h"
#include "timer.h"
#include "chimera/ch_database.h"
#include "util/make_unique.h"
using namespace std;
/**
 * Allocate the Chimera scratch space for the given database.
 *
 * The allocation result is checked explicitly rather than with an assert, so
 * that a failure is also caught in builds where asserts are compiled out.
 */
EngineCHContext::EngineCHContext(const ch_database_t *db) {
    const ch_error_t err = ch_alloc_scratch(db, &scratch);
    if (err != CH_SUCCESS || !scratch) {
        printf("Fatal error: ch_alloc_scratch returned error %d\n", err);
        abort();
    }
}
/** Release the scratch space held by this context. */
EngineCHContext::~EngineCHContext() {
ch_free_scratch(scratch);
}
namespace /* anonymous */ {

/** Per-scan state handed to the match callbacks via their context pointer. */
struct ScanCHContext {
    ScanCHContext(unsigned id_in, ResultEntry &result_in)
        : id{id_in}, result(result_in) {}

    unsigned id;         //!< Identifier printed when matches are echoed.
    ResultEntry &result; //!< Entry whose match count the callbacks bump.
};

} // namespace
/**
* Callback function called for every match that Chimera produces, used when
* "echo matches" is off.
*/
static
int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int,
unsigned int, const ch_capture_t *, void *ctx) {
ScanCHContext *sc = static_cast<ScanCHContext *>(ctx);
assert(sc);
sc->result.matches++;
return 0;
}
/**
* Callback function called for every match that Chimera produces when "echo
* matches" is enabled.
*/
static
int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
unsigned int, unsigned int, const ch_capture_t *, void *ctx) {
ScanCHContext *sc = static_cast<ScanCHContext *>(ctx);
assert(sc);
sc->result.matches++;
printf("Match @%u:%llu for %u\n", sc->id, to, id);
return 0;
}
/** Take ownership of a compiled database together with its compile stats. */
EngineChimera::EngineChimera(ch_database_t *db_in, CompileCHStats cs)
    : db(db_in), compile_stats(std::move(cs)) {
    assert(db != nullptr);
}
/** Free the Chimera database held by this engine. */
EngineChimera::~EngineChimera() {
ch_free_database(db);
}
/** Create a new EngineCHContext for this engine's database. */
unique_ptr<EngineContext> EngineChimera::makeContext() const {
return ue2::make_unique<EngineCHContext>(db);
}
void EngineChimera::scan(const char *data, unsigned int len, unsigned int id,
ResultEntry &result, EngineContext &ectx) const {
assert(data);
auto &ctx = static_cast<EngineCHContext &>(ectx);
ScanCHContext sc(id, result);
auto callback = echo_matches ? onMatchEcho : onMatch;
ch_error_t rv = ch_scan(db, data, len, 0, ctx.scratch, callback, nullptr,
&sc);
if (rv != CH_SUCCESS) {
printf("Fatal error: ch_scan returned error %d\n", rv);
abort();
}
}
/**
 * Vectoring scan. Not supported by this engine: prints a message and aborts,
 * so all parameters are unused.
 */
void EngineChimera::scan_vectored(UNUSED const char *const *data,
UNUSED const unsigned int *len,
UNUSED unsigned int count,
UNUSED unsigned int streamId,
UNUSED ResultEntry &result,
UNUSED EngineContext &ectx) const {
printf("Hybrid matcher can't support vectored mode.\n");
abort();
}
/**
 * Stream open. Not supported by this engine: prints a message and aborts
 * without returning a stream.
 */
unique_ptr<EngineStream> EngineChimera::streamOpen(UNUSED EngineContext &ectx,
UNUSED unsigned id) const {
printf("Hybrid matcher can't stream.\n");
abort();
}
/** Stream close. Not supported by this engine: prints a message and aborts. */
void EngineChimera::streamClose(UNUSED unique_ptr<EngineStream> stream,
UNUSED ResultEntry &result) const {
printf("Hybrid matcher can't stream.\n");
abort();
}
/** Streaming scan. Not supported by this engine: prints a message and aborts. */
void EngineChimera::streamScan(UNUSED EngineStream &stream,
UNUSED const char *data,
UNUSED unsigned len, UNUSED unsigned id,
UNUSED ResultEntry &result) const {
printf("Hybrid matcher can't stream.\n");
abort();
}
/**
 * Stream compress and expand. Not supported by this engine: prints a message
 * and aborts.
 */
void EngineChimera::streamCompressExpand(UNUSED EngineStream &stream,
UNUSED vector<char> &temp) const {
printf("Hybrid matcher can't stream.\n");
abort();
}
void EngineChimera::printStats() const {
// Output summary information.
if (!compile_stats.sigs_name.empty()) {
printf("Signature set: %s\n", compile_stats.sigs_name.c_str());
}
printf("Signatures: %s\n", compile_stats.signatures.c_str());
printf("Chimera info: %s\n", compile_stats.db_info.c_str());
printf("Expression count: %'zu\n", compile_stats.expressionCount);
printf("Bytecode size: %'zu bytes\n", compile_stats.compiledSize);
printf("Database CRC: 0x%x\n", compile_stats.crc32);
printf("Scratch size: %'zu bytes\n", compile_stats.scratchSize);
printf("Compile time: %'0.3Lf seconds\n", compile_stats.compileSecs);
printf("Peak heap usage: %'u bytes\n", compile_stats.peakMemorySize);
}
/** Insert this engine's compile statistics as one row of the Compile table. */
void EngineChimera::sqlStats(SqlDB &sqldb) const {
    static const string insert_stmt =
        "INSERT INTO Compile ("
            "sigsName, signatures, dbInfo, exprCount, dbSize, crc,"
            "scratchSize, compileSecs, peakMemory) "
        "VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)";

    // The CRC is stored as hexadecimal text with a "0x" prefix.
    ostringstream crc_text;
    crc_text << "0x" << hex << compile_stats.crc32;

    sqldb.insert_all(insert_stmt,
                     compile_stats.sigs_name,
                     compile_stats.signatures,
                     compile_stats.db_info,
                     compile_stats.expressionCount,
                     compile_stats.compiledSize,
                     crc_text.str(),
                     compile_stats.scratchSize,
                     compile_stats.compileSecs,
                     compile_stats.peakMemorySize);
}
/**
 * Compile a set of expressions into a Chimera database and wrap it in an
 * EngineChimera, collecting compile statistics along the way.
 *
 * \param expressions Map from signature id to expression text; must not be
 *        empty.
 * \param name Name recorded as the "signatures" statistic. When \a sigs_name
 *        is non-empty, only the part after the last '/' is kept.
 * \param sigs_name Name of the signature set; may be empty.
 * \return The engine, or nullptr if parsing, compilation or any of the
 *         database/scratch queries fail. The database is freed on every
 *         failure path after a successful compile.
 */
unique_ptr<EngineChimera>
buildEngineChimera(const ExpressionMap &expressions, const string &name,
                   const string &sigs_name) {
    if (expressions.empty()) {
        assert(0);
        return nullptr;
    }

    const unsigned int count = expressions.size();

    vector<string> exprs;
    vector<unsigned int> flags, ids;
    exprs.reserve(count);
    flags.reserve(count);
    ids.reserve(count);

    for (const auto &m : expressions) {
        string expr;
        unsigned int f = 0;
        // Only parsed so that expressions carrying extended parameters can be
        // rejected below.
        hs_expr_ext extparam;
        extparam.flags = 0;
        if (!readExpression(m.second, expr, &f, &extparam)) {
            printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(),
                   m.first);
            return nullptr;
        }

        if (extparam.flags) {
            printf("Error parsing PCRE with extended flags: %s (id %u)\n",
                   m.second.c_str(), m.first);
            return nullptr;
        }
        exprs.push_back(expr);
        ids.push_back(m.first);
        flags.push_back(f);
    }

    // Our compiler takes an array of plain ol' C strings.
    vector<const char *> patterns(count);
    for (unsigned int i = 0; i < count; i++) {
        patterns[i] = exprs[i].c_str();
    }

    Timer timer;
    timer.start();

    // Capture groups by default
    unsigned int mode = CH_MODE_GROUPS;

    // Both outputs start out null so that an unexpected error code cannot
    // lead to reading an indeterminate pointer.
    ch_database_t *db = nullptr;
    ch_compile_error_t *compile_err = nullptr;
    ch_error_t err = ch_compile_multi(patterns.data(), flags.data(),
                                      ids.data(), count, mode, nullptr, &db,
                                      &compile_err);

    timer.complete();
    long double compileSecs = timer.seconds();
    unsigned int peakMemorySize = getPeakHeap();

    if (err != CH_SUCCESS) {
        if (compile_err) {
            if (compile_err->expression >= 0) {
                printf("Compile error for signature #%u: %s\n",
                       static_cast<unsigned int>(compile_err->expression),
                       compile_err->message);
            } else {
                printf("Compile error: %s\n", compile_err->message);
            }
            ch_free_compile_error(compile_err);
        } else {
            printf("Compile error: ch_compile_multi returned error %d\n", err);
        }
        return nullptr;
    }

    size_t compiledSize = 0;
    err = ch_database_size(db, &compiledSize);
    if (err != CH_SUCCESS) {
        ch_free_database(db);
        return nullptr;
    }
    assert(compiledSize > 0);

    char *info = nullptr;
    err = ch_database_info(db, &info);
    if (err != CH_SUCCESS) {
        ch_free_database(db);
        return nullptr;
    }
    string db_info(info);
    free(info);

    // Allocate scratch temporarily to find its size: this is a good test
    // anyway.
    ch_scratch_t *scratch = nullptr;
    err = ch_alloc_scratch(db, &scratch);
    if (err != CH_SUCCESS) {
        ch_free_database(db);
        return nullptr;
    }

    size_t scratchSize = 0;
    err = ch_scratch_size(scratch, &scratchSize);
    // The temporary scratch is no longer needed whether or not the query
    // succeeded.
    ch_free_scratch(scratch);
    if (err != CH_SUCCESS) {
        ch_free_database(db);
        return nullptr;
    }

    // Collect summary information.
    CompileCHStats cs;
    cs.sigs_name = sigs_name;
    if (!sigs_name.empty()) {
        // find_last_of returns npos when there is no '/', and npos + 1 wraps
        // to 0, so the whole name is kept in that case.
        const auto pos = name.find_last_of('/');
        cs.signatures = name.substr(pos + 1);
    } else {
        cs.signatures = name;
    }
    cs.db_info = db_info;
    cs.expressionCount = expressions.size();
    cs.compiledSize = compiledSize;
    cs.scratchSize = scratchSize;
    cs.compileSecs = compileSecs;
    cs.peakMemorySize = peakMemorySize;

    return ue2::make_unique<EngineChimera>(db, std::move(cs));
}

View File

@@ -0,0 +1,103 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ENGINECHIMERA_H
#define ENGINECHIMERA_H
#include "expressions.h"
#include "engine.h"
#include "chimera/ch.h"
#include <memory>
#include <string>
#include <vector>
/** Information about the database compile. */
struct CompileCHStats {
    std::string sigs_name;          //!< Signature set name; may be empty.
    std::string signatures;         //!< Signatures name.
    std::string db_info;            //!< Database info string.
    size_t expressionCount{0};      //!< Number of expressions compiled.
    size_t compiledSize{0};         //!< Bytecode size in bytes.
    uint32_t crc32{0};              //!< Database CRC.
    size_t scratchSize{0};          //!< Scratch size in bytes.
    long double compileSecs{0.0L};  //!< Compile time in seconds.
    unsigned int peakMemorySize{0}; //!< Peak heap usage in bytes.
};
/** Engine context which is allocated on a per-thread basis. */
class EngineCHContext : public EngineContext{
public:
explicit EngineCHContext(const ch_database_t *db);
~EngineCHContext();
ch_scratch_t *scratch = nullptr;
};
/** Chimera Engine for scanning data. */
class EngineChimera : public Engine {
public:
explicit EngineChimera(ch_database_t *db, CompileCHStats cs);
~EngineChimera();
std::unique_ptr<EngineContext> makeContext() const;
void scan(const char *data, unsigned int len, unsigned int id,
ResultEntry &result, EngineContext &ectx) const;
void scan_vectored(const char *const *data, const unsigned int *len,
unsigned int count, unsigned int streamId,
ResultEntry &result, EngineContext &ectx) const;
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
unsigned id) const;
void streamClose(std::unique_ptr<EngineStream> stream,
ResultEntry &result) const;
void streamCompressExpand(EngineStream &stream,
std::vector<char> &temp) const;
void streamScan(EngineStream &stream, const char *data, unsigned int len,
unsigned int id, ResultEntry &result) const;
void printStats() const;
void sqlStats(SqlDB &db) const;
private:
ch_database_t *db;
CompileCHStats compile_stats;
};
/**
 * Build a Chimera engine from a set of expressions.
 *
 * \param expressions Expressions to compile.
 * \param name Name of the signatures, recorded in the compile statistics.
 * \param sigs_name Name of the signature set, recorded in the compile
 *        statistics.
 * \return The new engine.
 */
std::unique_ptr<EngineChimera>
buildEngineChimera(const ExpressionMap &expressions, const std::string &name,
const std::string &sigs_name);
#endif // ENGINECHIMERA_H

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2017, Intel Corporation
* Copyright (c) 2016-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -57,20 +57,22 @@
using namespace std;
EngineContext::EngineContext(const hs_database_t *db) {
EngineHSContext::EngineHSContext(const hs_database_t *db) {
hs_alloc_scratch(db, &scratch);
assert(scratch);
}
EngineContext::~EngineContext() {
EngineHSContext::~EngineHSContext() {
hs_free_scratch(scratch);
}
// Out-of-line destructor definition; the body has nothing to release.
EngineHSStream::~EngineHSStream() = default;
namespace /* anonymous */ {
/** Scan context structure passed to the onMatch callback function. */
struct ScanContext {
ScanContext(unsigned id_in, ResultEntry &result_in,
struct ScanHSContext {
ScanHSContext(unsigned id_in, ResultEntry &result_in,
const EngineStream *stream_in)
: id(id_in), result(result_in), stream(stream_in) {}
unsigned id;
@@ -87,7 +89,7 @@ struct ScanContext {
static
int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int,
void *ctx) {
ScanContext *sc = static_cast<ScanContext *>(ctx);
ScanHSContext *sc = static_cast<ScanHSContext *>(ctx);
assert(sc);
sc->result.matches++;
@@ -101,7 +103,7 @@ int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int,
static
int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
unsigned int, void *ctx) {
ScanContext *sc = static_cast<ScanContext *>(ctx);
ScanHSContext *sc = static_cast<ScanHSContext *>(ctx);
assert(sc);
sc->result.matches++;
@@ -114,7 +116,7 @@ int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
return 0;
}
EngineHyperscan::EngineHyperscan(hs_database_t *db_in, CompileStats cs)
EngineHyperscan::EngineHyperscan(hs_database_t *db_in, CompileHSStats cs)
: db(db_in), compile_stats(std::move(cs)) {
assert(db);
}
@@ -124,14 +126,15 @@ EngineHyperscan::~EngineHyperscan() {
}
unique_ptr<EngineContext> EngineHyperscan::makeContext() const {
return ue2::make_unique<EngineContext>(db);
return ue2::make_unique<EngineHSContext>(db);
}
void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id,
ResultEntry &result, EngineContext &ctx) const {
ResultEntry &result, EngineContext &ectx) const {
assert(data);
ScanContext sc(id, result, nullptr);
EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
ScanHSContext sc(id, result, nullptr);
auto callback = echo_matches ? onMatchEcho : onMatch;
hs_error_t rv = hs_scan(db, data, len, 0, ctx.scratch, callback, &sc);
@@ -144,11 +147,12 @@ void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id,
void EngineHyperscan::scan_vectored(const char *const *data,
const unsigned int *len, unsigned int count,
unsigned streamId, ResultEntry &result,
EngineContext &ctx) const {
EngineContext &ectx) const {
assert(data);
assert(len);
ScanContext sc(streamId, result, nullptr);
EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
ScanHSContext sc(streamId, result, nullptr);
auto callback = echo_matches ? onMatchEcho : onMatch;
hs_error_t rv =
hs_scan_vector(db, data, len, count, 0, ctx.scratch, callback, &sc);
@@ -159,9 +163,10 @@ void EngineHyperscan::scan_vectored(const char *const *data,
}
}
unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ctx,
unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ectx,
unsigned streamId) const {
auto stream = ue2::make_unique<EngineStream>();
EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
auto stream = ue2::make_unique<EngineHSStream>();
stream->ctx = &ctx;
hs_open_stream(db, 0, &stream->id);
@@ -170,17 +175,18 @@ unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ctx,
return nullptr;
}
stream->sn = streamId;
return stream;
return move(stream);
}
void EngineHyperscan::streamClose(unique_ptr<EngineStream> stream,
ResultEntry &result) const {
assert(stream);
auto &s = static_cast<EngineStream &>(*stream);
EngineContext &ctx = *s.ctx;
auto &s = static_cast<EngineHSStream &>(*stream);
EngineContext &ectx = *s.ctx;
EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
ScanContext sc(0, result, &s);
ScanHSContext sc(0, result, &s);
auto callback = echo_matches ? onMatchEcho : onMatch;
assert(s.id);
@@ -193,10 +199,10 @@ void EngineHyperscan::streamScan(EngineStream &stream, const char *data,
ResultEntry &result) const {
assert(data);
auto &s = static_cast<EngineStream &>(stream);
EngineContext &ctx = *s.ctx;
auto &s = static_cast<EngineHSStream &>(stream);
EngineHSContext &ctx = *s.ctx;
ScanContext sc(id, result, &s);
ScanHSContext sc(id, result, &s);
auto callback = echo_matches ? onMatchEcho : onMatch;
hs_error_t rv =
hs_scan_stream(s.id, data, len, 0, ctx.scratch, callback, &sc);
@@ -210,11 +216,12 @@ void EngineHyperscan::streamScan(EngineStream &stream, const char *data,
void EngineHyperscan::streamCompressExpand(EngineStream &stream,
vector<char> &temp) const {
size_t used = 0;
hs_error_t err = hs_compress_stream(stream.id, temp.data(), temp.size(),
auto &s = static_cast<EngineHSStream &>(stream);
hs_error_t err = hs_compress_stream(s.id, temp.data(), temp.size(),
&used);
if (err == HS_INSUFFICIENT_SPACE) {
temp.resize(used);
err = hs_compress_stream(stream.id, temp.data(), temp.size(), &used);
err = hs_compress_stream(s.id, temp.data(), temp.size(), &used);
}
if (err != HS_SUCCESS) {
@@ -223,10 +230,10 @@ void EngineHyperscan::streamCompressExpand(EngineStream &stream,
}
if (printCompressSize) {
printf("stream %u: compressed to %zu\n", stream.sn, used);
printf("stream %u: compressed to %zu\n", s.sn, used);
}
err = hs_reset_and_expand_stream(stream.id, temp.data(), temp.size(),
err = hs_reset_and_expand_stream(s.id, temp.data(), temp.size(),
nullptr, nullptr, nullptr);
if (err != HS_SUCCESS) {
@@ -469,7 +476,7 @@ buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
hs_free_scratch(scratch);
// Collect summary information.
CompileStats cs;
CompileHSStats cs;
cs.sigs_name = sigs_name;
if (!sigs_name.empty()) {
const auto pos = name.find_last_of('/');

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2017, Intel Corporation
* Copyright (c) 2016-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -30,22 +30,15 @@
#define ENGINEHYPERSCAN_H
#include "expressions.h"
#include "common.h"
#include "sqldb.h"
#include "engine.h"
#include "hs_runtime.h"
#include <memory>
#include <string>
#include <vector>
/** Structure for the result of a single complete scan. */
struct ResultEntry {
double seconds = 0; //!< Time taken for scan.
unsigned int matches = 0; //!< Count of matches found.
};
/** Information about the database compile */
struct CompileStats {
struct CompileHSStats {
std::string sigs_name;
std::string signatures;
std::string db_info;
@@ -60,38 +53,38 @@ struct CompileStats {
};
/** Engine context which is allocated on a per-thread basis. */
class EngineContext {
class EngineHSContext : public EngineContext {
public:
explicit EngineContext(const hs_database_t *db);
~EngineContext();
explicit EngineHSContext(const hs_database_t *db);
~EngineHSContext();
hs_scratch_t *scratch = nullptr;
};
/** Streaming mode scans have persistent stream state associated with them. */
class EngineStream {
class EngineHSStream : public EngineStream {
public:
~EngineHSStream();
hs_stream_t *id;
unsigned int sn;
EngineContext *ctx;
EngineHSContext *ctx;
};
/** Hyperscan Engine for scanning data. */
class EngineHyperscan {
class EngineHyperscan : public Engine {
public:
explicit EngineHyperscan(hs_database_t *db, CompileStats cs);
explicit EngineHyperscan(hs_database_t *db, CompileHSStats cs);
~EngineHyperscan();
std::unique_ptr<EngineContext> makeContext() const;
void scan(const char *data, unsigned int len, unsigned int id,
ResultEntry &result, EngineContext &ctx) const;
ResultEntry &result, EngineContext &ectx) const;
void scan_vectored(const char *const *data, const unsigned int *len,
unsigned int count, unsigned int streamId,
ResultEntry &result, EngineContext &ctx) const;
ResultEntry &result, EngineContext &ectx) const;
std::unique_ptr<EngineStream> streamOpen(EngineContext &ctx,
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
unsigned id) const;
void streamClose(std::unique_ptr<EngineStream> stream,
@@ -109,7 +102,7 @@ public:
private:
hs_database_t *db;
CompileStats compile_stats;
CompileHSStats compile_stats;
};
namespace ue2 {

View File

@@ -0,0 +1,388 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "common.h"
#include "engine_pcre.h"
#include "heapstats.h"
#include "huge.h"
#include "sqldb.h"
#include "timer.h"
#include "util/make_unique.h"
#include "util/unicode_def.h"
using namespace std;
/**
 * Allocate the output vector: (capture_cnt + 1) * 3 ints, matching the size
 * that EnginePCRE::scan passes to pcre_exec.
 */
EnginePCREContext::EnginePCREContext(int capture_cnt) {
    const size_t ovec_bytes = (capture_cnt + 1) * sizeof(int) * 3;
    ovec = static_cast<int *>(malloc(ovec_bytes));
}
/** Release the output vector held by this context. */
EnginePCREContext::~EnginePCREContext() {
free(ovec);
}
namespace /* anonymous */ {

/** Per-scan state handed to the match functions. */
struct ScanPCREContext {
    ScanPCREContext(unsigned id_in, ResultEntry &result_in)
        : id{id_in}, result(result_in) {}

    unsigned id;         //!< Identifier printed when matches are echoed.
    ResultEntry &result; //!< Entry whose match count the functions bump.
};

} // namespace
/**
 * Called for every match PCRE produces when "echo matches" is off: it only
 * counts the match. Always returns 0.
 */
static
int onMatch(ScanPCREContext *sc) {
    assert(sc);
    ResultEntry &entry = sc->result;
    ++entry.matches;
    return 0;
}
/**
* Function called for every match that PCRE produces when "echo
* matches" is enabled.
*/
static
int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
ScanPCREContext *sc) {
assert(sc);
sc->result.matches++;
printf("Match @%u:%llu for %u\n", sc->id, to, id);
return 0;
}
/**
 * Take ownership of the compiled pattern databases and their compile stats,
 * and remember the capture count used to size each context's output vector.
 */
EnginePCRE::EnginePCRE(vector<unique_ptr<PcreDB>> dbs_in, CompilePCREStats cs,
                       int capture_cnt_in)
    : dbs(std::move(dbs_in)),
      compile_stats(std::move(cs)),
      capture_cnt(capture_cnt_in) {
}
/**
 * Free the study data and the compiled pattern of every database.
 *
 * NOTE(review): PCRE documents pcre_free_study()/pcre_free() for releasing
 * these objects. Plain free() is kept here because the allocation site is not
 * visible from this function -- confirm how `extra` and `db` are created
 * before switching.
 */
EnginePCRE::~EnginePCRE() {
    for (const auto &entry : dbs) {
        PcreDB &pattern = *entry;
        free(pattern.extra);
        free(pattern.db);
    }
}
/** Create a new EnginePCREContext sized for this engine's capture count. */
unique_ptr<EngineContext> EnginePCRE::makeContext() const {
return ue2::make_unique<EnginePCREContext>(capture_cnt);
}
void EnginePCRE::scan(const char *data, unsigned int len, unsigned int id,
ResultEntry &result, EngineContext &ectx) const {
assert(data);
ScanPCREContext sc(id, result);
auto &ctx = static_cast<EnginePCREContext &>(ectx);
int *ovec = ctx.ovec;
int ovec_size = (capture_cnt + 1) * 3;
for (const auto &pcreDB : dbs) {
int startoffset = 0;
bool utf8 = pcreDB->utf8;
bool highlander = pcreDB->highlander;
int flags = 0;
int ret;
do {
ret = pcre_exec(pcreDB->db, pcreDB->extra, data, len,
startoffset, flags, ovec, ovec_size);
if (ret <= PCRE_ERROR_NOMATCH) {
break;
}
int from = ovec[0];
int to = ovec[1];
assert(from <= to);
if (echo_matches) {
onMatchEcho(pcreDB->id, from, to, &sc);
} else {
onMatch(&sc);
}
// If we only wanted a single match, we're done.
if (highlander) {
break;
}
// Next scan starts at the first codepoint after the match. It's
// possible that we have a vacuous match, in which case we must step
// past it to ensure that we always progress.
if (from != to) {
startoffset = to;
} else if (utf8) {
startoffset = to + 1;
while (startoffset < (int)len &&
((data[startoffset] & 0xc0) == UTF_CONT_BYTE_HEADER)) {
++startoffset;
}
} else {
startoffset = to + 1;
}
} while (startoffset <= (int)len);
if (ret < PCRE_ERROR_NOMATCH) {
printf("Fatal error: pcre returned error %d\n", ret);
abort();
}
}
}
/**
 * Vectoring scan. Not supported by this engine: prints a message and aborts,
 * so all parameters are unused.
 */
void EnginePCRE::scan_vectored(UNUSED const char *const *data,
UNUSED const unsigned int *len,
UNUSED unsigned int count,
UNUSED unsigned int streamId,
UNUSED ResultEntry &result,
UNUSED EngineContext &ectx) const {
printf("PCRE matcher can't support vectored mode.\n");
abort();
}
/**
 * Stream open. Not supported by this engine: prints a message and aborts
 * without returning a stream.
 */
unique_ptr<EngineStream> EnginePCRE::streamOpen(UNUSED EngineContext &ectx,
UNUSED unsigned id) const {
printf("PCRE matcher can't stream.\n");
abort();
}
/**
 * Stream-close entry point. The PCRE engine cannot stream, so this prints a
 * message and aborts.
 */
void EnginePCRE::streamClose(UNUSED unique_ptr<EngineStream> stream,
                             UNUSED ResultEntry &result) const {
    fputs("PCRE matcher can't stream.\n", stdout);
    abort();
}
/**
 * Stream-scan entry point. The PCRE engine cannot stream, so this prints a
 * message and aborts; none of the arguments are used.
 */
void EnginePCRE::streamScan(UNUSED EngineStream &stream,
                            UNUSED const char *data,
                            UNUSED unsigned len, UNUSED unsigned id,
                            UNUSED ResultEntry &result) const {
    fputs("PCRE matcher can't stream.\n", stdout);
    abort();
}
/**
 * Stream compress/expand entry point. The PCRE engine cannot stream, so this
 * prints a message and aborts.
 */
void EnginePCRE::streamCompressExpand(UNUSED EngineStream &stream,
                                      UNUSED vector<char> &temp) const {
    fputs("PCRE matcher can't stream.\n", stdout);
    abort();
}
void EnginePCRE::printStats() const {
// Output summary information.
if (!compile_stats.sigs_name.empty()) {
printf("Signature set: %s\n", compile_stats.sigs_name.c_str());
}
printf("Signatures: %s\n", compile_stats.signatures.c_str());
printf("PCRE info: %s\n", compile_stats.db_info.c_str());
printf("Expression count: %'zu\n", compile_stats.expressionCount);
printf("Bytecode size: %'zu bytes\n", compile_stats.compiledSize);
printf("Scratch size: %'zu bytes\n", compile_stats.scratchSize);
printf("Compile time: %'0.3Lf seconds\n", compile_stats.compileSecs);
printf("Peak heap usage: %'u bytes\n", compile_stats.peakMemorySize);
}
/**
 * Record the compile-time summary as one row of the Compile table.
 *
 * The crc column is written as an empty string: nothing is ever streamed
 * into the CRC text before it is inserted.
 *
 * @param sqldb database wrapper that performs the insert.
 */
void EnginePCRE::sqlStats(SqlDB &sqldb) const {
    static const string Q =
        "INSERT INTO Compile ("
        "sigsName, signatures, dbInfo, exprCount, dbSize, crc,"
        "scratchSize, compileSecs, peakMemory) "
        "VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)";

    const CompilePCREStats &cs = compile_stats;
    ostringstream crc_text;

    sqldb.insert_all(Q, cs.sigs_name, cs.signatures, cs.db_info,
                     cs.expressionCount, cs.compiledSize, crc_text.str(),
                     cs.scratchSize, cs.compileSecs, cs.peakMemorySize);
}
/**
 * Split an expression of the form "/pattern/flags" into its parts.
 *
 * @param expr  in: the full expression; out: the bare pattern with the
 *              delimiting slashes and the flags removed. It may already have
 *              been modified when the function fails on an unknown flag.
 * @param flags out: PCRE compile options derived from the flag letters.
 * @param db    receives the utf8 ('8') and highlander ('H') settings.
 * @return true on success; false if the expression is not delimited by two
 *         distinct slashes or contains an unsupported flag letter.
 */
static
bool decodeExprPCRE(string &expr, unsigned *flags, struct PcreDB &db) {
    if (expr.empty() || expr[0] != '/') {
        return false;
    }

    const size_t end = expr.find_last_of('/');
    // end == 0 means the opening slash is the only one: there is no closing
    // delimiter, so the text after it must not be mistaken for flags applied
    // to an empty pattern.
    if (end == string::npos || end == 0) {
        return false;
    }

    const string strFlags = expr.substr(end + 1);

    // strip starting and trailing slashes and the flags
    expr.erase(end);
    expr.erase(0, 1);

    // decode the flags
    *flags = 0;
    for (const char flag : strFlags) {
        switch (flag) {
        case 's':
            *flags |= PCRE_DOTALL;
            break;
        case 'm':
            *flags |= PCRE_MULTILINE;
            break;
        case 'i':
            *flags |= PCRE_CASELESS;
            break;
        case '8':
            *flags |= PCRE_UTF8;
            db.utf8 = true;
            break;
        case 'W':
            *flags |= PCRE_UCP;
            break;
        case 'H':
            // Single-match mode is handled by the scan loop, not by PCRE.
            db.highlander = true;
            break;
        default:
            return false;
        }
    }

    return true;
}
/**
 * Compile every expression with PCRE and wrap the results in an EnginePCRE.
 *
 * @param expressions map from expression id to "/pattern/flags" text.
 * @param name        signature source name; when sigs_name is non-empty only
 *                    the part after the last '/' is reported.
 * @param sigs_name   name of the signature set (may be empty).
 * @return the engine, or nullptr if the map is empty or any expression fails
 *         to parse, compile, study or be queried. On failure every PCRE
 *         object created so far is released.
 */
unique_ptr<EnginePCRE>
buildEnginePcre(const ExpressionMap &expressions, const string &name,
                const string &sigs_name) {
    if (expressions.empty()) {
        assert(0);
        return nullptr;
    }

    long double compileSecs = 0.0;
    size_t compiledSize = 0;
    unsigned int peakMemorySize = 0;
    string db_info("Version: ");
    db_info += string(pcre_version());

    vector<unique_ptr<PcreDB>> dbs;
    int capture_cnt = 0;

    // Release one compiled pattern and its extra block.
    auto releasePcre = [](pcre *db, pcre_extra *extra) {
        if (extra) {
            pcre_free_study(extra);
        }
        if (db) {
            pcre_free(db);
        }
    };

    // Failure path: PcreDB has no destructor of its own, so the PCRE objects
    // of the current expression and of all earlier ones are released here.
    auto fail = [&](pcre *db, pcre_extra *extra) -> unique_ptr<EnginePCRE> {
        releasePcre(db, extra);
        for (auto &built : dbs) {
            releasePcre(built->db, built->extra);
            built->db = nullptr;
            built->extra = nullptr;
        }
        dbs.clear();
        return nullptr;
    };

    Timer timer;
    timer.start();

    for (const auto &m : expressions) {
        string expr(m.second);
        unsigned int flags = 0;
        auto pcreDB = ue2::make_unique<PcreDB>();
        if (!decodeExprPCRE(expr, &flags, *pcreDB)) {
            printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(),
                   m.first);
            return fail(nullptr, nullptr);
        }

        const char *errp = nullptr;
        int erro = 0;
        pcre *db = pcre_compile(expr.c_str(), flags, &errp, &erro, nullptr);
        if (!db) {
            printf("Compile error %s\n", errp);
            return fail(nullptr, nullptr);
        }

        pcre_extra *extra = pcre_study(db, PCRE_STUDY_JIT_COMPILE, &errp);
        if (errp) {
            printf("PCRE could not be studied: %s\n", errp);
            return fail(db, extra);
        }

        if (!extra) {
            // pcre_study() found nothing to record. Build an empty block so
            // the match limits can still be attached; it must be zeroed
            // because pcre_fullinfo() and pcre_exec() read its flags.
            extra = static_cast<pcre_extra *>(pcre_malloc(sizeof(pcre_extra)));
            if (!extra) {
                printf("Unable to allocate pcre_extra\n");
                return fail(db, nullptr);
            }
            *extra = pcre_extra();
        }

        int cap = 0; // PCRE_INFO_CAPTURECOUNT demands an int
        if (pcre_fullinfo(db, extra, PCRE_INFO_CAPTURECOUNT, &cap)) {
            printf("PCRE fullinfo error\n");
            return fail(db, extra);
        }
        assert(cap >= 0);
        capture_cnt = max(capture_cnt, cap);

        size_t db_size = 0;
        if (pcre_fullinfo(db, extra, PCRE_INFO_SIZE, &db_size)) {
            printf("PCRE fullinfo error\n");
            return fail(db, extra);
        }

        size_t study_size = 0;
        if (pcre_fullinfo(db, extra, PCRE_INFO_STUDYSIZE, &study_size)) {
            printf("PCRE fullinfo error\n");
            return fail(db, extra);
        }
        compiledSize += db_size + study_size;

        pcreDB->id = m.first;
        pcreDB->db = db;

        // Add the limits to whatever pcre_study() already recorded. Plain
        // assignment would clear the study-data bits in flags and make
        // pcre_exec() ignore that data.
        extra->flags |=
            PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
        extra->match_limit = 10000000;
        extra->match_limit_recursion = 1500;
        pcreDB->extra = extra;

        dbs.push_back(move(pcreDB));
    }

    timer.complete();
    compileSecs = timer.seconds();
    peakMemorySize = getPeakHeap();

    // Collect summary information.
    CompilePCREStats cs;
    cs.sigs_name = sigs_name;
    if (!sigs_name.empty()) {
        const auto pos = name.find_last_of('/');
        cs.signatures = name.substr(pos + 1);
    } else {
        cs.signatures = name;
    }
    cs.db_info = db_info;
    cs.expressionCount = expressions.size();
    cs.compiledSize = compiledSize;
    // Three ints of output vector per capture group, plus the whole match.
    cs.scratchSize = (capture_cnt + 1) * sizeof(int) * 3;
    cs.compileSecs = compileSecs;
    cs.peakMemorySize = peakMemorySize;

    return ue2::make_unique<EnginePCRE>(move(dbs), move(cs), capture_cnt);
}

114
tools/hsbench/engine_pcre.h Normal file
View File

@@ -0,0 +1,114 @@
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef ENGINEPCRE_H
#define ENGINEPCRE_H
#include "expressions.h"
#include "engine.h"
#include <pcre.h>
#include <memory>
#include <string>
#include <vector>
/** Information about the database compile, filled in by buildEnginePcre(). */
struct CompilePCREStats {
// Name of the signature set; may be empty.
std::string sigs_name;
// Signature source name as reported in the statistics output.
std::string signatures;
// Free-form engine description ("Version: " plus the PCRE version string).
std::string db_info;
// Number of expressions that were compiled.
size_t expressionCount = 0;
// Sum of PCRE_INFO_SIZE and PCRE_INFO_STUDYSIZE over all patterns, in bytes.
size_t compiledSize = 0;
// Bytes of output vector needed per context: (captures + 1) * 3 ints.
size_t scratchSize = 0;
// Time spent compiling all patterns, in seconds.
long double compileSecs = 0;
// Peak heap usage sampled after compilation, in bytes.
unsigned int peakMemorySize = 0;
};
/** Engine context which is allocated on a per-thread basis. */
class EnginePCREContext : public EngineContext{
public:
// capture_cnt is the largest capture-group count over all patterns.
// NOTE(review): the constructor/destructor bodies are not visible here;
// presumably they allocate and release ovec -- confirm in engine_pcre.cpp.
explicit EnginePCREContext(int capture_cnt);
~EnginePCREContext();
// Output vector handed to pcre_exec(); EnginePCRE::scan() treats it as
// holding (capture_cnt + 1) * 3 ints.
int *ovec = nullptr;
};
/**
 * One compiled PCRE pattern together with the per-pattern scan settings.
 *
 * The struct does not release db or extra itself; EnginePCRE's destructor
 * does that for the entries it holds.
 */
struct PcreDB {
    bool highlander = false;     //!< Stop after the first match ('H' flag).
    bool utf8 = false;           //!< Pattern was compiled in UTF-8 mode.
    u32 id = 0;                  //!< Expression id reported with matches.
    pcre *db = nullptr;          //!< Compiled pattern from pcre_compile().
    pcre_extra *extra = nullptr; //!< Study data and match limits.
};
/**
 * PCRE Engine for scanning data.
 *
 * Only block-mode scanning is implemented; the vectored and streaming entry
 * points print a message and abort.
 */
class EnginePCRE : public Engine {
public:
// Takes ownership of the compiled patterns and the compile statistics;
// capture_cnt_in is the largest capture-group count over all patterns.
explicit EnginePCRE(std::vector<std::unique_ptr<PcreDB>> dbs_in,
CompilePCREStats cs, int capture_cnt_in);
// Releases the PCRE objects held by every entry in dbs.
~EnginePCRE();
// Creates a new EnginePCREContext for this engine's capture count.
std::unique_ptr<EngineContext> makeContext() const;
// Block-mode scan of data[0..len) with every pattern; ectx must be a
// context created by makeContext().
void scan(const char *data, unsigned int len, unsigned int id,
ResultEntry &result, EngineContext &ectx) const;
// Not supported by the PCRE engine: aborts.
void scan_vectored(const char *const *data, const unsigned int *len,
unsigned int count, unsigned int streamId,
ResultEntry &result, EngineContext &ectx) const;
// Not supported by the PCRE engine: aborts.
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
unsigned id) const;
// Not supported by the PCRE engine: aborts.
void streamClose(std::unique_ptr<EngineStream> stream,
ResultEntry &result) const;
// Not supported by the PCRE engine: aborts.
void streamCompressExpand(EngineStream &stream,
std::vector<char> &temp) const;
// Not supported by the PCRE engine: aborts.
void streamScan(EngineStream &stream, const char *data, unsigned int len,
unsigned int id, ResultEntry &result) const;
// Prints the compile statistics to stdout.
void printStats() const;
// Inserts the compile statistics into the Compile table of db.
void sqlStats(SqlDB &db) const;
private:
// Compiled patterns, one entry per expression.
std::vector<std::unique_ptr<PcreDB>> dbs;
// Summary gathered while the patterns were compiled.
CompilePCREStats compile_stats;
// Largest capture-group count over all patterns; sizes the output vector.
int capture_cnt;
};
/**
 * Compile the given expressions with PCRE and build an engine from them.
 *
 * expressions maps an expression id to its "/pattern/flags" text; name and
 * sigs_name are only used for the reported statistics. Returns nullptr if
 * the map is empty or any expression cannot be parsed, compiled or studied.
 */
std::unique_ptr<EnginePCRE>
buildEnginePcre(const ExpressionMap &expressions, const std::string &name,
const std::string &sigs_name);
#endif // ENGINEPCRE_H

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2017, Intel Corporation
* Copyright (c) 2016-2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -31,6 +31,10 @@
#include "common.h"
#include "data_corpus.h"
#include "engine_hyperscan.h"
#if defined(HS_HYBRID)
#include "engine_chimera.h"
#include "engine_pcre.h"
#endif
#include "expressions.h"
#include "sqldb.h"
#include "thread_barrier.h"
@@ -87,6 +91,8 @@ namespace /* anonymous */ {
bool display_per_scan = false;
ScanMode scan_mode = ScanMode::STREAMING;
bool useHybrid = false;
bool usePcre = false;
unsigned repeats = 20;
string exprPath("");
string corpusFile("");
@@ -102,7 +108,7 @@ typedef void (*thread_func_t)(void *context);
class ThreadContext : boost::noncopyable {
public:
ThreadContext(unsigned num_in, const EngineHyperscan &db_in,
ThreadContext(unsigned num_in, const Engine &db_in,
thread_barrier &tb_in, thread_func_t function_in,
vector<DataBlock> corpus_data_in)
: num(num_in), results(repeats), engine(db_in),
@@ -155,7 +161,7 @@ public:
unsigned num;
Timer timer;
vector<ResultEntry> results;
const EngineHyperscan &engine;
const Engine &engine;
unique_ptr<EngineContext> enginectx;
vector<DataBlock> corpus_data;
@@ -181,6 +187,10 @@ void usage(const char *error) {
" (default: streaming).\n");
printf(" -V Benchmark in vectored mode"
" (default: streaming).\n");
#if defined(HS_HYBRID)
printf(" -H Benchmark using Chimera (if supported).\n");
printf(" -P Benchmark using PCRE (if supported).\n");
#endif
#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
printf(" -T CPU,CPU,... Benchmark with threads on these CPUs.\n");
#endif
@@ -214,7 +224,7 @@ struct BenchmarkSigs {
static
void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
UNUSED unique_ptr<Grey> &grey) {
const char options[] = "-b:c:Cd:e:E:G:hi:n:No:p:sS:Vw:z:"
const char options[] = "-b:c:Cd:e:E:G:hHi:n:No:p:PsS:Vw:z:"
#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
"T:" // add the thread flag
#endif
@@ -287,6 +297,14 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
usage(nullptr);
exit(0);
break;
case 'H':
#if defined(HS_HYBRID)
useHybrid = true;
#else
usage("Hybrid matcher not enabled in this build");
exit(1);
#endif
break;
case 'n':
if (!fromString(optarg, repeats) || repeats == 0) {
usage("Couldn't parse argument to -n flag, should be"
@@ -294,6 +312,14 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
exit(1);
}
break;
case 'P':
#if defined(HS_HYBRID)
usePcre = true;
#else
usage("PCRE matcher not enabled in this build");
exit(1);
#endif
break;
case 's':
in_sigfile = 2;
break;
@@ -399,6 +425,24 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
exit(1);
}
// Constraints on Chimera and PCRE engines
if (useHybrid || usePcre) {
if (useHybrid && usePcre) {
usage("Can't run both Chimera and PCRE.");
exit(1);
}
if (scan_mode != ScanMode::BLOCK) {
usage("Must specify block mode in Chimera or PCRE with "
"the -N option.");
exit(1);
}
if (forceEditDistance || loadDatabases || saveDatabases) {
usage("No extended options are supported in Chimera or PCRE.");
exit(1);
}
}
// Read in any -s signature sets.
for (const auto &file : sigFiles) {
SignatureSet sigs;
@@ -503,7 +547,7 @@ static
void benchStreamingInternal(ThreadContext *ctx, vector<StreamInfo> &streams,
bool do_compress) {
assert(ctx);
const EngineHyperscan &e = ctx->engine;
const Engine &e = ctx->engine;
const vector<DataBlock> &blocks = ctx->corpus_data;
vector<char> compress_buf(do_compress ? 1000 : 0);
@@ -812,7 +856,7 @@ void sqlResults(const vector<unique_ptr<ThreadContext>> &threads,
* the same copy of the data.
*/
static
unique_ptr<ThreadContext> makeThreadContext(const EngineHyperscan &db,
unique_ptr<ThreadContext> makeThreadContext(const Engine &db,
const vector<DataBlock> &blocks,
unsigned id,
thread_barrier &sync_barrier) {
@@ -839,7 +883,7 @@ unique_ptr<ThreadContext> makeThreadContext(const EngineHyperscan &db,
/** Run the given benchmark. */
static
void runBenchmark(const EngineHyperscan &db,
void runBenchmark(const Engine &db,
const vector<DataBlock> &corpus_blocks) {
size_t numThreads;
bool useAffinity = false;
@@ -936,8 +980,18 @@ int main(int argc, char *argv[]) {
continue;
}
auto engine = buildEngineHyperscan(exprMap, scan_mode, s.name,
sigName, *grey);
unique_ptr<Engine> engine;
if (useHybrid) {
#if defined(HS_HYBRID)
engine = buildEngineChimera(exprMap, s.name, sigName);
} else if (usePcre) {
engine = buildEnginePcre(exprMap, s.name, sigName);
#endif
} else {
engine = buildEngineHyperscan(exprMap, scan_mode, s.name,
sigName, *grey);
}
if (!engine) {
printf("Error: expressions failed to compile.\n");
exit(1);