mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 19:24:25 +03:00
chimera: hybrid of Hyperscan and PCRE
This commit is contained in:
@@ -31,6 +31,8 @@ SET(hsbench_SOURCES
|
||||
common.h
|
||||
data_corpus.cpp
|
||||
data_corpus.h
|
||||
engine.cpp
|
||||
engine.h
|
||||
engine_hyperscan.cpp
|
||||
engine_hyperscan.h
|
||||
heapstats.cpp
|
||||
@@ -45,6 +47,23 @@ SET(hsbench_SOURCES
|
||||
timer.h
|
||||
)
|
||||
|
||||
# Chimera support is optional. When it is enabled, define HS_HYBRID and add
# the Chimera and PCRE engine sources to the hsbench source list.
if (BUILD_CHIMERA)
    add_definitions(-DHS_HYBRID)
    list(APPEND hsbench_SOURCES
        engine_chimera.cpp
        engine_chimera.h
        engine_pcre.cpp
        engine_pcre.h
    )
endif()
|
||||
|
||||
add_executable(hsbench ${hsbench_SOURCES})
|
||||
target_link_libraries(hsbench hs databaseutil expressionutil ${SQLITE3_LDFLAGS}
|
||||
${CMAKE_THREAD_LIBS_INIT})
|
||||
# Link hsbench. The Chimera build also needs the PCRE include directories and
# links against chimera and the PCRE libraries.
if (BUILD_CHIMERA)
    include_directories(${PCRE_INCLUDE_DIRS})
    target_link_libraries(hsbench
        hs chimera ${PCRE_LDFLAGS} databaseutil expressionutil
        ${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
else()
    target_link_libraries(hsbench
        hs databaseutil expressionutil
        ${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
endif()
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -42,6 +42,12 @@ extern bool forceEditDistance;
|
||||
extern unsigned editDistance;
|
||||
extern bool printCompressSize;
|
||||
|
||||
/** Outcome of one complete scan: how long it took and how many matches. */
struct ResultEntry {
    double seconds{0.0};      //!< Time taken for the scan.
    unsigned int matches{0U}; //!< Number of matches found.
};
|
||||
|
||||
struct SqlFailure {
|
||||
explicit SqlFailure(const std::string &s) : message(s) {}
|
||||
std::string message;
|
||||
|
35
tools/hsbench/engine.cpp
Normal file
35
tools/hsbench/engine.cpp
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "engine.h"
|
||||
|
||||
EngineContext::~EngineContext() { }
|
||||
|
||||
EngineStream::~EngineStream() { }
|
||||
|
||||
Engine::~Engine() { }
|
94
tools/hsbench/engine.h
Normal file
94
tools/hsbench/engine.h
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ENGINE_H
|
||||
#define ENGINE_H
|
||||
|
||||
#include "common.h"
|
||||
#include "sqldb.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
// Engines have an engine context which is allocated on a per-thread basis.
|
||||
class EngineContext : boost::noncopyable {
|
||||
public:
|
||||
virtual ~EngineContext();
|
||||
};
|
||||
|
||||
/** Streaming mode scans have persistent stream state associated with them. */
|
||||
class EngineStream : boost::noncopyable {
|
||||
public:
|
||||
virtual ~EngineStream();
|
||||
unsigned int sn;
|
||||
};
|
||||
|
||||
// Benchmarking engine
|
||||
class Engine : boost::noncopyable {
|
||||
public:
|
||||
virtual ~Engine();
|
||||
|
||||
// allocate an EngineContext
|
||||
virtual std::unique_ptr<EngineContext> makeContext() const = 0;
|
||||
|
||||
// non-streaming scan
|
||||
virtual void scan(const char *data, unsigned len, unsigned blockId,
|
||||
ResultEntry &results, EngineContext &ectx) const = 0;
|
||||
|
||||
// vectoring scan
|
||||
virtual void scan_vectored(const char *const *data,
|
||||
const unsigned int *len, unsigned int count,
|
||||
unsigned int streamId, ResultEntry &result,
|
||||
EngineContext &ectx) const = 0;
|
||||
|
||||
// stream open
|
||||
virtual std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
|
||||
unsigned id) const = 0;
|
||||
|
||||
// stream close
|
||||
virtual void streamClose(std::unique_ptr<EngineStream> stream,
|
||||
ResultEntry &result) const = 0;
|
||||
|
||||
// stream compress and expand
|
||||
virtual void streamCompressExpand(EngineStream &stream,
|
||||
std::vector<char> &temp) const = 0;
|
||||
|
||||
// streaming scan
|
||||
virtual void streamScan(EngineStream &stream, const char *data,
|
||||
unsigned int len, unsigned int id,
|
||||
ResultEntry &result) const = 0;
|
||||
|
||||
virtual void printStats() const = 0;
|
||||
|
||||
virtual void sqlStats(SqlDB &db) const = 0;
|
||||
};
|
||||
|
||||
#endif // ENGINE_H
|
314
tools/hsbench/engine_chimera.cpp
Normal file
314
tools/hsbench/engine_chimera.cpp
Normal file
@@ -0,0 +1,314 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "ExpressionParser.h"
|
||||
#include "common.h"
|
||||
#include "engine_chimera.h"
|
||||
#include "expressions.h"
|
||||
#include "heapstats.h"
|
||||
#include "sqldb.h"
|
||||
#include "timer.h"
|
||||
|
||||
#include "chimera/ch_database.h"
|
||||
|
||||
#include "util/make_unique.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Allocate the Chimera scratch space used by scans made through this context.
 *
 * The status returned by ch_alloc_scratch() is checked explicitly rather than
 * only asserting on the pointer: asserts disappear in NDEBUG builds, which
 * would leave the context holding a null scratch. Failure is fatal, matching
 * how EngineChimera::scan() treats scan errors.
 */
EngineCHContext::EngineCHContext(const ch_database_t *db) {
    const ch_error_t err = ch_alloc_scratch(db, &scratch);
    if (err != CH_SUCCESS || !scratch) {
        printf("Fatal error: ch_alloc_scratch returned error %d\n", err);
        abort();
    }
}

/** Release the scratch space allocated by the constructor. */
EngineCHContext::~EngineCHContext() {
    ch_free_scratch(scratch);
}
|
||||
|
||||
namespace /* anonymous */ {
|
||||
|
||||
/** Scan context structure passed to the onMatch callback function. */
|
||||
struct ScanCHContext {
|
||||
ScanCHContext(unsigned id_in, ResultEntry &result_in)
|
||||
: id(id_in), result(result_in) {}
|
||||
unsigned id;
|
||||
ResultEntry &result;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
|
||||
* Callback function called for every match that Chimera produces, used when
|
||||
* "echo matches" is off.
|
||||
*/
|
||||
static
|
||||
int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int,
|
||||
unsigned int, const ch_capture_t *, void *ctx) {
|
||||
ScanCHContext *sc = static_cast<ScanCHContext *>(ctx);
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback function called for every match that Chimera produces when "echo
|
||||
* matches" is enabled.
|
||||
*/
|
||||
static
|
||||
int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
|
||||
unsigned int, unsigned int, const ch_capture_t *, void *ctx) {
|
||||
ScanCHContext *sc = static_cast<ScanCHContext *>(ctx);
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
printf("Match @%u:%llu for %u\n", sc->id, to, id);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
EngineChimera::EngineChimera(ch_database_t *db_in, CompileCHStats cs)
|
||||
: db(db_in), compile_stats(move(cs)) {
|
||||
assert(db);
|
||||
}
|
||||
|
||||
EngineChimera::~EngineChimera() {
|
||||
ch_free_database(db);
|
||||
}
|
||||
|
||||
unique_ptr<EngineContext> EngineChimera::makeContext() const {
|
||||
return ue2::make_unique<EngineCHContext>(db);
|
||||
}
|
||||
|
||||
void EngineChimera::scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ectx) const {
|
||||
assert(data);
|
||||
|
||||
auto &ctx = static_cast<EngineCHContext &>(ectx);
|
||||
ScanCHContext sc(id, result);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
ch_error_t rv = ch_scan(db, data, len, 0, ctx.scratch, callback, nullptr,
|
||||
&sc);
|
||||
|
||||
if (rv != CH_SUCCESS) {
|
||||
printf("Fatal error: ch_scan returned error %d\n", rv);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
// vectoring scan
|
||||
void EngineChimera::scan_vectored(UNUSED const char *const *data,
|
||||
UNUSED const unsigned int *len,
|
||||
UNUSED unsigned int count,
|
||||
UNUSED unsigned int streamId,
|
||||
UNUSED ResultEntry &result,
|
||||
UNUSED EngineContext &ectx) const {
|
||||
printf("Hybrid matcher can't support vectored mode.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
unique_ptr<EngineStream> EngineChimera::streamOpen(UNUSED EngineContext &ectx,
|
||||
UNUSED unsigned id) const {
|
||||
printf("Hybrid matcher can't stream.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
void EngineChimera::streamClose(UNUSED unique_ptr<EngineStream> stream,
|
||||
UNUSED ResultEntry &result) const {
|
||||
printf("Hybrid matcher can't stream.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
void EngineChimera::streamScan(UNUSED EngineStream &stream,
|
||||
UNUSED const char *data,
|
||||
UNUSED unsigned len, UNUSED unsigned id,
|
||||
UNUSED ResultEntry &result) const {
|
||||
printf("Hybrid matcher can't stream.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
void EngineChimera::streamCompressExpand(UNUSED EngineStream &stream,
|
||||
UNUSED vector<char> &temp) const {
|
||||
printf("Hybrid matcher can't stream.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
void EngineChimera::printStats() const {
|
||||
// Output summary information.
|
||||
if (!compile_stats.sigs_name.empty()) {
|
||||
printf("Signature set: %s\n", compile_stats.sigs_name.c_str());
|
||||
}
|
||||
printf("Signatures: %s\n", compile_stats.signatures.c_str());
|
||||
printf("Chimera info: %s\n", compile_stats.db_info.c_str());
|
||||
printf("Expression count: %'zu\n", compile_stats.expressionCount);
|
||||
printf("Bytecode size: %'zu bytes\n", compile_stats.compiledSize);
|
||||
printf("Database CRC: 0x%x\n", compile_stats.crc32);
|
||||
printf("Scratch size: %'zu bytes\n", compile_stats.scratchSize);
|
||||
printf("Compile time: %'0.3Lf seconds\n", compile_stats.compileSecs);
|
||||
printf("Peak heap usage: %'u bytes\n", compile_stats.peakMemorySize);
|
||||
}
|
||||
|
||||
/**
 * Insert one row holding the compile statistics into the Compile table of
 * \p sqldb. The CRC is stored as a "0x"-prefixed hexadecimal string.
 */
void EngineChimera::sqlStats(SqlDB &sqldb) const {
    static const string insert_stmt =
        "INSERT INTO Compile ("
            "sigsName, signatures, dbInfo, exprCount, dbSize, crc,"
            "scratchSize, compileSecs, peakMemory) "
        "VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)";

    ostringstream crc_text;
    crc_text << "0x" << hex << compile_stats.crc32;

    sqldb.insert_all(insert_stmt,
                     compile_stats.sigs_name,
                     compile_stats.signatures,
                     compile_stats.db_info,
                     compile_stats.expressionCount,
                     compile_stats.compiledSize,
                     crc_text.str(),
                     compile_stats.scratchSize,
                     compile_stats.compileSecs,
                     compile_stats.peakMemorySize);
}
|
||||
|
||||
/**
 * Compile a set of expressions into a Chimera database and wrap it in an
 * EngineChimera.
 *
 * \param expressions map from expression id to the raw signature text
 * \param name name of the signature source; when \p sigs_name is set only
 *             the part after the last '/' is recorded
 * \param sigs_name name of the signature set (may be empty)
 * \return the new engine, or nullptr if parsing, compilation or any of the
 *         follow-up database queries fails
 *
 * The compiled database is held by a scoped owner until it is handed to the
 * engine, so none of the error paths leaks it.
 */
unique_ptr<EngineChimera>
buildEngineChimera(const ExpressionMap &expressions, const string &name,
                   const string &sigs_name) {
    if (expressions.empty()) {
        assert(0);
        return nullptr;
    }

    const unsigned int count =
        static_cast<unsigned int>(expressions.size());

    vector<string> exprs;
    vector<unsigned int> flags, ids;
    exprs.reserve(count);
    flags.reserve(count);
    ids.reserve(count);

    for (const auto &m : expressions) {
        string expr;
        unsigned int f = 0;
        // Only used to detect extended parameters, which are rejected below.
        hs_expr_ext extparam;
        extparam.flags = 0;
        if (!readExpression(m.second, expr, &f, &extparam)) {
            printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(),
                   m.first);
            return nullptr;
        }

        if (extparam.flags) {
            printf("Error parsing PCRE with extended flags: %s (id %u)\n",
                   m.second.c_str(), m.first);
            return nullptr;
        }
        exprs.push_back(expr);
        ids.push_back(m.first);
        flags.push_back(f);
    }

    // Our compiler takes an array of plain ol' C strings.
    vector<const char *> patterns(count);
    for (unsigned int i = 0; i < count; i++) {
        patterns[i] = exprs[i].c_str();
    }

    Timer timer;
    timer.start();

    // Capture groups by default
    const unsigned int mode = CH_MODE_GROUPS;
    ch_database_t *raw_db = nullptr;
    ch_compile_error_t *compile_err = nullptr;
    ch_error_t err = ch_compile_multi(patterns.data(), flags.data(),
                                      ids.data(), count, mode, nullptr,
                                      &raw_db, &compile_err);

    timer.complete();
    const long double compileSecs = timer.seconds();
    const unsigned int peakMemorySize = getPeakHeap();

    // Treat every non-success status as a failure so that an unset database
    // pointer is never used.
    if (err != CH_SUCCESS) {
        if (compile_err) {
            if (compile_err->expression >= 0) {
                // The expression index is signed (it is compared against 0
                // above), hence %d.
                printf("Compile error for signature #%d: %s\n",
                       compile_err->expression, compile_err->message);
            } else {
                printf("Compile error: %s\n", compile_err->message);
            }
            ch_free_compile_error(compile_err);
        } else {
            printf("Compile error: ch_compile_multi returned error %d\n",
                   err);
        }
        return nullptr;
    }

    // Own the database until it is handed to the engine: every early return
    // below then frees it instead of leaking it.
    struct DatabaseDeleter {
        void operator()(ch_database_t *d) const { ch_free_database(d); }
    };
    unique_ptr<ch_database_t, DatabaseDeleter> db(raw_db);

    size_t compiledSize = 0;
    err = ch_database_size(db.get(), &compiledSize);
    if (err != CH_SUCCESS) {
        return nullptr;
    }
    assert(compiledSize > 0);

    char *info = nullptr;
    err = ch_database_info(db.get(), &info);
    if (err != CH_SUCCESS) {
        return nullptr;
    }
    const string db_info(info);
    free(info);

    // Allocate scratch temporarily to find its size: this is a good test
    // anyway.
    ch_scratch_t *scratch = nullptr;
    err = ch_alloc_scratch(db.get(), &scratch);
    if (err != CH_SUCCESS) {
        return nullptr;
    }

    size_t scratchSize = 0;
    err = ch_scratch_size(scratch, &scratchSize);
    // Free the scratch before looking at the status so that it is released
    // on the failure path too.
    ch_free_scratch(scratch);
    if (err != CH_SUCCESS) {
        return nullptr;
    }

    // Collect summary information. Note that crc32 is not computed here and
    // keeps its default value of 0.
    CompileCHStats cs;
    cs.sigs_name = sigs_name;
    if (!sigs_name.empty()) {
        // find_last_of() returns npos when there is no '/', in which case
        // pos + 1 wraps to 0 and the whole name is kept.
        const auto pos = name.find_last_of('/');
        cs.signatures = name.substr(pos + 1);
    } else {
        cs.signatures = name;
    }
    cs.db_info = db_info;
    cs.expressionCount = expressions.size();
    cs.compiledSize = compiledSize;
    cs.scratchSize = scratchSize;
    cs.compileSecs = compileSecs;
    cs.peakMemorySize = peakMemorySize;

    return ue2::make_unique<EngineChimera>(db.release(), std::move(cs));
}
|
103
tools/hsbench/engine_chimera.h
Normal file
103
tools/hsbench/engine_chimera.h
Normal file
@@ -0,0 +1,103 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ENGINECHIMERA_H
|
||||
#define ENGINECHIMERA_H
|
||||
|
||||
#include "expressions.h"
|
||||
#include "engine.h"
|
||||
|
||||
#include "chimera/ch.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/** Information about the database compile. */
struct CompileCHStats {
    std::string sigs_name;  //!< Name of the signature set.
    std::string signatures; //!< Name of the signature source.
    std::string db_info;    //!< Database information string.
    size_t expressionCount{0};
    size_t compiledSize{0};
    uint32_t crc32{0};
    size_t scratchSize{0};
    long double compileSecs{0};
    unsigned int peakMemorySize{0};
};
|
||||
|
||||
/** Engine context which is allocated on a per-thread basis. */
|
||||
class EngineCHContext : public EngineContext{
|
||||
public:
|
||||
explicit EngineCHContext(const ch_database_t *db);
|
||||
~EngineCHContext();
|
||||
|
||||
ch_scratch_t *scratch = nullptr;
|
||||
};
|
||||
|
||||
/** Chimera Engine for scanning data. */
|
||||
class EngineChimera : public Engine {
|
||||
public:
|
||||
explicit EngineChimera(ch_database_t *db, CompileCHStats cs);
|
||||
~EngineChimera();
|
||||
|
||||
std::unique_ptr<EngineContext> makeContext() const;
|
||||
|
||||
void scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
void scan_vectored(const char *const *data, const unsigned int *len,
|
||||
unsigned int count, unsigned int streamId,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
|
||||
unsigned id) const;
|
||||
|
||||
void streamClose(std::unique_ptr<EngineStream> stream,
|
||||
ResultEntry &result) const;
|
||||
|
||||
void streamCompressExpand(EngineStream &stream,
|
||||
std::vector<char> &temp) const;
|
||||
|
||||
void streamScan(EngineStream &stream, const char *data, unsigned int len,
|
||||
unsigned int id, ResultEntry &result) const;
|
||||
|
||||
void printStats() const;
|
||||
|
||||
void sqlStats(SqlDB &db) const;
|
||||
|
||||
private:
|
||||
ch_database_t *db;
|
||||
CompileCHStats compile_stats;
|
||||
};
|
||||
|
||||
std::unique_ptr<EngineChimera>
|
||||
buildEngineChimera(const ExpressionMap &expressions, const std::string &name,
|
||||
const std::string &sigs_name);
|
||||
|
||||
#endif // ENGINECHIMERA_H
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -57,20 +57,22 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
EngineContext::EngineContext(const hs_database_t *db) {
|
||||
EngineHSContext::EngineHSContext(const hs_database_t *db) {
|
||||
hs_alloc_scratch(db, &scratch);
|
||||
assert(scratch);
|
||||
}
|
||||
|
||||
EngineContext::~EngineContext() {
|
||||
EngineHSContext::~EngineHSContext() {
|
||||
hs_free_scratch(scratch);
|
||||
}
|
||||
|
||||
EngineHSStream::~EngineHSStream() { }
|
||||
|
||||
namespace /* anonymous */ {
|
||||
|
||||
/** Scan context structure passed to the onMatch callback function. */
|
||||
struct ScanContext {
|
||||
ScanContext(unsigned id_in, ResultEntry &result_in,
|
||||
struct ScanHSContext {
|
||||
ScanHSContext(unsigned id_in, ResultEntry &result_in,
|
||||
const EngineStream *stream_in)
|
||||
: id(id_in), result(result_in), stream(stream_in) {}
|
||||
unsigned id;
|
||||
@@ -87,7 +89,7 @@ struct ScanContext {
|
||||
static
|
||||
int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int,
|
||||
void *ctx) {
|
||||
ScanContext *sc = static_cast<ScanContext *>(ctx);
|
||||
ScanHSContext *sc = static_cast<ScanHSContext *>(ctx);
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
@@ -101,7 +103,7 @@ int onMatch(unsigned int, unsigned long long, unsigned long long, unsigned int,
|
||||
static
|
||||
int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
|
||||
unsigned int, void *ctx) {
|
||||
ScanContext *sc = static_cast<ScanContext *>(ctx);
|
||||
ScanHSContext *sc = static_cast<ScanHSContext *>(ctx);
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
@@ -114,7 +116,7 @@ int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
|
||||
return 0;
|
||||
}
|
||||
|
||||
EngineHyperscan::EngineHyperscan(hs_database_t *db_in, CompileStats cs)
|
||||
EngineHyperscan::EngineHyperscan(hs_database_t *db_in, CompileHSStats cs)
|
||||
: db(db_in), compile_stats(std::move(cs)) {
|
||||
assert(db);
|
||||
}
|
||||
@@ -124,14 +126,15 @@ EngineHyperscan::~EngineHyperscan() {
|
||||
}
|
||||
|
||||
unique_ptr<EngineContext> EngineHyperscan::makeContext() const {
|
||||
return ue2::make_unique<EngineContext>(db);
|
||||
return ue2::make_unique<EngineHSContext>(db);
|
||||
}
|
||||
|
||||
void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ctx) const {
|
||||
ResultEntry &result, EngineContext &ectx) const {
|
||||
assert(data);
|
||||
|
||||
ScanContext sc(id, result, nullptr);
|
||||
EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
|
||||
ScanHSContext sc(id, result, nullptr);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
hs_error_t rv = hs_scan(db, data, len, 0, ctx.scratch, callback, &sc);
|
||||
|
||||
@@ -144,11 +147,12 @@ void EngineHyperscan::scan(const char *data, unsigned int len, unsigned int id,
|
||||
void EngineHyperscan::scan_vectored(const char *const *data,
|
||||
const unsigned int *len, unsigned int count,
|
||||
unsigned streamId, ResultEntry &result,
|
||||
EngineContext &ctx) const {
|
||||
EngineContext &ectx) const {
|
||||
assert(data);
|
||||
assert(len);
|
||||
|
||||
ScanContext sc(streamId, result, nullptr);
|
||||
EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
|
||||
ScanHSContext sc(streamId, result, nullptr);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
hs_error_t rv =
|
||||
hs_scan_vector(db, data, len, count, 0, ctx.scratch, callback, &sc);
|
||||
@@ -159,9 +163,10 @@ void EngineHyperscan::scan_vectored(const char *const *data,
|
||||
}
|
||||
}
|
||||
|
||||
unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ctx,
|
||||
unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ectx,
|
||||
unsigned streamId) const {
|
||||
auto stream = ue2::make_unique<EngineStream>();
|
||||
EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
|
||||
auto stream = ue2::make_unique<EngineHSStream>();
|
||||
stream->ctx = &ctx;
|
||||
|
||||
hs_open_stream(db, 0, &stream->id);
|
||||
@@ -170,17 +175,18 @@ unique_ptr<EngineStream> EngineHyperscan::streamOpen(EngineContext &ctx,
|
||||
return nullptr;
|
||||
}
|
||||
stream->sn = streamId;
|
||||
return stream;
|
||||
return move(stream);
|
||||
}
|
||||
|
||||
void EngineHyperscan::streamClose(unique_ptr<EngineStream> stream,
|
||||
ResultEntry &result) const {
|
||||
assert(stream);
|
||||
|
||||
auto &s = static_cast<EngineStream &>(*stream);
|
||||
EngineContext &ctx = *s.ctx;
|
||||
auto &s = static_cast<EngineHSStream &>(*stream);
|
||||
EngineContext &ectx = *s.ctx;
|
||||
EngineHSContext &ctx = static_cast<EngineHSContext &>(ectx);
|
||||
|
||||
ScanContext sc(0, result, &s);
|
||||
ScanHSContext sc(0, result, &s);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
|
||||
assert(s.id);
|
||||
@@ -193,10 +199,10 @@ void EngineHyperscan::streamScan(EngineStream &stream, const char *data,
|
||||
ResultEntry &result) const {
|
||||
assert(data);
|
||||
|
||||
auto &s = static_cast<EngineStream &>(stream);
|
||||
EngineContext &ctx = *s.ctx;
|
||||
auto &s = static_cast<EngineHSStream &>(stream);
|
||||
EngineHSContext &ctx = *s.ctx;
|
||||
|
||||
ScanContext sc(id, result, &s);
|
||||
ScanHSContext sc(id, result, &s);
|
||||
auto callback = echo_matches ? onMatchEcho : onMatch;
|
||||
hs_error_t rv =
|
||||
hs_scan_stream(s.id, data, len, 0, ctx.scratch, callback, &sc);
|
||||
@@ -210,11 +216,12 @@ void EngineHyperscan::streamScan(EngineStream &stream, const char *data,
|
||||
void EngineHyperscan::streamCompressExpand(EngineStream &stream,
|
||||
vector<char> &temp) const {
|
||||
size_t used = 0;
|
||||
hs_error_t err = hs_compress_stream(stream.id, temp.data(), temp.size(),
|
||||
auto &s = static_cast<EngineHSStream &>(stream);
|
||||
hs_error_t err = hs_compress_stream(s.id, temp.data(), temp.size(),
|
||||
&used);
|
||||
if (err == HS_INSUFFICIENT_SPACE) {
|
||||
temp.resize(used);
|
||||
err = hs_compress_stream(stream.id, temp.data(), temp.size(), &used);
|
||||
err = hs_compress_stream(s.id, temp.data(), temp.size(), &used);
|
||||
}
|
||||
|
||||
if (err != HS_SUCCESS) {
|
||||
@@ -223,10 +230,10 @@ void EngineHyperscan::streamCompressExpand(EngineStream &stream,
|
||||
}
|
||||
|
||||
if (printCompressSize) {
|
||||
printf("stream %u: compressed to %zu\n", stream.sn, used);
|
||||
printf("stream %u: compressed to %zu\n", s.sn, used);
|
||||
}
|
||||
|
||||
err = hs_reset_and_expand_stream(stream.id, temp.data(), temp.size(),
|
||||
err = hs_reset_and_expand_stream(s.id, temp.data(), temp.size(),
|
||||
nullptr, nullptr, nullptr);
|
||||
|
||||
if (err != HS_SUCCESS) {
|
||||
@@ -469,7 +476,7 @@ buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode,
|
||||
hs_free_scratch(scratch);
|
||||
|
||||
// Collect summary information.
|
||||
CompileStats cs;
|
||||
CompileHSStats cs;
|
||||
cs.sigs_name = sigs_name;
|
||||
if (!sigs_name.empty()) {
|
||||
const auto pos = name.find_last_of('/');
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -30,22 +30,15 @@
|
||||
#define ENGINEHYPERSCAN_H
|
||||
|
||||
#include "expressions.h"
|
||||
#include "common.h"
|
||||
#include "sqldb.h"
|
||||
#include "engine.h"
|
||||
#include "hs_runtime.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/** Structure for the result of a single complete scan. */
|
||||
struct ResultEntry {
|
||||
double seconds = 0; //!< Time taken for scan.
|
||||
unsigned int matches = 0; //!< Count of matches found.
|
||||
};
|
||||
|
||||
/** Information about the database compile */
|
||||
struct CompileStats {
|
||||
struct CompileHSStats {
|
||||
std::string sigs_name;
|
||||
std::string signatures;
|
||||
std::string db_info;
|
||||
@@ -60,38 +53,38 @@ struct CompileStats {
|
||||
};
|
||||
|
||||
/** Engine context which is allocated on a per-thread basis. */
|
||||
class EngineContext {
|
||||
class EngineHSContext : public EngineContext {
|
||||
public:
|
||||
explicit EngineContext(const hs_database_t *db);
|
||||
~EngineContext();
|
||||
explicit EngineHSContext(const hs_database_t *db);
|
||||
~EngineHSContext();
|
||||
|
||||
hs_scratch_t *scratch = nullptr;
|
||||
};
|
||||
|
||||
/** Streaming mode scans have persistent stream state associated with them. */
|
||||
class EngineStream {
|
||||
class EngineHSStream : public EngineStream {
|
||||
public:
|
||||
~EngineHSStream();
|
||||
hs_stream_t *id;
|
||||
unsigned int sn;
|
||||
EngineContext *ctx;
|
||||
EngineHSContext *ctx;
|
||||
};
|
||||
|
||||
/** Hyperscan Engine for scanning data. */
|
||||
class EngineHyperscan {
|
||||
class EngineHyperscan : public Engine {
|
||||
public:
|
||||
explicit EngineHyperscan(hs_database_t *db, CompileStats cs);
|
||||
explicit EngineHyperscan(hs_database_t *db, CompileHSStats cs);
|
||||
~EngineHyperscan();
|
||||
|
||||
std::unique_ptr<EngineContext> makeContext() const;
|
||||
|
||||
void scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ctx) const;
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
void scan_vectored(const char *const *data, const unsigned int *len,
|
||||
unsigned int count, unsigned int streamId,
|
||||
ResultEntry &result, EngineContext &ctx) const;
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
std::unique_ptr<EngineStream> streamOpen(EngineContext &ctx,
|
||||
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
|
||||
unsigned id) const;
|
||||
|
||||
void streamClose(std::unique_ptr<EngineStream> stream,
|
||||
@@ -109,7 +102,7 @@ public:
|
||||
|
||||
private:
|
||||
hs_database_t *db;
|
||||
CompileStats compile_stats;
|
||||
CompileHSStats compile_stats;
|
||||
};
|
||||
|
||||
namespace ue2 {
|
||||
|
388
tools/hsbench/engine_pcre.cpp
Normal file
388
tools/hsbench/engine_pcre.cpp
Normal file
@@ -0,0 +1,388 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "common.h"
|
||||
#include "engine_pcre.h"
|
||||
#include "heapstats.h"
|
||||
#include "huge.h"
|
||||
#include "sqldb.h"
|
||||
#include "timer.h"
|
||||
|
||||
#include "util/make_unique.h"
|
||||
#include "util/unicode_def.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Allocate the per-thread PCRE output vector.
 *
 * The buffer holds (capture_cnt + 1) * 3 ints, which is the same element
 * count that EnginePCRE::scan() passes to pcre_exec() as the ovector size.
 *
 * \param capture_cnt Largest capture group count over all compiled patterns.
 */
EnginePCREContext::EnginePCREContext(int capture_cnt) {
    assert(capture_cnt >= 0);
    const size_t ovec_len = (static_cast<size_t>(capture_cnt) + 1) * 3;
    ovec = static_cast<int *>(malloc(ovec_len * sizeof(int)));
    if (!ovec) {
        // scan() reads ovec unconditionally, so a failed allocation must not
        // be allowed to surface later as a null dereference.
        printf("Fatal error: unable to allocate PCRE output vector\n");
        abort();
    }
}
|
||||
|
||||
/** Release the output vector allocated by the constructor. */
EnginePCREContext::~EnginePCREContext() { free(ovec); }
|
||||
|
||||
namespace /* anonymous */ {
|
||||
|
||||
/** Scan context structure passed to the onMatch callback function. */
|
||||
struct ScanPCREContext {
|
||||
ScanPCREContext(unsigned id_in, ResultEntry &result_in)
|
||||
: id(id_in), result(result_in) {}
|
||||
unsigned id;
|
||||
ResultEntry &result;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
|
||||
* Function called for every match that PCRE produces, used when
|
||||
* "echo matches" is off.
|
||||
*/
|
||||
static
|
||||
int onMatch(ScanPCREContext *sc) {
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function called for every match that PCRE produces when "echo
|
||||
* matches" is enabled.
|
||||
*/
|
||||
static
|
||||
int onMatchEcho(unsigned int id, unsigned long long, unsigned long long to,
|
||||
ScanPCREContext *sc) {
|
||||
assert(sc);
|
||||
sc->result.matches++;
|
||||
|
||||
printf("Match @%u:%llu for %u\n", sc->id, to, id);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Take ownership of the compiled pattern list and the compile statistics,
 * and remember the capture count used to size per-thread output vectors.
 */
EnginePCRE::EnginePCRE(vector<unique_ptr<PcreDB>> dbs_in, CompilePCREStats cs,
                       int capture_cnt_in)
    : dbs(std::move(dbs_in)),
      compile_stats(std::move(cs)),
      capture_cnt(capture_cnt_in) {
}
|
||||
|
||||
/**
 * Release every compiled pattern held by this engine.
 *
 * PcreDB holds plain pointers, so the engine is responsible for freeing them.
 * The PCRE release functions are used rather than raw free() so that the
 * deallocation goes through PCRE's own allocator hooks; pcre_free_study()
 * accepts a null pointer.
 */
EnginePCRE::~EnginePCRE() {
    for (auto &pcreDB : dbs) {
        pcre_free_study(pcreDB->extra);
        pcreDB->extra = nullptr;
        pcre_free(pcreDB->db);
        pcreDB->db = nullptr;
    }
}
|
||||
|
||||
/** Create a scan context whose output vector fits this engine's patterns. */
unique_ptr<EngineContext> EnginePCRE::makeContext() const {
    auto ctx = ue2::make_unique<EnginePCREContext>(capture_cnt);
    return ctx;
}
|
||||
|
||||
void EnginePCRE::scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ectx) const {
|
||||
assert(data);
|
||||
|
||||
ScanPCREContext sc(id, result);
|
||||
auto &ctx = static_cast<EnginePCREContext &>(ectx);
|
||||
int *ovec = ctx.ovec;
|
||||
int ovec_size = (capture_cnt + 1) * 3;
|
||||
for (const auto &pcreDB : dbs) {
|
||||
int startoffset = 0;
|
||||
bool utf8 = pcreDB->utf8;
|
||||
bool highlander = pcreDB->highlander;
|
||||
|
||||
int flags = 0;
|
||||
int ret;
|
||||
do {
|
||||
ret = pcre_exec(pcreDB->db, pcreDB->extra, data, len,
|
||||
startoffset, flags, ovec, ovec_size);
|
||||
if (ret <= PCRE_ERROR_NOMATCH) {
|
||||
break;
|
||||
}
|
||||
|
||||
int from = ovec[0];
|
||||
int to = ovec[1];
|
||||
assert(from <= to);
|
||||
|
||||
if (echo_matches) {
|
||||
onMatchEcho(pcreDB->id, from, to, &sc);
|
||||
} else {
|
||||
onMatch(&sc);
|
||||
}
|
||||
|
||||
// If we only wanted a single match, we're done.
|
||||
if (highlander) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Next scan starts at the first codepoint after the match. It's
|
||||
// possible that we have a vacuous match, in which case we must step
|
||||
// past it to ensure that we always progress.
|
||||
if (from != to) {
|
||||
startoffset = to;
|
||||
} else if (utf8) {
|
||||
startoffset = to + 1;
|
||||
while (startoffset < (int)len &&
|
||||
((data[startoffset] & 0xc0) == UTF_CONT_BYTE_HEADER)) {
|
||||
++startoffset;
|
||||
}
|
||||
} else {
|
||||
startoffset = to + 1;
|
||||
}
|
||||
} while (startoffset <= (int)len);
|
||||
|
||||
if (ret < PCRE_ERROR_NOMATCH) {
|
||||
printf("Fatal error: pcre returned error %d\n", ret);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// vectoring scan
|
||||
void EnginePCRE::scan_vectored(UNUSED const char *const *data,
|
||||
UNUSED const unsigned int *len,
|
||||
UNUSED unsigned int count,
|
||||
UNUSED unsigned int streamId,
|
||||
UNUSED ResultEntry &result,
|
||||
UNUSED EngineContext &ectx) const {
|
||||
printf("PCRE matcher can't support vectored mode.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
/** Streaming is not implemented for PCRE: report it and abort. */
unique_ptr<EngineStream> EnginePCRE::streamOpen(UNUSED EngineContext &ectx,
                                                UNUSED unsigned id) const {
    printf("PCRE matcher can't stream.\n");
    abort();
}
|
||||
|
||||
/** Streaming is not implemented for PCRE: report it and abort. */
void EnginePCRE::streamClose(UNUSED unique_ptr<EngineStream> stream,
                             UNUSED ResultEntry &result) const {
    printf("PCRE matcher can't stream.\n");
    abort();
}
|
||||
|
||||
/** Streaming is not implemented for PCRE: report it and abort. */
void EnginePCRE::streamScan(UNUSED EngineStream &stream,
                            UNUSED const char *data,
                            UNUSED unsigned len,
                            UNUSED unsigned id,
                            UNUSED ResultEntry &result) const {
    printf("PCRE matcher can't stream.\n");
    abort();
}
|
||||
|
||||
/** Streaming is not implemented for PCRE: report it and abort. */
void EnginePCRE::streamCompressExpand(UNUSED EngineStream &stream,
                                      UNUSED vector<char> &temp) const {
    printf("PCRE matcher can't stream.\n");
    abort();
}
|
||||
|
||||
void EnginePCRE::printStats() const {
|
||||
// Output summary information.
|
||||
if (!compile_stats.sigs_name.empty()) {
|
||||
printf("Signature set: %s\n", compile_stats.sigs_name.c_str());
|
||||
}
|
||||
printf("Signatures: %s\n", compile_stats.signatures.c_str());
|
||||
printf("PCRE info: %s\n", compile_stats.db_info.c_str());
|
||||
printf("Expression count: %'zu\n", compile_stats.expressionCount);
|
||||
printf("Bytecode size: %'zu bytes\n", compile_stats.compiledSize);
|
||||
printf("Scratch size: %'zu bytes\n", compile_stats.scratchSize);
|
||||
printf("Compile time: %'0.3Lf seconds\n", compile_stats.compileSecs);
|
||||
printf("Peak heap usage: %'u bytes\n", compile_stats.peakMemorySize);
|
||||
}
|
||||
|
||||
/**
 * Insert the compile summary into the Compile table of the given database.
 * The crc column is written as an empty string: nothing is streamed into the
 * crc buffer here.
 */
void EnginePCRE::sqlStats(SqlDB &sqldb) const {
    static const string Q =
        "INSERT INTO Compile ("
            "sigsName, signatures, dbInfo, exprCount, dbSize, crc,"
            "scratchSize, compileSecs, peakMemory) "
        "VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)";

    const CompilePCREStats &cs = compile_stats;
    ostringstream crc_text;

    sqldb.insert_all(Q, cs.sigs_name, cs.signatures, cs.db_info,
                     cs.expressionCount, cs.compiledSize, crc_text.str(),
                     cs.scratchSize, cs.compileSecs, cs.peakMemorySize);
}
|
||||
|
||||
static
|
||||
bool decodeExprPCRE(string &expr, unsigned *flags, struct PcreDB &db) {
|
||||
if (expr[0] != '/') {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t end = expr.find_last_of('/');
|
||||
if (end == string::npos) {
|
||||
return false;
|
||||
}
|
||||
string strFlags = expr.substr(end + 1, expr.length() - end - 1);
|
||||
|
||||
// strip starting and trailing slashes and the flags
|
||||
expr.erase(end, expr.length() - end);
|
||||
expr.erase(0, 1);
|
||||
|
||||
// decode the flags
|
||||
*flags = 0;
|
||||
for (size_t i = 0; i != strFlags.length(); ++i) {
|
||||
switch (strFlags[i]) {
|
||||
case 's':
|
||||
*flags |= PCRE_DOTALL;
|
||||
break;
|
||||
case 'm':
|
||||
*flags |= PCRE_MULTILINE;
|
||||
break;
|
||||
case 'i':
|
||||
*flags |= PCRE_CASELESS;
|
||||
break;
|
||||
case '8':
|
||||
*flags |= PCRE_UTF8;
|
||||
db.utf8 = true;
|
||||
break;
|
||||
case 'W':
|
||||
*flags |= PCRE_UCP;
|
||||
break;
|
||||
case 'H':
|
||||
db.highlander = true;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Compile every expression with PCRE and wrap the results in an EnginePCRE.
 *
 * Each expression is decoded with decodeExprPCRE(), compiled, studied with
 * JIT requested, and given match limits. Compile time, bytecode size and the
 * largest capture count are collected for the statistics record.
 *
 * \param expressions Map of expression id to "/pattern/flags" text.
 * \param name        Name of the expression source; the part after the last
 *                    '/' is reported when \p sigs_name is non-empty.
 * \param sigs_name   Signature set name, may be empty.
 * \return The engine, or nullptr if any expression fails to parse, compile or
 *         be studied. Everything compiled up to that point is released.
 */
unique_ptr<EnginePCRE>
buildEnginePcre(const ExpressionMap &expressions, const string &name,
                const string &sigs_name) {
    if (expressions.empty()) {
        assert(0);
        return nullptr;
    }

    long double compileSecs = 0.0;
    size_t compiledSize = 0;
    unsigned int peakMemorySize = 0;
    string db_info("Version: ");
    db_info += string(pcre_version());

    vector<unique_ptr<PcreDB>> dbs;
    int capture_cnt = 0;

    // PcreDB holds plain pointers with no destructor, so a failure part-way
    // through must release the pattern being built as well as every pattern
    // already stored in dbs. Both PCRE release calls accept null.
    auto fail = [&dbs](pcre *cur_db,
                       pcre_extra *cur_extra) -> unique_ptr<EnginePCRE> {
        pcre_free_study(cur_extra);
        pcre_free(cur_db);
        for (auto &built : dbs) {
            pcre_free_study(built->extra);
            pcre_free(built->db);
        }
        dbs.clear();
        return nullptr;
    };

    Timer timer;
    timer.start();

    for (const auto &m : expressions) {
        string expr(m.second);
        unsigned int flags = 0;
        auto pcreDB = ue2::make_unique<PcreDB>();
        if (!decodeExprPCRE(expr, &flags, *pcreDB)) {
            printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(),
                   m.first);
            return fail(nullptr, nullptr);
        }

        const char *errp = nullptr;
        int erro = 0;
        pcre *db = pcre_compile(expr.c_str(), flags, &errp, &erro, NULL);

        if (!db) {
            printf("Compile error %s\n", errp);
            return fail(nullptr, nullptr);
        }

        pcre_extra *extra = pcre_study(db, PCRE_STUDY_JIT_COMPILE, &errp);
        if (errp) {
            printf("PCRE could not be studied: %s\n", errp);
            return fail(db, extra);
        }
        if (!extra) {
            // No study data was produced; a pcre_extra block is still needed
            // to carry the match limits. It has to be zeroed because
            // pcre_fullinfo() below reads its flags.
            extra = static_cast<pcre_extra *>(pcre_malloc(sizeof(pcre_extra)));
            if (!extra) {
                printf("Unable to allocate PCRE extra block\n");
                return fail(db, nullptr);
            }
            *extra = pcre_extra();
        }

        int cap = 0; // PCRE_INFO_CAPTURECOUNT demands an int
        if (pcre_fullinfo(db, extra, PCRE_INFO_CAPTURECOUNT, &cap)) {
            printf("PCRE fullinfo error\n");
            return fail(db, extra);
        }
        assert(cap >= 0);
        capture_cnt = max(capture_cnt, cap);

        size_t db_size = 0;
        if (pcre_fullinfo(db, extra, PCRE_INFO_SIZE, &db_size)) {
            printf("PCRE fullinfo error\n");
            return fail(db, extra);
        }

        size_t study_size = 0;
        if (pcre_fullinfo(db, extra, PCRE_INFO_STUDYSIZE, &study_size)) {
            printf("PCRE fullinfo error\n");
            return fail(db, extra);
        }
        compiledSize += db_size + study_size;

        pcreDB->id = m.first;
        pcreDB->db = db;

        // NOTE(review): this assignment replaces whatever flags pcre_study()
        // set on the block rather than adding to them. Kept as in the
        // original; confirm that discarding the study/JIT flags is intended.
        extra->flags =
            PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
        extra->match_limit = 10000000;
        extra->match_limit_recursion = 1500;

        pcreDB->extra = extra;
        dbs.push_back(move(pcreDB));
    }

    timer.complete();
    compileSecs = timer.seconds();
    peakMemorySize = getPeakHeap();

    // Collect summary information.
    CompilePCREStats cs;
    cs.sigs_name = sigs_name;
    if (!sigs_name.empty()) {
        // With no '/' in name, find_last_of() returns npos and pos + 1 wraps
        // to 0, so the whole name is kept.
        const auto pos = name.find_last_of('/');
        cs.signatures = name.substr(pos + 1);
    } else {
        cs.signatures = name;
    }
    cs.db_info = db_info;
    cs.expressionCount = expressions.size();
    cs.compiledSize = compiledSize;
    // Same size as the output vector allocated per EnginePCREContext.
    cs.scratchSize = (capture_cnt + 1) * sizeof(int) * 3;
    cs.compileSecs = compileSecs;
    cs.peakMemorySize = peakMemorySize;

    return ue2::make_unique<EnginePCRE>(move(dbs), move(cs), capture_cnt);
}
|
114
tools/hsbench/engine_pcre.h
Normal file
114
tools/hsbench/engine_pcre.h
Normal file
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ENGINEPCRE_H
|
||||
#define ENGINEPCRE_H
|
||||
|
||||
#include "expressions.h"
|
||||
#include "engine.h"
|
||||
|
||||
#include <pcre.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/** Information about the database compile */
|
||||
struct CompilePCREStats {
    std::string sigs_name;           //!< Signature set name (may be empty).
    std::string signatures;          //!< Name of the expression source.
    std::string db_info;             //!< PCRE version string.
    size_t expressionCount = 0;      //!< Number of expressions compiled.
    size_t compiledSize = 0;         //!< Sum of pattern and study sizes.
    size_t scratchSize = 0;          //!< Size in bytes of the output vector.
    long double compileSecs = 0;     //!< Time taken to build all patterns.
    unsigned int peakMemorySize = 0; //!< Peak heap reading after compile.
};
|
||||
|
||||
/** Engine context which is allocated on a per-thread basis. */
|
||||
class EnginePCREContext : public EngineContext {
public:
    /** Allocates the output vector for patterns with up to capture_cnt groups. */
    explicit EnginePCREContext(int capture_cnt);

    /** Frees the output vector. */
    ~EnginePCREContext();

    // ovec is a raw owning pointer that the destructor frees; a copy would
    // free the same buffer twice, so copying is disabled.
    EnginePCREContext(const EnginePCREContext &) = delete;
    EnginePCREContext &operator=(const EnginePCREContext &) = delete;

    /** Output vector handed to pcre_exec() by EnginePCRE::scan(). */
    int *ovec = nullptr;
};
|
||||
|
||||
struct PcreDB {
|
||||
bool highlander = false;
|
||||
bool utf8 = false;
|
||||
u32 id;
|
||||
pcre *db = nullptr;
|
||||
pcre_extra *extra = nullptr;
|
||||
};
|
||||
|
||||
/** PCRE Engine for scanning data. */
|
||||
class EnginePCRE : public Engine {
|
||||
public:
|
||||
explicit EnginePCRE(std::vector<std::unique_ptr<PcreDB>> dbs_in,
|
||||
CompilePCREStats cs, int capture_cnt_in);
|
||||
~EnginePCRE();
|
||||
|
||||
std::unique_ptr<EngineContext> makeContext() const;
|
||||
|
||||
void scan(const char *data, unsigned int len, unsigned int id,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
void scan_vectored(const char *const *data, const unsigned int *len,
|
||||
unsigned int count, unsigned int streamId,
|
||||
ResultEntry &result, EngineContext &ectx) const;
|
||||
|
||||
std::unique_ptr<EngineStream> streamOpen(EngineContext &ectx,
|
||||
unsigned id) const;
|
||||
|
||||
void streamClose(std::unique_ptr<EngineStream> stream,
|
||||
ResultEntry &result) const;
|
||||
|
||||
void streamCompressExpand(EngineStream &stream,
|
||||
std::vector<char> &temp) const;
|
||||
|
||||
void streamScan(EngineStream &stream, const char *data, unsigned int len,
|
||||
unsigned int id, ResultEntry &result) const;
|
||||
|
||||
void printStats() const;
|
||||
|
||||
void sqlStats(SqlDB &db) const;
|
||||
|
||||
private:
|
||||
std::vector<std::unique_ptr<PcreDB>> dbs;
|
||||
|
||||
CompilePCREStats compile_stats;
|
||||
|
||||
int capture_cnt;
|
||||
};
|
||||
|
||||
std::unique_ptr<EnginePCRE>
|
||||
buildEnginePcre(const ExpressionMap &expressions, const std::string &name,
|
||||
const std::string &sigs_name);
|
||||
|
||||
#endif // ENGINEPCRE_H
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Intel Corporation
|
||||
* Copyright (c) 2016-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -31,6 +31,10 @@
|
||||
#include "common.h"
|
||||
#include "data_corpus.h"
|
||||
#include "engine_hyperscan.h"
|
||||
#if defined(HS_HYBRID)
|
||||
#include "engine_chimera.h"
|
||||
#include "engine_pcre.h"
|
||||
#endif
|
||||
#include "expressions.h"
|
||||
#include "sqldb.h"
|
||||
#include "thread_barrier.h"
|
||||
@@ -87,6 +91,8 @@ namespace /* anonymous */ {
|
||||
|
||||
bool display_per_scan = false;
|
||||
ScanMode scan_mode = ScanMode::STREAMING;
|
||||
bool useHybrid = false;
|
||||
bool usePcre = false;
|
||||
unsigned repeats = 20;
|
||||
string exprPath("");
|
||||
string corpusFile("");
|
||||
@@ -102,7 +108,7 @@ typedef void (*thread_func_t)(void *context);
|
||||
|
||||
class ThreadContext : boost::noncopyable {
|
||||
public:
|
||||
ThreadContext(unsigned num_in, const EngineHyperscan &db_in,
|
||||
ThreadContext(unsigned num_in, const Engine &db_in,
|
||||
thread_barrier &tb_in, thread_func_t function_in,
|
||||
vector<DataBlock> corpus_data_in)
|
||||
: num(num_in), results(repeats), engine(db_in),
|
||||
@@ -155,7 +161,7 @@ public:
|
||||
unsigned num;
|
||||
Timer timer;
|
||||
vector<ResultEntry> results;
|
||||
const EngineHyperscan &engine;
|
||||
const Engine &engine;
|
||||
unique_ptr<EngineContext> enginectx;
|
||||
vector<DataBlock> corpus_data;
|
||||
|
||||
@@ -181,6 +187,10 @@ void usage(const char *error) {
|
||||
" (default: streaming).\n");
|
||||
printf(" -V Benchmark in vectored mode"
|
||||
" (default: streaming).\n");
|
||||
#if defined(HS_HYBRID)
|
||||
printf(" -H Benchmark using Chimera (if supported).\n");
|
||||
printf(" -P Benchmark using PCRE (if supported).\n");
|
||||
#endif
|
||||
#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
|
||||
printf(" -T CPU,CPU,... Benchmark with threads on these CPUs.\n");
|
||||
#endif
|
||||
@@ -214,7 +224,7 @@ struct BenchmarkSigs {
|
||||
static
|
||||
void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
|
||||
UNUSED unique_ptr<Grey> &grey) {
|
||||
const char options[] = "-b:c:Cd:e:E:G:hi:n:No:p:sS:Vw:z:"
|
||||
const char options[] = "-b:c:Cd:e:E:G:hHi:n:No:p:PsS:Vw:z:"
|
||||
#ifdef HAVE_DECL_PTHREAD_SETAFFINITY_NP
|
||||
"T:" // add the thread flag
|
||||
#endif
|
||||
@@ -287,6 +297,14 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
|
||||
usage(nullptr);
|
||||
exit(0);
|
||||
break;
|
||||
case 'H':
|
||||
#if defined(HS_HYBRID)
|
||||
useHybrid = true;
|
||||
#else
|
||||
usage("Hybrid matcher not enabled in this build");
|
||||
exit(1);
|
||||
#endif
|
||||
break;
|
||||
case 'n':
|
||||
if (!fromString(optarg, repeats) || repeats == 0) {
|
||||
usage("Couldn't parse argument to -n flag, should be"
|
||||
@@ -294,6 +312,14 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
|
||||
exit(1);
|
||||
}
|
||||
break;
|
||||
case 'P':
|
||||
#if defined(HS_HYBRID)
|
||||
usePcre = true;
|
||||
#else
|
||||
usage("PCRE matcher not enabled in this build");
|
||||
exit(1);
|
||||
#endif
|
||||
break;
|
||||
case 's':
|
||||
in_sigfile = 2;
|
||||
break;
|
||||
@@ -399,6 +425,24 @@ void processArgs(int argc, char *argv[], vector<BenchmarkSigs> &sigSets,
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Constraints on Chimera and PCRE engines
|
||||
if (useHybrid || usePcre) {
|
||||
if (useHybrid && usePcre) {
|
||||
usage("Can't run both Chimera and PCRE.");
|
||||
exit(1);
|
||||
}
|
||||
if (scan_mode != ScanMode::BLOCK) {
|
||||
usage("Must specify block mode in Chimera or PCRE with "
|
||||
"the -N option.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (forceEditDistance || loadDatabases || saveDatabases) {
|
||||
usage("No extended options are supported in Chimera or PCRE.");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Read in any -s signature sets.
|
||||
for (const auto &file : sigFiles) {
|
||||
SignatureSet sigs;
|
||||
@@ -503,7 +547,7 @@ static
|
||||
void benchStreamingInternal(ThreadContext *ctx, vector<StreamInfo> &streams,
|
||||
bool do_compress) {
|
||||
assert(ctx);
|
||||
const EngineHyperscan &e = ctx->engine;
|
||||
const Engine &e = ctx->engine;
|
||||
const vector<DataBlock> &blocks = ctx->corpus_data;
|
||||
vector<char> compress_buf(do_compress ? 1000 : 0);
|
||||
|
||||
@@ -812,7 +856,7 @@ void sqlResults(const vector<unique_ptr<ThreadContext>> &threads,
|
||||
* the same copy of the data.
|
||||
*/
|
||||
static
|
||||
unique_ptr<ThreadContext> makeThreadContext(const EngineHyperscan &db,
|
||||
unique_ptr<ThreadContext> makeThreadContext(const Engine &db,
|
||||
const vector<DataBlock> &blocks,
|
||||
unsigned id,
|
||||
thread_barrier &sync_barrier) {
|
||||
@@ -839,7 +883,7 @@ unique_ptr<ThreadContext> makeThreadContext(const EngineHyperscan &db,
|
||||
|
||||
/** Run the given benchmark. */
|
||||
static
|
||||
void runBenchmark(const EngineHyperscan &db,
|
||||
void runBenchmark(const Engine &db,
|
||||
const vector<DataBlock> &corpus_blocks) {
|
||||
size_t numThreads;
|
||||
bool useAffinity = false;
|
||||
@@ -936,8 +980,18 @@ int main(int argc, char *argv[]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto engine = buildEngineHyperscan(exprMap, scan_mode, s.name,
|
||||
sigName, *grey);
|
||||
unique_ptr<Engine> engine;
|
||||
if (useHybrid) {
|
||||
#if defined(HS_HYBRID)
|
||||
engine = buildEngineChimera(exprMap, s.name, sigName);
|
||||
} else if (usePcre) {
|
||||
engine = buildEnginePcre(exprMap, s.name, sigName);
|
||||
#endif
|
||||
} else {
|
||||
engine = buildEngineHyperscan(exprMap, scan_mode, s.name,
|
||||
sigName, *grey);
|
||||
}
|
||||
|
||||
if (!engine) {
|
||||
printf("Error: expressions failed to compile.\n");
|
||||
exit(1);
|
||||
|
@@ -5,6 +5,14 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
|
||||
SET(hscheck_SOURCES
|
||||
main.cpp
|
||||
)
|
||||
add_executable(hscheck ${hscheck_SOURCES})
|
||||
target_link_libraries(hscheck hs expressionutil pthread)
|
||||
|
||||
if (BUILD_CHIMERA)
|
||||
include_directories(${PCRE_INCLUDE_DIRS})
|
||||
add_definitions(-DHS_HYBRID)
|
||||
add_executable(hscheck ${hscheck_SOURCES})
|
||||
target_link_libraries(hscheck hs chimera ${PCRE_LDFLAGS} expressionutil pthread)
|
||||
else()
|
||||
add_executable(hscheck ${hscheck_SOURCES})
|
||||
target_link_libraries(hscheck hs expressionutil pthread)
|
||||
endif()
|
||||
|
||||
|
@@ -59,6 +59,11 @@
|
||||
#include "hs_internal.h"
|
||||
#include "ue2common.h"
|
||||
|
||||
#ifdef HS_HYBRID
|
||||
#include <pcre.h>
|
||||
#include "chimera/ch.h"
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <mutex>
|
||||
@@ -77,6 +82,7 @@ namespace /* anonymous */ {
|
||||
// are we in streaming mode? (default: yes)
|
||||
bool g_streaming = true;
|
||||
bool g_vectored = false;
|
||||
bool g_hybrid = false;
|
||||
string g_exprPath("");
|
||||
string g_signatureFile("");
|
||||
bool g_allSignatures = false;
|
||||
@@ -282,34 +288,57 @@ void checkExpression(UNUSED void *threadarg) {
|
||||
|
||||
// Try and compile a database.
|
||||
const char *regexp = regex.c_str();
|
||||
const hs_expr_ext *extp = &ext;
|
||||
|
||||
hs_error_t err;
|
||||
hs_compile_error_t *compile_err;
|
||||
hs_database_t *db = nullptr;
|
||||
|
||||
if (g_hybrid) {
|
||||
#ifdef HS_HYBRID
|
||||
ch_compile_error_t *ch_compile_err;
|
||||
ch_database_t *hybrid_db = nullptr;
|
||||
err = ch_compile_multi(®exp, &flags, nullptr, 1, CH_MODE_GROUPS,
|
||||
nullptr, &hybrid_db, &ch_compile_err);
|
||||
if (err == HS_SUCCESS) {
|
||||
assert(hybrid_db);
|
||||
recordSuccess(g_exprMap, it->first);
|
||||
ch_free_database(hybrid_db);
|
||||
} else {
|
||||
assert(!hybrid_db);
|
||||
assert(ch_compile_err);
|
||||
recordFailure(g_exprMap, it->first, ch_compile_err->message);
|
||||
ch_free_compile_error(ch_compile_err);
|
||||
}
|
||||
#else
|
||||
cerr << "Hybrid mode not available in this build." << endl;
|
||||
exit(1);
|
||||
#endif // HS_HYBRID
|
||||
} else {
|
||||
const hs_expr_ext *extp = &ext;
|
||||
hs_compile_error_t *compile_err;
|
||||
hs_database_t *db = nullptr;
|
||||
|
||||
#if !defined(RELEASE_BUILD)
|
||||
// This variant is available in non-release builds and allows us to
|
||||
// modify greybox settings.
|
||||
err = hs_compile_multi_int(®exp, &flags, nullptr, &extp, 1, mode,
|
||||
nullptr, &db, &compile_err, *g_grey);
|
||||
// This variant is available in non-release builds and allows us to
|
||||
// modify greybox settings.
|
||||
err = hs_compile_multi_int(®exp, &flags, nullptr, &extp, 1, mode,
|
||||
nullptr, &db, &compile_err, *g_grey);
|
||||
#else
|
||||
err = hs_compile_ext_multi(®exp, &flags, nullptr, &extp, 1, mode,
|
||||
nullptr, &db, &compile_err);
|
||||
err = hs_compile_ext_multi(®exp, &flags, nullptr, &extp, 1, mode,
|
||||
nullptr, &db, &compile_err);
|
||||
#endif
|
||||
|
||||
if (err == HS_SUCCESS) {
|
||||
assert(db);
|
||||
recordSuccess(g_exprMap, it->first);
|
||||
hs_free_database(db);
|
||||
if (check_logical) {
|
||||
cacheSubExpr(it->first, regex, flags, ext);
|
||||
if (err == HS_SUCCESS) {
|
||||
assert(db);
|
||||
recordSuccess(g_exprMap, it->first);
|
||||
hs_free_database(db);
|
||||
if (check_logical) {
|
||||
cacheSubExpr(it->first, regex, flags, ext);
|
||||
}
|
||||
} else {
|
||||
assert(!db);
|
||||
assert(compile_err);
|
||||
recordFailure(g_exprMap, it->first, compile_err->message);
|
||||
hs_free_compile_error(compile_err);
|
||||
}
|
||||
} else {
|
||||
assert(!db);
|
||||
assert(compile_err);
|
||||
recordFailure(g_exprMap, it->first, compile_err->message);
|
||||
hs_free_compile_error(compile_err);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -429,6 +458,9 @@ void usage() {
|
||||
#endif
|
||||
<< " -V Operate in vectored mode." << endl
|
||||
<< " -N Operate in block mode (default: streaming)." << endl
|
||||
#ifdef HS_HYBRID
|
||||
<< " -H Operate in hybrid mode." << endl
|
||||
#endif
|
||||
<< " -L Pass HS_FLAG_SOM_LEFTMOST for all expressions (default: off)." << endl
|
||||
<< " -8 Force UTF8 mode on all patterns." << endl
|
||||
<< " -T NUM Run with NUM threads." << endl
|
||||
@@ -440,7 +472,7 @@ void usage() {
|
||||
|
||||
static
|
||||
void processArgs(int argc, char *argv[], UNUSED unique_ptr<Grey> &grey) {
|
||||
const char options[] = "e:E:s:z:hLNV8G:T:BC";
|
||||
const char options[] = "e:E:s:z:hHLNV8G:T:BC";
|
||||
bool signatureSet = false;
|
||||
|
||||
for (;;) {
|
||||
@@ -492,6 +524,9 @@ void processArgs(int argc, char *argv[], UNUSED unique_ptr<Grey> &grey) {
|
||||
g_streaming = false;
|
||||
g_vectored = true;
|
||||
break;
|
||||
case 'H':
|
||||
g_hybrid = true;
|
||||
break;
|
||||
case 'T':
|
||||
num_of_threads = atoi(optarg);
|
||||
break;
|
||||
|
@@ -1,9 +1,3 @@
|
||||
# we have a fixed requirement for PCRE
|
||||
set(PCRE_REQUIRED_MAJOR_VERSION 8)
|
||||
set(PCRE_REQUIRED_MINOR_VERSION 41)
|
||||
set(PCRE_REQUIRED_VERSION ${PCRE_REQUIRED_MAJOR_VERSION}.${PCRE_REQUIRED_MINOR_VERSION})
|
||||
|
||||
include (${CMAKE_MODULE_PATH}/pcre.cmake)
|
||||
if (NOT CORRECT_PCRE_VERSION)
|
||||
message(STATUS "PCRE ${PCRE_REQUIRED_VERSION} not found, not building hscollider")
|
||||
return()
|
||||
@@ -29,6 +23,8 @@ set_source_files_properties(
|
||||
|
||||
ragelmaker(ColliderCorporaParser.rl)
|
||||
|
||||
add_definitions(-DHS_HYBRID)
|
||||
|
||||
# only set these after all tests are done
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
|
||||
@@ -69,7 +65,7 @@ add_dependencies(hscollider ragel_ColliderCorporaParser)
|
||||
add_dependencies(hscollider pcre)
|
||||
|
||||
if(NOT WIN32)
|
||||
target_link_libraries(hscollider hs ${PCRE_LDFLAGS} databaseutil
|
||||
target_link_libraries(hscollider hs chimera ${PCRE_LDFLAGS} databaseutil
|
||||
expressionutil corpusomatic crosscompileutil pthread
|
||||
"${BACKTRACE_LDFLAGS}")
|
||||
|
||||
@@ -78,7 +74,7 @@ if(HAVE_BACKTRACE)
|
||||
"${BACKTRACE_CFLAGS}")
|
||||
endif()
|
||||
else() # WIN32
|
||||
target_link_libraries(hscollider hs ${PCRE_LDFLAGS} databaseutil
|
||||
target_link_libraries(hscollider hs chimera ${PCRE_LDFLAGS} databaseutil
|
||||
expressionutil corpusomatic crosscompileutil)
|
||||
endif()
|
||||
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -54,10 +54,10 @@ public:
|
||||
explicit DatabaseProxy(const std::set<unsigned> &expr_ids)
|
||||
: ids(expr_ids) {}
|
||||
|
||||
explicit DatabaseProxy(std::shared_ptr<HyperscanDB> built_db)
|
||||
explicit DatabaseProxy(std::shared_ptr<BaseDB> built_db)
|
||||
: db(built_db) {}
|
||||
|
||||
std::shared_ptr<HyperscanDB> get(const UltimateTruth &ultimate) {
|
||||
std::shared_ptr<BaseDB> get(const UltimateTruth &ultimate) {
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
if (failed) {
|
||||
// We have previously failed to compile this database.
|
||||
@@ -80,7 +80,7 @@ public:
|
||||
|
||||
private:
|
||||
std::mutex mutex;
|
||||
std::shared_ptr<HyperscanDB> db;
|
||||
std::shared_ptr<BaseDB> db;
|
||||
std::set<unsigned> ids;
|
||||
bool failed = false; // Database failed compilation.
|
||||
};
|
||||
|
@@ -187,6 +187,14 @@ string pcreErrStr(int err) {
|
||||
}
|
||||
}
|
||||
|
||||
/* that is, a mode provided by native hyperscan */
|
||||
static
|
||||
bool isStandardMode(unsigned int mode) {
|
||||
return mode == MODE_BLOCK
|
||||
|| mode == MODE_STREAMING
|
||||
|| mode == MODE_VECTORED;
|
||||
}
|
||||
|
||||
GroundTruth::GroundTruth(ostream &os, const ExpressionMap &expr,
|
||||
unsigned long int limit,
|
||||
unsigned long int limit_recursion)
|
||||
@@ -194,8 +202,10 @@ GroundTruth::GroundTruth(ostream &os, const ExpressionMap &expr,
|
||||
matchLimitRecursion(limit_recursion) {}
|
||||
|
||||
void GroundTruth::global_prep() {
|
||||
// We're using pcre callouts
|
||||
pcre_callout = &pcreCallOut;
|
||||
if (isStandardMode(colliderMode)) {
|
||||
// We're using pcre callouts
|
||||
pcre_callout = &pcreCallOut;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
@@ -262,11 +272,17 @@ GroundTruth::compile(unsigned id, bool no_callouts) {
|
||||
throw PcreCompileFailure("Unsupported extended flags.");
|
||||
}
|
||||
|
||||
// Hybrid mode implies SOM.
|
||||
if (colliderMode == MODE_HYBRID) {
|
||||
assert(!use_NFA);
|
||||
som = true;
|
||||
}
|
||||
|
||||
// SOM flags might be set globally.
|
||||
som |= !!somFlags;
|
||||
|
||||
// For traditional Hyperscan, add global callout to pattern.
|
||||
if (!combination && !no_callouts) {
|
||||
if (!combination && !no_callouts && isStandardMode(colliderMode)) {
|
||||
addCallout(re);
|
||||
}
|
||||
|
||||
@@ -403,6 +419,79 @@ int scanBasic(const CompiledPcre &compiled, const string &buffer,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static
|
||||
bool isUtf8(const CompiledPcre &compiled) {
|
||||
unsigned long int options = 0;
|
||||
pcre_fullinfo(compiled.bytecode, NULL, PCRE_INFO_OPTIONS, &options);
|
||||
return options & PCRE_UTF8;
|
||||
}
|
||||
|
||||
static
|
||||
CaptureVec makeCaptureVec(const vector<int> &ovector, int ret) {
|
||||
assert(ret > 0);
|
||||
|
||||
CaptureVec cap;
|
||||
|
||||
if (no_groups) {
|
||||
return cap; // No group info requested.
|
||||
}
|
||||
|
||||
cap.reserve(ret * 2);
|
||||
for (int i = 0; i < ret * 2; i += 2) {
|
||||
int from = ovector[i], to = ovector[i + 1];
|
||||
cap.push_back(make_pair(from, to));
|
||||
}
|
||||
return cap;
|
||||
}
|
||||
|
||||
static
|
||||
int scanHybrid(const CompiledPcre &compiled, const string &buffer,
|
||||
const pcre_extra &extra, vector<int> &ovector,
|
||||
ResultSet &rs, ostream &out) {
|
||||
int len = (int)buffer.length();
|
||||
int startoffset = 0;
|
||||
bool utf8 = isUtf8(compiled);
|
||||
|
||||
int flags = 0;
|
||||
int ret;
|
||||
do {
|
||||
ret = pcre_exec(compiled.bytecode, &extra, buffer.c_str(), len,
|
||||
startoffset, flags, &ovector[0], ovector.size());
|
||||
|
||||
if (ret <= PCRE_ERROR_NOMATCH) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
int from = ovector.at(0);
|
||||
int to = ovector.at(1);
|
||||
rs.addMatch(from, to, makeCaptureVec(ovector, ret));
|
||||
|
||||
if (echo_matches) {
|
||||
out << "PCRE Match @ (" << from << "," << to << ")" << endl;
|
||||
}
|
||||
|
||||
// If we only wanted a single match, we're done.
|
||||
if (compiled.highlander) break;
|
||||
|
||||
// Next scan starts at the first codepoint after the match. It's
|
||||
// possible that we have a vacuous match, in which case we must step
|
||||
// past it to ensure that we always progress.
|
||||
if (from != to) {
|
||||
startoffset = to;
|
||||
} else if (utf8) {
|
||||
startoffset = to + 1;
|
||||
while (startoffset < len
|
||||
&& ((buffer[startoffset] & 0xc0) == UTF_CONT_BYTE_HEADER)) {
|
||||
++startoffset;
|
||||
}
|
||||
} else {
|
||||
startoffset = to + 1;
|
||||
}
|
||||
} while (startoffset <= len);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static
|
||||
int scanOffset(const CompiledPcre &compiled, const string &buffer,
|
||||
const pcre_extra &extra, vector<int> &ovector,
|
||||
@@ -532,15 +621,24 @@ bool GroundTruth::run(unsigned, const CompiledPcre &compiled,
|
||||
pcre_extra extra;
|
||||
extra.flags = 0;
|
||||
|
||||
// Switch on callouts.
|
||||
extra.flags |= PCRE_EXTRA_CALLOUT_DATA;
|
||||
extra.callout_data = &ctx;
|
||||
// If running in traditional HyperScan mode, switch on callouts.
|
||||
bool usingCallouts = isStandardMode(colliderMode);
|
||||
if (usingCallouts) {
|
||||
// Switch on callouts.
|
||||
extra.flags |= PCRE_EXTRA_CALLOUT_DATA;
|
||||
extra.callout_data = &ctx;
|
||||
}
|
||||
|
||||
// Set the match_limit (in order to bound execution time on very complex
|
||||
// patterns)
|
||||
extra.flags |= (PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION);
|
||||
extra.match_limit = matchLimit;
|
||||
extra.match_limit_recursion = matchLimitRecursion;
|
||||
if (colliderMode == MODE_HYBRID) {
|
||||
extra.match_limit = 10000000;
|
||||
extra.match_limit_recursion = 1500;
|
||||
} else {
|
||||
extra.match_limit = matchLimit;
|
||||
extra.match_limit_recursion = matchLimitRecursion;
|
||||
}
|
||||
|
||||
#ifdef PCRE_NO_START_OPTIMIZE
|
||||
// Switch off optimizations that may result in callouts not occurring.
|
||||
@@ -553,6 +651,7 @@ bool GroundTruth::run(unsigned, const CompiledPcre &compiled,
|
||||
ovector.resize(ovecsize);
|
||||
|
||||
int ret;
|
||||
bool hybrid = false;
|
||||
switch (colliderMode) {
|
||||
case MODE_BLOCK:
|
||||
case MODE_STREAMING:
|
||||
@@ -563,6 +662,10 @@ bool GroundTruth::run(unsigned, const CompiledPcre &compiled,
|
||||
ret = scanBasic(compiled, buffer, extra, ovector, ctx);
|
||||
}
|
||||
break;
|
||||
case MODE_HYBRID:
|
||||
ret = scanHybrid(compiled, buffer, extra, ovector, rs, out);
|
||||
hybrid = true;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
ret = PCRE_ERROR_NULL;
|
||||
@@ -595,7 +698,7 @@ bool GroundTruth::run(unsigned, const CompiledPcre &compiled,
|
||||
return true;
|
||||
}
|
||||
|
||||
if (compiled.som) {
|
||||
if (compiled.som && !hybrid) {
|
||||
filterLeftmostSom(rs);
|
||||
}
|
||||
|
||||
|
@@ -35,25 +35,36 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
// Type for capturing groups: a vector of (from, to) offsets, with both set to
|
||||
// -1 for inactive groups (like pcre's ovector). Used by hybrid modes.
|
||||
typedef std::vector<std::pair<int, int> > CaptureVec;
|
||||
|
||||
// Class representing a single match, encapsulating to/from offsets.
|
||||
class MatchResult {
|
||||
public:
|
||||
MatchResult(unsigned long long start, unsigned long long end)
|
||||
: from(start), to(end) {}
|
||||
MatchResult(unsigned long long start, unsigned long long end,
|
||||
const CaptureVec &cap)
|
||||
: from(start), to(end), captured(cap) {}
|
||||
|
||||
bool operator<(const MatchResult &a) const {
|
||||
if (from != a.from) {
|
||||
return from < a.from;
|
||||
}
|
||||
return to < a.to;
|
||||
if (to != a.to) {
|
||||
return to < a.to;
|
||||
}
|
||||
return captured < a.captured;
|
||||
}
|
||||
|
||||
bool operator==(const MatchResult &a) const {
|
||||
return from == a.from && to == a.to;
|
||||
return from == a.from && to == a.to && captured == a.captured;
|
||||
}
|
||||
|
||||
unsigned long long from;
|
||||
unsigned long long to;
|
||||
CaptureVec captured;
|
||||
};
|
||||
|
||||
enum ResultSource {
|
||||
@@ -114,6 +125,19 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// Add a match (with capturing vector)
|
||||
void addMatch(unsigned long long from, unsigned long long to,
|
||||
const CaptureVec &cap, int block = 0) {
|
||||
MatchResult m(from, to, cap);
|
||||
matches.insert(m);
|
||||
|
||||
if (matches_by_block[block].find(m) != matches_by_block[block].end()) {
|
||||
dupe_matches.insert(m);
|
||||
} else {
|
||||
matches_by_block[block].insert(m);
|
||||
}
|
||||
}
|
||||
|
||||
// Clear all matches.
|
||||
void clear() {
|
||||
matches.clear();
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -90,19 +90,14 @@ hs_error_t open_magic_stream(const hs_database_t *db, unsigned flags,
|
||||
|
||||
#endif // RELEASE_BUILD
|
||||
|
||||
class HyperscanDB : boost::noncopyable {
|
||||
class BaseDB : boost::noncopyable {
|
||||
public:
|
||||
// Constructor takes iterators over a container of pattern IDs.
|
||||
template <class Iter>
|
||||
HyperscanDB(hs_database_t *db_in, Iter ids_begin, Iter ids_end)
|
||||
: db(db_in), ids(ids_begin, ids_end) {}
|
||||
BaseDB(Iter ids_begin, Iter ids_end)
|
||||
: ids(ids_begin, ids_end) {}
|
||||
|
||||
~HyperscanDB() {
|
||||
hs_free_database(db);
|
||||
}
|
||||
|
||||
// Underlying Hyperscan database pointer.
|
||||
hs_database_t *db;
|
||||
virtual ~BaseDB();
|
||||
|
||||
// The set of expression IDs that must return their matches in order.
|
||||
unordered_set<unsigned> ordered;
|
||||
@@ -111,15 +106,55 @@ public:
|
||||
unordered_set<unsigned> ids;
|
||||
};
|
||||
|
||||
BaseDB::~BaseDB() { }
|
||||
|
||||
class HyperscanDB : public BaseDB {
|
||||
public:
|
||||
// Constructor takes iterators over a container of pattern IDs.
|
||||
template <class Iter>
|
||||
HyperscanDB(hs_database_t *db_in, Iter ids_begin, Iter ids_end)
|
||||
: BaseDB(ids_begin, ids_end), db(db_in) {}
|
||||
|
||||
~HyperscanDB();
|
||||
|
||||
// Underlying Hyperscan database pointer.
|
||||
hs_database_t *db;
|
||||
};
|
||||
|
||||
HyperscanDB::~HyperscanDB() {
|
||||
hs_free_database(db);
|
||||
}
|
||||
|
||||
#ifdef HS_HYBRID
|
||||
|
||||
class HybridDB : public BaseDB {
|
||||
public:
|
||||
// Constructor takes iterators over a container of pattern IDs.
|
||||
template <class Iter>
|
||||
HybridDB(ch_database_t *db_in, Iter ids_begin, Iter ids_end)
|
||||
: BaseDB(ids_begin, ids_end), db(db_in) {}
|
||||
|
||||
~HybridDB();
|
||||
|
||||
// Underlying Hyperscan database pointer.
|
||||
ch_database_t *db;
|
||||
};
|
||||
|
||||
HybridDB::~HybridDB() {
|
||||
ch_free_database(db);
|
||||
}
|
||||
|
||||
#endif // HS_HYBRID
|
||||
|
||||
// Used to track the ID and result set.
|
||||
namespace {
|
||||
struct MultiContext {
|
||||
MultiContext(unsigned int id_in, const HyperscanDB &db_in, ResultSet *rs_in,
|
||||
MultiContext(unsigned int id_in, const BaseDB &db_in, ResultSet *rs_in,
|
||||
bool single_in, ostream &os)
|
||||
: id(id_in), db(db_in), rs(rs_in), single(single_in), out(os) {}
|
||||
unsigned int id;
|
||||
int block = 0;
|
||||
const HyperscanDB &db;
|
||||
const BaseDB &db;
|
||||
ResultSet *rs;
|
||||
u64a lastRawMatch = 0; /* store last known unadjusted match location */
|
||||
u64a lastOrderMatch = 0;
|
||||
@@ -230,6 +265,75 @@ int callbackMulti(unsigned int id, unsigned long long from,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef HS_HYBRID
|
||||
|
||||
// Hybrid matcher callback.
|
||||
static
|
||||
ch_callback_t callbackHybrid(unsigned id, unsigned long long from,
|
||||
unsigned long long to, unsigned, unsigned size,
|
||||
const ch_capture_t *captured, void *ctx) {
|
||||
MultiContext *mctx = static_cast<MultiContext *>(ctx);
|
||||
assert(mctx);
|
||||
assert(mctx->rs);
|
||||
assert(mctx->in_scan_call);
|
||||
|
||||
ostream &out = mctx->out;
|
||||
|
||||
to -= g_corpora_prefix.size();
|
||||
|
||||
if (mctx->terminated) {
|
||||
out << "UE2 Match @ (" << from << "," << to << ") for " << id
|
||||
<< " after termination" << endl;
|
||||
mctx->rs->match_after_halt = true;
|
||||
}
|
||||
|
||||
if (mctx->single || id == mctx->id) {
|
||||
CaptureVec cap;
|
||||
for (unsigned int i = 0; i < size; i++) {
|
||||
if (!(captured[i].flags & CH_CAPTURE_FLAG_ACTIVE)) {
|
||||
cap.push_back(make_pair(-1, -1));
|
||||
} else {
|
||||
cap.push_back(make_pair(captured[i].from, captured[i].to));
|
||||
}
|
||||
}
|
||||
mctx->rs->addMatch(from, to, cap);
|
||||
}
|
||||
|
||||
if (echo_matches) {
|
||||
out << "Match @ [" << from << "," << to << "] for " << id << endl;
|
||||
out << " Captured " << size << " groups: ";
|
||||
for (unsigned int i = 0; i < size; i++) {
|
||||
if (!(captured[i].flags & CH_CAPTURE_FLAG_ACTIVE)) {
|
||||
out << "{} ";
|
||||
} else {
|
||||
out << "{" << captured[i].from << "," << captured[i].to << "} ";
|
||||
}
|
||||
}
|
||||
out << endl;
|
||||
}
|
||||
|
||||
if (limit_matches && mctx->rs->matches.size() == limit_matches) {
|
||||
mctx->terminated = true;
|
||||
return CH_CALLBACK_TERMINATE;
|
||||
}
|
||||
|
||||
return CH_CALLBACK_CONTINUE;
|
||||
}
|
||||
|
||||
// Hybrid matcher error callback.
|
||||
static
|
||||
ch_callback_t errorCallback(UNUSED ch_error_event_t errorType, UNUSED unsigned int id, void *,
|
||||
void *ctx) {
|
||||
UNUSED MultiContext *mctx = static_cast<MultiContext *>(ctx);
|
||||
assert(mctx);
|
||||
assert(mctx->rs);
|
||||
assert(mctx->in_scan_call);
|
||||
|
||||
return CH_CALLBACK_SKIP_PATTERN;
|
||||
}
|
||||
|
||||
#endif // HS_HYBRID
|
||||
|
||||
static
|
||||
void filterLeftmostSom(ResultSet &rs) {
|
||||
if (rs.matches.size() <= 1) {
|
||||
@@ -252,6 +356,9 @@ UltimateTruth::UltimateTruth(ostream &os, const ExpressionMap &expr,
|
||||
const Grey &grey_in, unsigned int streamBlocks)
|
||||
: grey(grey_in), out(os), m_expr(expr), m_xcompile(false),
|
||||
m_streamBlocks(streamBlocks), scratch(nullptr),
|
||||
#ifdef HS_HYBRID
|
||||
chimeraScratch(nullptr),
|
||||
#endif
|
||||
platform(plat) {
|
||||
// Build our mode flags.
|
||||
|
||||
@@ -265,15 +372,27 @@ UltimateTruth::UltimateTruth(ostream &os, const ExpressionMap &expr,
|
||||
case MODE_VECTORED:
|
||||
m_mode = HS_MODE_VECTORED;
|
||||
break;
|
||||
case MODE_HYBRID:
|
||||
m_mode = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
// Set desired SOM precision, if we're in streaming mode.
|
||||
if (colliderMode == MODE_STREAMING) {
|
||||
m_mode |= somPrecisionMode;
|
||||
}
|
||||
|
||||
#ifdef HS_HYBRID
|
||||
if (colliderMode == MODE_HYBRID && !no_groups) {
|
||||
m_mode |= CH_MODE_GROUPS;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
UltimateTruth::~UltimateTruth() {
|
||||
#ifdef HS_HYBRID
|
||||
ch_free_scratch(chimeraScratch);
|
||||
#endif
|
||||
hs_free_scratch(scratch);
|
||||
}
|
||||
|
||||
@@ -327,13 +446,13 @@ void mangle_scratch(hs_scratch_t *scratch) {
|
||||
scratch->fdr_conf_offset = 0xe4;
|
||||
}
|
||||
|
||||
bool UltimateTruth::blockScan(const HyperscanDB &hdb, const string &buffer,
|
||||
bool UltimateTruth::blockScan(const BaseDB &bdb, const string &buffer,
|
||||
size_t align, match_event_handler callback,
|
||||
void *ctx_in, ResultSet *) {
|
||||
assert(colliderMode == MODE_BLOCK);
|
||||
assert(!m_xcompile);
|
||||
|
||||
const hs_database_t *db = hdb.db;
|
||||
const hs_database_t *db = reinterpret_cast<const HyperscanDB &>(bdb).db;
|
||||
assert(db);
|
||||
MultiContext *ctx = (MultiContext *)ctx_in;
|
||||
|
||||
@@ -438,13 +557,13 @@ hs_stream_t *compressAndResetExpandStream(const hs_database_t *db,
|
||||
return out;
|
||||
}
|
||||
|
||||
bool UltimateTruth::streamingScan(const HyperscanDB &hdb, const string &buffer,
|
||||
bool UltimateTruth::streamingScan(const BaseDB &bdb, const string &buffer,
|
||||
size_t align, match_event_handler callback,
|
||||
void *ctx_in, ResultSet *rs) {
|
||||
assert(colliderMode == MODE_STREAMING);
|
||||
assert(!m_xcompile);
|
||||
|
||||
const hs_database_t *db = hdb.db;
|
||||
const hs_database_t *db = reinterpret_cast<const HyperscanDB &>(bdb).db;
|
||||
assert(db);
|
||||
MultiContext *ctx = (MultiContext *)ctx_in;
|
||||
|
||||
@@ -594,13 +713,13 @@ bool UltimateTruth::streamingScan(const HyperscanDB &hdb, const string &buffer,
|
||||
return ret == HS_SUCCESS;
|
||||
}
|
||||
|
||||
bool UltimateTruth::vectoredScan(const HyperscanDB &hdb, const string &buffer,
|
||||
bool UltimateTruth::vectoredScan(const BaseDB &bdb, const string &buffer,
|
||||
size_t align, match_event_handler callback,
|
||||
void *ctx_in, ResultSet *rs) {
|
||||
assert(colliderMode == MODE_VECTORED);
|
||||
assert(!m_xcompile);
|
||||
|
||||
const hs_database_t *db = hdb.db;
|
||||
const hs_database_t *db = reinterpret_cast<const HyperscanDB &>(bdb).db;
|
||||
assert(db);
|
||||
MultiContext *ctx = (MultiContext *)ctx_in;
|
||||
|
||||
@@ -682,19 +801,67 @@ bool UltimateTruth::vectoredScan(const HyperscanDB &hdb, const string &buffer,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool UltimateTruth::run(unsigned int id, shared_ptr<const HyperscanDB> hdb,
|
||||
#ifdef HS_HYBRID
|
||||
bool UltimateTruth::hybridScan(const BaseDB &bdb, const string &buffer,
|
||||
size_t align, ch_match_event_handler callback,
|
||||
ch_error_event_handler error_callback,
|
||||
void *ctx_in, ResultSet *) {
|
||||
assert(colliderMode == MODE_HYBRID);
|
||||
assert(!m_xcompile);
|
||||
|
||||
const ch_database_t *db = reinterpret_cast<const HybridDB &>(bdb).db;
|
||||
assert(db);
|
||||
MultiContext *ctx = (MultiContext *)ctx_in;
|
||||
|
||||
char *realigned = setupScanBuffer(buffer.c_str(), buffer.size(), align);
|
||||
if (!realigned) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (use_copy_scratch && !cloneScratch()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
ctx->in_scan_call = true;
|
||||
ch_error_t ret =
|
||||
ch_scan(db, realigned, buffer.size(), 0, chimeraScratch, callback,
|
||||
error_callback, ctx);
|
||||
ctx->in_scan_call = false;
|
||||
|
||||
if (g_verbose) {
|
||||
out << "Scan call returned " << ret << endl;
|
||||
}
|
||||
|
||||
if (ctx->terminated) {
|
||||
if (g_verbose && ret != CH_SCAN_TERMINATED) {
|
||||
out << "Scan should have returned CH_SCAN_TERMINATED, returned "
|
||||
<< ret << " instead." << endl;
|
||||
}
|
||||
return ret == CH_SCAN_TERMINATED;
|
||||
}
|
||||
|
||||
if (g_verbose && ret != CH_SUCCESS) {
|
||||
out << "Scan should have returned CH_SUCCESS, returned " << ret
|
||||
<< " instead." << endl;
|
||||
}
|
||||
|
||||
return ret == CH_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool UltimateTruth::run(unsigned int id, shared_ptr<const BaseDB> bdb,
|
||||
const string &buffer, bool single_pattern,
|
||||
unsigned int align, ResultSet &rs) {
|
||||
assert(!m_xcompile);
|
||||
assert(hdb);
|
||||
assert(bdb);
|
||||
|
||||
// Ensure that scratch is appropriate for this database.
|
||||
if (!allocScratch(hdb)) {
|
||||
if (!allocScratch(bdb)) {
|
||||
out << "Scratch alloc failed." << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
MultiContext ctx(id, *hdb, &rs, single_pattern, out);
|
||||
MultiContext ctx(id, *bdb, &rs, single_pattern, out);
|
||||
if (!g_corpora_suffix.empty()) {
|
||||
ctx.use_max_offset = true;
|
||||
ctx.max_offset = buffer.size() - g_corpora_suffix.size();
|
||||
@@ -702,11 +869,20 @@ bool UltimateTruth::run(unsigned int id, shared_ptr<const HyperscanDB> hdb,
|
||||
|
||||
switch (colliderMode) {
|
||||
case MODE_BLOCK:
|
||||
return blockScan(*hdb, buffer, align, callbackMulti, &ctx, &rs);
|
||||
return blockScan(*bdb, buffer, align, callbackMulti, &ctx, &rs);
|
||||
case MODE_STREAMING:
|
||||
return streamingScan(*hdb, buffer, align, callbackMulti, &ctx, &rs);
|
||||
return streamingScan(*bdb, buffer, align, callbackMulti, &ctx, &rs);
|
||||
case MODE_VECTORED:
|
||||
return vectoredScan(*hdb, buffer, align, callbackMulti, &ctx, &rs);
|
||||
return vectoredScan(*bdb, buffer, align, callbackMulti, &ctx, &rs);
|
||||
case MODE_HYBRID:
|
||||
#ifdef HS_HYBRID
|
||||
return hybridScan(*bdb, buffer, align, callbackHybrid, errorCallback,
|
||||
&ctx, &rs);
|
||||
#else
|
||||
cerr << "Hybrid mode not available in this build." << endl;
|
||||
abort();
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
assert(0);
|
||||
@@ -739,7 +915,7 @@ bool isOrdered(const string &expr, unsigned int flags) {
|
||||
return ordered;
|
||||
}
|
||||
|
||||
static unique_ptr<HyperscanDB>
|
||||
static unique_ptr<BaseDB>
|
||||
compileHyperscan(vector<const char *> &patterns, vector<unsigned> &flags,
|
||||
vector<unsigned> &idsvec, ptr_vector<hs_expr_ext> &ext,
|
||||
unsigned mode, const hs_platform_info *platform, string &error,
|
||||
@@ -762,7 +938,30 @@ compileHyperscan(vector<const char *> &patterns, vector<unsigned> &flags,
|
||||
return ue2::make_unique<HyperscanDB>(db, idsvec.begin(), idsvec.end());
|
||||
}
|
||||
|
||||
shared_ptr<HyperscanDB> UltimateTruth::compile(const set<unsigned> &ids,
|
||||
#ifdef HS_HYBRID
|
||||
/**
 * Compiles the given patterns into a Chimera database.
 *
 * \param patterns  Expressions to compile.
 * \param flags     Per-expression flags; parallel to \p patterns.
 * \param idsvec    Per-expression IDs; parallel to \p patterns.
 * \param mode      Mode word passed to ch_compile_multi.
 * \param platform  Target platform passed to ch_compile_multi.
 * \param error     Receives the compiler's message on failure.
 * \return A HybridDB wrapping the new database, or nullptr on failure.
 */
static unique_ptr<BaseDB>
compileHybrid(vector<const char *> &patterns,
              vector<unsigned> &flags, vector<unsigned> &idsvec,
              unsigned mode, const hs_platform_info *platform, string &error) {
    const unsigned count = patterns.size();
    ch_database_t *db = nullptr;
    // Initialised so a failure path that leaves it untouched cannot hand us
    // an indeterminate pointer.
    ch_compile_error_t *compile_err = nullptr;

    // data() is well-defined even for empty vectors, unlike &v[0].
    ch_error_t err = ch_compile_multi(patterns.data(), flags.data(),
                                      idsvec.data(), count, mode, platform,
                                      &db, &compile_err);

    // Chimera reports through ch_error_t, so compare with CH_SUCCESS.
    if (err != CH_SUCCESS) {
        if (compile_err) {
            error = compile_err->message;
            ch_free_compile_error(compile_err);
        } else {
            error = "Hybrid compile failed without an error message.";
        }
        return nullptr;
    }

    return ue2::make_unique<HybridDB>(db, idsvec.begin(), idsvec.end());
}
|
||||
#endif
|
||||
|
||||
shared_ptr<BaseDB> UltimateTruth::compile(const set<unsigned> &ids,
|
||||
string &error) const {
|
||||
// Build our vectors for compilation
|
||||
const size_t count = ids.size();
|
||||
@@ -811,6 +1010,17 @@ shared_ptr<HyperscanDB> UltimateTruth::compile(const set<unsigned> &ids,
|
||||
ext[n].edit_distance = edit_distance;
|
||||
}
|
||||
|
||||
if (colliderMode == MODE_HYBRID) {
|
||||
if (ext[n].flags) {
|
||||
error = "Hybrid does not support extended parameters.";
|
||||
return nullptr;
|
||||
}
|
||||
// We can also strip some other flags in the hybrid matcher.
|
||||
flags[n] &= ~HS_FLAG_PREFILTER; // prefilter always used
|
||||
flags[n] &= ~HS_FLAG_ALLOWEMPTY; // empty always allowed
|
||||
flags[n] &= ~HS_FLAG_SOM_LEFTMOST; // SOM always on
|
||||
}
|
||||
|
||||
n++;
|
||||
}
|
||||
|
||||
@@ -827,8 +1037,18 @@ shared_ptr<HyperscanDB> UltimateTruth::compile(const set<unsigned> &ids,
|
||||
idsvec.push_back(0);
|
||||
}
|
||||
|
||||
auto db = compileHyperscan(patterns, flags, idsvec, ext, m_mode, platform,
|
||||
error, grey);
|
||||
unique_ptr<BaseDB> db;
|
||||
if (colliderMode == MODE_HYBRID) {
|
||||
#ifdef HS_HYBRID
|
||||
db = compileHybrid(patterns, flags, idsvec, m_mode, platform, error);
|
||||
#else
|
||||
error = "Hybrid mode not available in this build.";
|
||||
#endif
|
||||
} else {
|
||||
db = compileHyperscan(patterns, flags, idsvec, ext, m_mode,
|
||||
platform, error, grey);
|
||||
}
|
||||
|
||||
if (!db) {
|
||||
return nullptr;
|
||||
}
|
||||
@@ -850,18 +1070,29 @@ shared_ptr<HyperscanDB> UltimateTruth::compile(const set<unsigned> &ids,
|
||||
return move(db);
|
||||
}
|
||||
|
||||
bool UltimateTruth::allocScratch(shared_ptr<const HyperscanDB> db) {
|
||||
bool UltimateTruth::allocScratch(shared_ptr<const BaseDB> db) {
|
||||
assert(db);
|
||||
|
||||
// We explicitly avoid running scratch allocators for the same HyperscanDB
|
||||
// We explicitly avoid running scratch allocators for the same BaseDB
|
||||
// over and over again by retaining a shared_ptr to the last one we saw.
|
||||
if (db == last_db) {
|
||||
return true;
|
||||
}
|
||||
|
||||
hs_error_t err = hs_alloc_scratch(db.get()->db, &scratch);
|
||||
if (err != HS_SUCCESS) {
|
||||
return false;
|
||||
if (colliderMode == MODE_HYBRID) {
|
||||
#ifdef HS_HYBRID
|
||||
ch_error_t err = ch_alloc_scratch(
|
||||
reinterpret_cast<const HybridDB *>(db.get())->db, &chimeraScratch);
|
||||
if (err != HS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
#endif // HS_HYBRID
|
||||
} else {
|
||||
hs_error_t err = hs_alloc_scratch(
|
||||
reinterpret_cast<const HyperscanDB *>(db.get())->db, &scratch);
|
||||
if (err != HS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
last_db = db;
|
||||
@@ -869,20 +1100,40 @@ bool UltimateTruth::allocScratch(shared_ptr<const HyperscanDB> db) {
|
||||
}
|
||||
|
||||
bool UltimateTruth::cloneScratch(void) {
|
||||
hs_scratch_t *old_scratch = scratch;
|
||||
hs_scratch_t *new_scratch;
|
||||
hs_error_t ret = hs_clone_scratch(scratch, &new_scratch);
|
||||
if (ret != HS_SUCCESS) {
|
||||
DEBUG_PRINTF("failure to clone %d\n", ret);
|
||||
return false;
|
||||
if (colliderMode == MODE_HYBRID) {
|
||||
#ifdef HS_HYBRID
|
||||
ch_scratch_t *old_scratch = chimeraScratch;
|
||||
ch_scratch_t *new_scratch;
|
||||
ch_error_t ret = ch_clone_scratch(chimeraScratch, &new_scratch);
|
||||
if (ret != CH_SUCCESS) {
|
||||
DEBUG_PRINTF("failure to clone %d\n", ret);
|
||||
return false;
|
||||
}
|
||||
chimeraScratch = new_scratch;
|
||||
ret = ch_free_scratch(old_scratch);
|
||||
if (ret != CH_SUCCESS) {
|
||||
DEBUG_PRINTF("failure to free %d\n", ret);
|
||||
return false;
|
||||
}
|
||||
DEBUG_PRINTF("hybrid scratch cloned from %p to %p\n",
|
||||
old_scratch, chimeraScratch);
|
||||
#endif // HS_HYBRID
|
||||
} else {
|
||||
hs_scratch_t *old_scratch = scratch;
|
||||
hs_scratch_t *new_scratch;
|
||||
hs_error_t ret = hs_clone_scratch(scratch, &new_scratch);
|
||||
if (ret != HS_SUCCESS) {
|
||||
DEBUG_PRINTF("failure to clone %d\n", ret);
|
||||
return false;
|
||||
}
|
||||
scratch = new_scratch;
|
||||
ret = hs_free_scratch(old_scratch);
|
||||
if (ret != HS_SUCCESS) {
|
||||
DEBUG_PRINTF("failure to free %d\n", ret);
|
||||
return false;
|
||||
}
|
||||
DEBUG_PRINTF("scratch cloned from %p to %p\n", old_scratch, scratch);
|
||||
}
|
||||
scratch = new_scratch;
|
||||
ret = hs_free_scratch(old_scratch);
|
||||
if (ret != HS_SUCCESS) {
|
||||
DEBUG_PRINTF("failure to free %d\n", ret);
|
||||
return false;
|
||||
}
|
||||
DEBUG_PRINTF("scratch cloned from %p to %p\n", old_scratch, scratch);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -947,20 +1198,35 @@ char *UltimateTruth::setupVecScanBuffer(const char *begin, size_t len,
|
||||
return ptr;
|
||||
}
|
||||
|
||||
bool UltimateTruth::saveDatabase(const HyperscanDB &hdb,
|
||||
bool UltimateTruth::saveDatabase(const BaseDB &bdb,
|
||||
const string &filename) const {
|
||||
return ::saveDatabase(hdb.db, filename.c_str(), g_verbose);
|
||||
if (colliderMode == MODE_HYBRID) {
|
||||
cerr << "Hybrid mode doesn't support serialization." << endl;
|
||||
abort();
|
||||
} else {
|
||||
return ::saveDatabase(reinterpret_cast<const HyperscanDB *>(&bdb)->db,
|
||||
filename.c_str(), g_verbose);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
shared_ptr<HyperscanDB>
|
||||
shared_ptr<BaseDB>
|
||||
UltimateTruth::loadDatabase(const string &filename,
|
||||
const std::set<unsigned> &ids) const {
|
||||
hs_database_t *hs_db = ::loadDatabase(filename.c_str(), g_verbose);
|
||||
if (!hs_db) {
|
||||
return nullptr;
|
||||
shared_ptr<BaseDB> db;
|
||||
|
||||
if (colliderMode == MODE_HYBRID) {
|
||||
cerr << "Hybrid mode doesn't support deserialization." << endl;
|
||||
abort();
|
||||
} else {
|
||||
hs_database_t *hs_db = ::loadDatabase(filename.c_str(), g_verbose);
|
||||
if (!hs_db) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
db = make_shared<HyperscanDB>(hs_db, ids.begin(), ids.end());
|
||||
}
|
||||
|
||||
auto db = make_shared<HyperscanDB>(hs_db, ids.begin(), ids.end());
|
||||
assert(db);
|
||||
|
||||
// Fill db::ordered with the expressions that require the ordered flag.
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -33,6 +33,10 @@
|
||||
|
||||
#include "hs.h"
|
||||
|
||||
#ifdef HS_HYBRID
|
||||
#include "chimera/ch.h"
|
||||
#endif
|
||||
|
||||
#include <memory>
|
||||
#include <ostream>
|
||||
#include <set>
|
||||
@@ -47,7 +51,7 @@ struct Grey;
|
||||
|
||||
} // namespace ue2
|
||||
|
||||
class HyperscanDB;
|
||||
class BaseDB;
|
||||
class ResultSet;
|
||||
|
||||
// Wrapper around ue2 to generate results for an expression and corpus.
|
||||
@@ -59,13 +63,13 @@ public:
|
||||
|
||||
~UltimateTruth();
|
||||
|
||||
std::shared_ptr<HyperscanDB> compile(const std::set<unsigned> &ids,
|
||||
std::shared_ptr<BaseDB> compile(const std::set<unsigned> &ids,
|
||||
std::string &error) const;
|
||||
|
||||
bool saveDatabase(const HyperscanDB &db,
|
||||
bool saveDatabase(const BaseDB &db,
|
||||
const std::string &filename) const;
|
||||
|
||||
std::shared_ptr<HyperscanDB>
|
||||
std::shared_ptr<BaseDB>
|
||||
loadDatabase(const std::string &filename,
|
||||
const std::set<unsigned> &ids) const;
|
||||
|
||||
@@ -74,7 +78,7 @@ public:
|
||||
return !m_xcompile;
|
||||
}
|
||||
|
||||
bool run(unsigned id, std::shared_ptr<const HyperscanDB> db,
|
||||
bool run(unsigned id, std::shared_ptr<const BaseDB> db,
|
||||
const std::string &buffer, bool single_pattern, unsigned align,
|
||||
ResultSet &rs);
|
||||
|
||||
@@ -84,22 +88,28 @@ public:
|
||||
std::string dbFilename(const std::set<unsigned int> &ids) const;
|
||||
|
||||
private:
|
||||
bool blockScan(const HyperscanDB &db, const std::string &buffer,
|
||||
bool blockScan(const BaseDB &db, const std::string &buffer,
|
||||
size_t align, match_event_handler callback, void *ctx,
|
||||
ResultSet *rs);
|
||||
bool streamingScan(const HyperscanDB &db, const std::string &buffer,
|
||||
bool streamingScan(const BaseDB &db, const std::string &buffer,
|
||||
size_t align, match_event_handler callback, void *ctx,
|
||||
ResultSet *rs);
|
||||
bool vectoredScan(const HyperscanDB &db, const std::string &buffer,
|
||||
bool vectoredScan(const BaseDB &db, const std::string &buffer,
|
||||
size_t align, match_event_handler callback, void *ctx,
|
||||
ResultSet *rs);
|
||||
#ifdef HS_HYBRID
|
||||
bool hybridScan(const BaseDB &db, const std::string &buffer,
|
||||
size_t align, ch_match_event_handler callback,
|
||||
ch_error_event_handler error_callback,
|
||||
void *ctx, ResultSet *rs);
|
||||
#endif // HS_HYBRID
|
||||
|
||||
char *setupScanBuffer(const char *buf, size_t len, size_t align);
|
||||
|
||||
char *setupVecScanBuffer(const char *buf, size_t len, size_t align,
|
||||
unsigned int block_id);
|
||||
|
||||
bool allocScratch(std::shared_ptr<const HyperscanDB> db);
|
||||
bool allocScratch(std::shared_ptr<const BaseDB> db);
|
||||
|
||||
bool cloneScratch(void);
|
||||
|
||||
@@ -126,6 +136,11 @@ private:
|
||||
// Scratch space for Hyperscan.
|
||||
hs_scratch_t *scratch;
|
||||
|
||||
#ifdef HS_HYBRID
|
||||
// Scratch space for Chimera.
|
||||
ch_scratch_t *chimeraScratch;
|
||||
#endif // HS_HYBRID
|
||||
|
||||
// Temporary scan buffer used for realigned scanning
|
||||
std::vector<char> m_scanBuf;
|
||||
|
||||
@@ -134,7 +149,7 @@ private:
|
||||
|
||||
// Last database we successfully allocated scratch for, so that we can
|
||||
// avoid unnecessarily reallocating for it.
|
||||
std::shared_ptr<const HyperscanDB> last_db;
|
||||
std::shared_ptr<const BaseDB> last_db;
|
||||
|
||||
const hs_platform_info *platform;
|
||||
};
|
||||
|
@@ -76,6 +76,7 @@ void usage(const char *name, const char *error) {
|
||||
"blocks.\n");
|
||||
printf(" -V NUM Use vectored mode, split data into ~NUM "
|
||||
"blocks.\n");
|
||||
printf(" -H Use hybrid mode.\n");
|
||||
printf(" -Z {R or 0-%d} Only test one alignment, either as given or "
|
||||
"'R' for random.\n", MAX_MAX_UE2_ALIGN - 1);
|
||||
printf(" -q Quiet; display only match differences, no other "
|
||||
@@ -90,6 +91,7 @@ void usage(const char *name, const char *error) {
|
||||
printf(" -E DISTANCE Match all patterns within edit distance"
|
||||
" DISTANCE.\n");
|
||||
printf(" --prefilter Apply HS_FLAG_PREFILTER to all patterns.\n");
|
||||
printf(" --no-groups Disable capturing in Hybrid mode.\n");
|
||||
printf("\n");
|
||||
printf("Testing mode options:\n");
|
||||
printf("\n");
|
||||
@@ -157,7 +159,7 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
|
||||
vector<string> *corpora, UNUSED Grey *grey,
|
||||
unique_ptr<hs_platform_info> *plat_out) {
|
||||
static const char options[]
|
||||
= "-ab:cC:d:D:e:E:G:hi:k:Lm:M:n:o:O:p:P:qr:R:S:s:t:T:vV:w:x:X:Y:z:Z:8";
|
||||
= "-ab:cC:d:D:e:E:G:hHi:k:Lm:M:n:o:O:p:P:qr:R:S:s:t:T:vV:w:x:X:Y:z:Z:8";
|
||||
s32 in_multi = 0;
|
||||
s32 in_corpora = 0;
|
||||
int pcreFlag = 1;
|
||||
@@ -180,6 +182,7 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
|
||||
{"no-signal-handler", 0, &no_signal_handler, 1},
|
||||
{"compress-expand", 0, &compressFlag, 1},
|
||||
{"compress-reset-expand", 0, &compressResetFlag, 1},
|
||||
{"no-groups", 0, &no_groups, 1},
|
||||
{nullptr, 0, nullptr, 0}};
|
||||
|
||||
for (;;) {
|
||||
@@ -271,6 +274,15 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
|
||||
case 'h':
|
||||
usage(argv[0], nullptr);
|
||||
exit(0);
|
||||
case 'H':
|
||||
if (colliderMode != MODE_BLOCK) {
|
||||
usage(argv[0], "You can only use one mode at a time!");
|
||||
exit(1);
|
||||
}
|
||||
colliderMode = MODE_HYBRID;
|
||||
// Disable graph truth in hybrid mode
|
||||
nfaFlag = 0;
|
||||
break;
|
||||
case 'i':
|
||||
loadDatabases = true;
|
||||
serializePath = optarg;
|
||||
@@ -542,6 +554,11 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop,
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (colliderMode == MODE_HYBRID && !ue2Flag) {
|
||||
usage(argv[0], "You cannot disable UE2 engine in Hybrid mode.");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// need at least two pattern engines active
|
||||
if (nfaFlag + pcreFlag + ue2Flag < 2) {
|
||||
usage(argv[0], "At least two pattern engines should be active.");
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -36,7 +36,8 @@
|
||||
enum ColliderMode {
|
||||
MODE_BLOCK,
|
||||
MODE_STREAMING,
|
||||
MODE_VECTORED
|
||||
MODE_VECTORED,
|
||||
MODE_HYBRID
|
||||
};
|
||||
|
||||
extern unsigned numThreads;
|
||||
@@ -68,6 +69,7 @@ extern unsigned max_ue2_align;
|
||||
extern size_t g_memoryLimit;
|
||||
extern bool force_utf8;
|
||||
extern int force_prefilter;
|
||||
extern int no_groups;
|
||||
extern unsigned somPrecisionMode;
|
||||
extern unsigned limit_matches;
|
||||
extern unsigned randomSeed;
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -448,6 +448,9 @@ void printMode(void) {
|
||||
case MODE_VECTORED:
|
||||
cout << "Vectored-" << g_streamBlocks;
|
||||
break;
|
||||
case MODE_HYBRID:
|
||||
cout << "Hybrid";
|
||||
break;
|
||||
}
|
||||
|
||||
if (use_copy_scratch) {
|
||||
@@ -690,7 +693,7 @@ shared_ptr<DatabaseProxy> constructDatabase(const set<unsigned int> &ids,
|
||||
|
||||
if (loadDatabases) {
|
||||
string filename = ultimate.dbFilename(ids);
|
||||
shared_ptr<HyperscanDB> db = ultimate.loadDatabase(filename, ids);
|
||||
shared_ptr<BaseDB> db = ultimate.loadDatabase(filename, ids);
|
||||
if (!db) {
|
||||
if (!g_quiet) {
|
||||
cout << "FAILED: could not load database " << filename << endl;
|
||||
@@ -706,7 +709,7 @@ shared_ptr<DatabaseProxy> constructDatabase(const set<unsigned int> &ids,
|
||||
// If we're not runnable (i.e. we're cross-compiling), let's at least
|
||||
// try to build the database.
|
||||
if (!ultimate.runnable()) {
|
||||
shared_ptr<HyperscanDB> db = ue2->get(ultimate);
|
||||
shared_ptr<BaseDB> db = ue2->get(ultimate);
|
||||
assert(db); // throws otherwise
|
||||
}
|
||||
|
||||
@@ -872,7 +875,7 @@ void runTestUnit(ostream &out, GroundTruth &ground, GraphTruth &graph,
|
||||
assert(use_UE2);
|
||||
Corpus &corpus = unit.corpus;
|
||||
|
||||
shared_ptr<const HyperscanDB> db;
|
||||
shared_ptr<const BaseDB> db;
|
||||
if (use_UE2) {
|
||||
// Acquire UE2 database.
|
||||
debug_stage = STAGE_UE2_COMPILE;
|
||||
@@ -1648,6 +1651,7 @@ void printSettingsV(const vector<string> &corporaFiles,
|
||||
case MODE_BLOCK: cout << "block mode"; break;
|
||||
case MODE_STREAMING: cout << "streaming mode"; break;
|
||||
case MODE_VECTORED: cout << "vectored mode"; break;
|
||||
case MODE_HYBRID: cout << "hybrid mode"; break;
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
@@ -1746,6 +1750,7 @@ void printSettingsQ(const vector<string> &corporaFiles,
|
||||
case MODE_BLOCK: cout << "block mode"; break;
|
||||
case MODE_STREAMING: cout << "streaming mode"; break;
|
||||
case MODE_VECTORED: cout << "vectored mode"; break;
|
||||
case MODE_HYBRID: cout << "hybrid mode"; break;
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
|
Reference in New Issue
Block a user