mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-06-28 16:41:01 +03:00
325 lines
10 KiB
C++
325 lines
10 KiB
C++
/*
|
|
* Copyright (c) 2018, Intel Corporation
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of Intel Corporation nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "config.h"
|
|
|
|
#include "ExpressionParser.h"
|
|
#include "common.h"
|
|
#include "engine_chimera.h"
|
|
#include "expressions.h"
|
|
#include "heapstats.h"
|
|
#include "sqldb.h"
|
|
#include "timer.h"
|
|
|
|
#include "chimera/ch_database.h"
|
|
|
|
using namespace std;
|
|
|
|
/**
 * Allocates the Chimera scratch space used when scanning against \p db.
 *
 * The status returned by ch_alloc_scratch() is checked explicitly so that an
 * allocation failure is still reported in builds where assert() is compiled
 * out; the failure is handled the same way EngineChimera::scan() handles a
 * failing ch_scan(): print the error code and abort.
 */
EngineCHContext::EngineCHContext(const ch_database_t *db) {
    const ch_error_t rv = ch_alloc_scratch(db, &scratch);
    if (rv != CH_SUCCESS) {
        printf("Fatal error: ch_alloc_scratch returned error %d\n", rv);
        abort();
    }
    assert(scratch);
}
|
|
|
|
/** Hands the scratch space held by this context back to ch_free_scratch(). */
EngineCHContext::~EngineCHContext() {
    ch_free_scratch(this->scratch);
}
|
|
|
|
namespace /* anonymous */ {
|
|
|
|
/** Scan context structure passed to the onMatch callback function. */
|
|
struct ScanCHContext {
|
|
ScanCHContext(unsigned id_in, ResultEntry &result_in)
|
|
: id(id_in), result(result_in) {}
|
|
unsigned id;
|
|
ResultEntry &result;
|
|
};
|
|
|
|
} // namespace
|
|
|
|
/**
|
|
* Callback function called for every match that Chimera produces, used when
|
|
* "echo matches" is off.
|
|
*/
|
|
static
|
|
int HS_CDECL onMatch(unsigned int, unsigned long long, unsigned long long,
|
|
unsigned int, unsigned int, const ch_capture_t *,
|
|
void *ctx) {
|
|
ScanCHContext *sc = static_cast<ScanCHContext *>(ctx);
|
|
assert(sc);
|
|
sc->result.matches++;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Callback function called for every match that Chimera produces when "echo
|
|
* matches" is enabled.
|
|
*/
|
|
static
|
|
int HS_CDECL onMatchEcho(unsigned int id, unsigned long long,
|
|
unsigned long long to, unsigned int, unsigned int,
|
|
const ch_capture_t *, void *ctx) {
|
|
ScanCHContext *sc = static_cast<ScanCHContext *>(ctx);
|
|
assert(sc);
|
|
sc->result.matches++;
|
|
|
|
printf("Match @%u:%llu for %u\n", sc->id, to, id);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
 * Wraps an already compiled Chimera database together with the statistics
 * collected while it was compiled.
 *
 * \param db_in Compiled database; must not be null. It is released with
 *              ch_free_database() by the destructor.
 * \param cs    Compile statistics, moved into the engine.
 */
EngineChimera::EngineChimera(ch_database_t *db_in, CompileCHStats cs)
    // std::move is spelled out in full: an unqualified call only resolves
    // through the file-level using-directive and is diagnosed by clang's
    // -Wunqualified-std-cast-call.
    : db(db_in), compile_stats(std::move(cs)) {
    assert(db);
}
|
|
|
|
/** Releases the compiled database held by this engine. */
EngineChimera::~EngineChimera() {
    ch_free_database(this->db);
}
|
|
|
|
/**
 * Creates a fresh scanning context for this engine's database.
 *
 * \return An EngineCHContext constructed from the database, returned through
 *         the EngineContext base type.
 */
unique_ptr<EngineContext> EngineChimera::makeContext() const {
    unique_ptr<EngineContext> context = std::make_unique<EngineCHContext>(db);
    return context;
}
|
|
|
|
void EngineChimera::scan(const char *data, unsigned int len, unsigned int id,
|
|
ResultEntry &result, EngineContext &ectx) const {
|
|
assert(data);
|
|
|
|
auto &ctx = static_cast<EngineCHContext &>(ectx);
|
|
ScanCHContext sc(id, result);
|
|
auto callback = echo_matches ? onMatchEcho : onMatch;
|
|
ch_error_t rv = ch_scan(db, data, len, 0, ctx.scratch, callback, nullptr,
|
|
&sc);
|
|
|
|
if (rv != CH_SUCCESS) {
|
|
printf("Fatal error: ch_scan returned error %d\n", rv);
|
|
abort();
|
|
}
|
|
}
|
|
|
|
// vectoring scan
|
|
void EngineChimera::scan_vectored(UNUSED const char *const *data,
|
|
UNUSED const unsigned int *len,
|
|
UNUSED unsigned int count,
|
|
UNUSED unsigned int streamId,
|
|
UNUSED ResultEntry &result,
|
|
UNUSED EngineContext &ectx) const {
|
|
printf("Hybrid matcher can't support vectored mode.\n");
|
|
abort();
|
|
}
|
|
|
|
/**
 * Stream-open entry point. This engine does not implement streaming: the
 * call prints a message and aborts, so no stream is ever returned.
 */
unique_ptr<EngineStream> EngineChimera::streamOpen(UNUSED EngineContext &ectx,
                                                   UNUSED unsigned id) const {
    printf("Hybrid matcher can't stream.\n");
    abort();
}
|
|
|
|
/**
 * Stream-close entry point. This engine does not implement streaming: the
 * call prints a message and aborts without touching its arguments.
 */
void EngineChimera::streamClose(UNUSED unique_ptr<EngineStream> stream,
                                UNUSED ResultEntry &result) const {
    printf("Hybrid matcher can't stream.\n");
    abort();
}
|
|
|
|
/**
 * Stream-scan entry point. This engine does not implement streaming: the
 * call prints a message and aborts without touching its arguments.
 */
void EngineChimera::streamScan(UNUSED EngineStream &stream,
                               UNUSED const char *data,
                               UNUSED unsigned len, UNUSED unsigned id,
                               UNUSED ResultEntry &result) const {
    printf("Hybrid matcher can't stream.\n");
    abort();
}
|
|
|
|
/**
 * Stream compress/expand entry point. This engine does not implement
 * streaming: the call prints a message and aborts without touching its
 * arguments.
 */
void EngineChimera::streamCompressExpand(UNUSED EngineStream &stream,
                                         UNUSED vector<char> &temp) const {
    printf("Hybrid matcher can't stream.\n");
    abort();
}
|
|
|
|
/**
 * Prints the compile statistics to stdout, one labelled line per value. The
 * "Signature set" line is emitted only when a signature set name was
 * recorded.
 */
void EngineChimera::printStats() const {
    const auto &stats = compile_stats;

    // The signature set name is optional; skip its line when it is absent.
    const bool has_set_name = !stats.sigs_name.empty();
    if (has_set_name) {
        printf("Signature set: %s\n", stats.sigs_name.c_str());
    }

    printf("Signatures: %s\n", stats.signatures.c_str());
    printf("Chimera info: %s\n", stats.db_info.c_str());
    printf("Expression count: %'zu\n", stats.expressionCount);
    printf("Bytecode size: %'zu bytes\n", stats.compiledSize);
    printf("Database CRC: 0x%x\n", stats.crc32);
    printf("Scratch size: %'zu bytes\n", stats.scratchSize);
    printf("Compile time: %'0.3Lf seconds\n", stats.compileSecs);
    printf("Peak heap usage: %'u bytes\n", stats.peakMemorySize);
}
|
|
|
|
/**
 * Prints the compile statistics to stdout as a sequence of CSV fields. Each
 * field is preceded by a comma and wrapped in double quotes; no newline is
 * written.
 */
void EngineChimera::printCsvStats() const {
    const auto &stats = compile_stats;

    printf(",\"%s\"", stats.signatures.c_str());
    printf(",\"%zu\"", stats.expressionCount);
    printf(",\"0x%x\"", stats.crc32);
    printf(",\"%zu\"", stats.compiledSize);
    printf(",\"%zu\"", stats.scratchSize);
    printf(",\"%0.3Lf\"", stats.compileSecs);
    printf(",\"%u\"", stats.peakMemorySize);
}
|
|
|
|
/**
 * Inserts one row holding the compile statistics into the Compile table of
 * \p sqldb. The CRC is stored as a "0x"-prefixed hexadecimal string.
 */
void EngineChimera::sqlStats(SqlDB &sqldb) const {
    static const string Q =
        "INSERT INTO Compile ("
            "sigsName, signatures, dbInfo, exprCount, dbSize, crc,"
            "scratchSize, compileSecs, peakMemory) "
        "VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)";

    const auto &stats = compile_stats;

    // Render the CRC in hex so it is stored as text.
    ostringstream crc_text;
    crc_text << "0x" << hex << stats.crc32;

    sqldb.insert_all(Q, stats.sigs_name, stats.signatures, stats.db_info,
                     stats.expressionCount, stats.compiledSize,
                     crc_text.str(), stats.scratchSize, stats.compileSecs,
                     stats.peakMemorySize);
}
|
|
|
|
/**
 * Compiles every expression in \p expressions into one Chimera database and
 * wraps it, together with the statistics gathered along the way, in an
 * EngineChimera.
 *
 * \param expressions Map from expression id to expression text, parsed with
 *                    readExpression(). Must not be empty.
 * \param name        Name recorded as the "signatures" statistic; when
 *                    \p sigs_name is non-empty only the part after the last
 *                    '/' is kept.
 * \param sigs_name   Signature set name recorded in the statistics; may be
 *                    empty.
 * \return The new engine, or nullptr when an expression cannot be parsed,
 *         carries extended parameters, the compile fails, or the database or
 *         scratch cannot be queried. On every failure after a successful
 *         compile the database is released before returning.
 */
unique_ptr<EngineChimera>
buildEngineChimera(const ExpressionMap &expressions, const string &name,
                   const string &sigs_name) {
    if (expressions.empty()) {
        assert(0);
        return nullptr;
    }

    const unsigned int count = expressions.size();

    vector<string> exprs;
    vector<unsigned int> flags, ids;
    exprs.reserve(count);
    flags.reserve(count);
    ids.reserve(count);

    for (const auto &m : expressions) {
        string expr;
        unsigned int f = 0;
        hs_expr_ext extparam; // only inspected to reject extended parameters
        extparam.flags = 0;
        if (!readExpression(m.second, expr, &f, &extparam)) {
            printf("Error parsing PCRE: %s (id %u)\n", m.second.c_str(),
                   m.first);
            return nullptr;
        }

        if (extparam.flags) {
            printf("Error parsing PCRE with extended flags: %s (id %u)\n",
                   m.second.c_str(), m.first);
            return nullptr;
        }
        exprs.push_back(std::move(expr));
        ids.push_back(m.first);
        flags.push_back(f);
    }

    // Our compiler takes an array of plain ol' C strings.
    vector<const char *> patterns(count);
    for (unsigned int i = 0; i < count; i++) {
        patterns[i] = exprs[i].c_str();
    }

    Timer timer;
    timer.start();

    // Capture groups by default
    const unsigned int mode = CH_MODE_GROUPS;

    // Both out-parameters start as null so that no failure path can read an
    // indeterminate pointer.
    ch_database_t *db = nullptr;
    ch_compile_error_t *compile_err = nullptr;
    ch_error_t err = ch_compile_multi(patterns.data(), flags.data(),
                                      ids.data(), count, mode, nullptr, &db,
                                      &compile_err);

    timer.complete();
    const long double compileSecs = timer.seconds();
    const unsigned int peakMemorySize = getPeakHeap();

    // Any non-success status is a failure, not only CH_COMPILER_ERROR;
    // otherwise the code below would run on a database that was never set.
    if (err != CH_SUCCESS) {
        if (compile_err) {
            if (compile_err->expression >= 0) {
                printf("Compile error for signature #%u: %s\n",
                       compile_err->expression, compile_err->message);
            } else {
                printf("Compile error: %s\n", compile_err->message);
            }
            ch_free_compile_error(compile_err);
        } else {
            printf("Compile error: ch_compile_multi returned error %d\n",
                   err);
        }
        return nullptr;
    }

    size_t compiledSize = 0;
    err = ch_database_size(db, &compiledSize);
    if (err != CH_SUCCESS) {
        ch_free_database(db);
        return nullptr;
    }
    assert(compiledSize > 0);

    char *info = nullptr;
    err = ch_database_info(db, &info);
    if (err != CH_SUCCESS) {
        ch_free_database(db);
        return nullptr;
    }
    string db_info(info);
    free(info);

    // Allocate scratch temporarily to find its size: this is a good test
    // anyway.
    ch_scratch_t *scratch = nullptr;
    err = ch_alloc_scratch(db, &scratch);
    if (err != CH_SUCCESS) {
        ch_free_database(db);
        return nullptr;
    }

    size_t scratchSize = 0;
    err = ch_scratch_size(scratch, &scratchSize);
    // The temporary scratch is released whether or not the query succeeded.
    ch_free_scratch(scratch);
    if (err != CH_SUCCESS) {
        ch_free_database(db);
        return nullptr;
    }

    // Collect summary information.
    CompileCHStats cs;
    cs.sigs_name = sigs_name;
    if (!sigs_name.empty()) {
        // When there is no '/', find_last_of() yields npos and pos + 1 wraps
        // to 0, so the whole name is kept.
        const auto pos = name.find_last_of('/');
        cs.signatures = name.substr(pos + 1);
    } else {
        cs.signatures = name;
    }
    cs.db_info = db_info;
    cs.expressionCount = expressions.size();
    cs.compiledSize = compiledSize;
    cs.scratchSize = scratchSize;
    cs.compileSecs = compileSecs;
    cs.peakMemorySize = peakMemorySize;

    return std::make_unique<EngineChimera>(db, std::move(cs));
}
|