mirror of
https://github.com/VectorCamp/vectorscan.git
synced 2025-09-29 11:16:29 +03:00
Logical Combination of patterns.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -82,12 +82,30 @@ string g_signatureFile("");
|
||||
bool g_allSignatures = false;
|
||||
bool g_forceEditDistance = false;
|
||||
bool build_sigs = false;
|
||||
bool check_logical = false;
|
||||
unsigned int g_signature;
|
||||
unsigned int g_editDistance;
|
||||
unsigned int globalFlags = 0;
|
||||
unsigned int num_of_threads = 1;
|
||||
unsigned int countFailures = 0;
|
||||
|
||||
class ParsedExpr {
|
||||
public:
|
||||
ParsedExpr(string regex_in, unsigned int flags_in, hs_expr_ext ext_in)
|
||||
: regex(regex_in), flags(flags_in), ext(ext_in) {}
|
||||
~ParsedExpr() {}
|
||||
string regex;
|
||||
unsigned int flags;
|
||||
hs_expr_ext ext;
|
||||
};
|
||||
|
||||
typedef map<unsigned int, ParsedExpr> ExprExtMap;
|
||||
ExprExtMap g_combs;
|
||||
ExprExtMap g_validSubs;
|
||||
|
||||
// Iterator pointing to next logical expression to process.
|
||||
ExprExtMap::const_iterator comb_read_it;
|
||||
|
||||
// Global greybox structure, used in non-release builds.
|
||||
unique_ptr<Grey> g_grey;
|
||||
|
||||
@@ -106,6 +124,12 @@ std::mutex lk_read;
|
||||
// Mutex serialising access to output map and stdout.
|
||||
std::mutex lk_output;
|
||||
|
||||
// Mutex guarding access to write g_combs.
|
||||
std::mutex lk_write_comb;
|
||||
|
||||
// Mutex guarding access to write g_validSubs.
|
||||
std::mutex lk_write_sub;
|
||||
|
||||
// Possible values for pattern check results.
|
||||
enum ExprStatus {NOT_PROCESSED, SUCCESS, FAILURE};
|
||||
|
||||
@@ -126,6 +150,32 @@ bool getNextExpressionId(ExpressionMap::const_iterator &it) {
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
bool getNextLogicalExpression(ExprExtMap::const_iterator &it) {
|
||||
lock_guard<mutex> lock(lk_read);
|
||||
if (comb_read_it != g_combs.end()) {
|
||||
it = comb_read_it;
|
||||
++comb_read_it;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void cacheCombExpr(unsigned id, const string ®ex, unsigned int flags,
|
||||
const hs_expr_ext &ext) {
|
||||
lock_guard<mutex> lock(lk_write_comb);
|
||||
g_combs.emplace(id, ParsedExpr(regex, flags, ext));
|
||||
}
|
||||
|
||||
static
|
||||
void cacheSubExpr(unsigned id, const string ®ex, unsigned int flags,
|
||||
const hs_expr_ext &ext) {
|
||||
lock_guard<mutex> lock(lk_write_sub);
|
||||
g_validSubs.emplace(id, ParsedExpr(regex, flags, ext));
|
||||
}
|
||||
|
||||
// This function prints the Pattern IDs order
|
||||
// It creates the output for build sigs
|
||||
// Caller is required to hold lk_output when calling this function
|
||||
@@ -221,6 +271,15 @@ void checkExpression(UNUSED void *threadarg) {
|
||||
ext.flags |= HS_EXT_FLAG_EDIT_DISTANCE;
|
||||
}
|
||||
|
||||
if (flags & HS_FLAG_COMBINATION) {
|
||||
if (check_logical) {
|
||||
cacheCombExpr(it->first, regex, flags, ext);
|
||||
} else {
|
||||
recordFailure(g_exprMap, it->first, "Unsupported flag used.");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Try and compile a database.
|
||||
const char *regexp = regex.c_str();
|
||||
const hs_expr_ext *extp = &ext;
|
||||
@@ -239,6 +298,112 @@ void checkExpression(UNUSED void *threadarg) {
|
||||
nullptr, &db, &compile_err);
|
||||
#endif
|
||||
|
||||
if (err == HS_SUCCESS) {
|
||||
assert(db);
|
||||
recordSuccess(g_exprMap, it->first);
|
||||
hs_free_database(db);
|
||||
if (check_logical) {
|
||||
cacheSubExpr(it->first, regex, flags, ext);
|
||||
}
|
||||
} else {
|
||||
assert(!db);
|
||||
assert(compile_err);
|
||||
recordFailure(g_exprMap, it->first, compile_err->message);
|
||||
hs_free_compile_error(compile_err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Collect every decimal id that appears in a logical combination
 * string.
 *
 * The string is scanned from its last character to its first, so ids are
 * appended to \p ids in right-to-left order of appearance. Any non-digit
 * character acts as a separator.
 *
 * \param logical NUL-terminated logical expression text.
 * \param ids     output; parsed ids are appended to it.
 * \return false if a run of digits is longer than nine characters (\p ids
 *         may then already hold the ids parsed before the failure), true
 *         otherwise.
 */
static
bool fetchSubIds(const char *logical, std::vector<unsigned> &ids) {
    unsigned mult = 1; // place value of the next digit; > 1 means "in a number"
    unsigned id = 0;
    for (size_t i = strlen(logical); i-- > 0;) {
        // isdigit() has undefined behaviour for negative arguments, which a
        // plain char holding a non-ASCII byte can be; go through unsigned char.
        if (isdigit(static_cast<unsigned char>(logical[i]))) {
            if (mult > 100000000) {
                // A tenth digit would not fit the nine-digit id limit.
                return false;
            }
            id += static_cast<unsigned>(logical[i] - '0') * mult;
            mult *= 10;
        } else if (mult > 1) {
            // A separator ends the number that was being accumulated.
            ids.push_back(id);
            mult = 1;
            id = 0;
        }
    }
    if (mult > 1) {
        // The string started with a number; flush it.
        ids.push_back(id);
    }
    return true;
}
|
||||
|
||||
static
|
||||
void checkLogicalExpression(UNUSED void *threadarg) {
|
||||
unsigned int mode = g_streaming ? HS_MODE_STREAM
|
||||
: g_vectored ? HS_MODE_VECTORED
|
||||
: HS_MODE_BLOCK;
|
||||
if (g_streaming) {
|
||||
// Use SOM mode, for permissiveness' sake.
|
||||
mode |= HS_MODE_SOM_HORIZON_LARGE;
|
||||
}
|
||||
|
||||
ExprExtMap::const_iterator it;
|
||||
while (getNextLogicalExpression(it)) {
|
||||
const ParsedExpr &comb = it->second;
|
||||
|
||||
vector<unsigned> subIds;
|
||||
if (!fetchSubIds(comb.regex.c_str(), subIds)) {
|
||||
recordFailure(g_exprMap, it->first, "Sub-expression id too large.");
|
||||
continue;
|
||||
}
|
||||
|
||||
vector<const char *> regexv;
|
||||
vector<unsigned> flagsv;
|
||||
vector<unsigned> idv;
|
||||
vector<const hs_expr_ext *> extv;
|
||||
bool valid = true;
|
||||
|
||||
for (const auto i : subIds) {
|
||||
ExprExtMap::const_iterator jt = g_validSubs.find(i);
|
||||
if (jt != g_validSubs.end()) {
|
||||
const ParsedExpr &sub = jt->second;
|
||||
regexv.push_back(sub.regex.c_str());
|
||||
flagsv.push_back(sub.flags);
|
||||
idv.push_back(i);
|
||||
extv.push_back(&sub.ext);
|
||||
} else {
|
||||
valid = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (valid) {
|
||||
regexv.push_back(comb.regex.c_str());
|
||||
flagsv.push_back(comb.flags);
|
||||
idv.push_back(it->first);
|
||||
extv.push_back(&comb.ext);
|
||||
} else {
|
||||
recordFailure(g_exprMap, it->first, "Sub-expression id not valid.");
|
||||
continue;
|
||||
}
|
||||
|
||||
// Try and compile a database.
|
||||
hs_error_t err;
|
||||
hs_compile_error_t *compile_err;
|
||||
hs_database_t *db = nullptr;
|
||||
|
||||
#if !defined(RELEASE_BUILD)
|
||||
// This variant is available in non-release builds and allows us to
|
||||
// modify greybox settings.
|
||||
err = hs_compile_multi_int(regexv.data(), flagsv.data(), idv.data(),
|
||||
extv.data(), regexv.size(), mode,
|
||||
nullptr, &db, &compile_err, *g_grey);
|
||||
#else
|
||||
err = hs_compile_ext_multi(regexv.data(), flagsv.data(), idv.data(),
|
||||
extv.data(), regexv.size(), mode,
|
||||
nullptr, &db, &compile_err);
|
||||
#endif
|
||||
|
||||
if (err == HS_SUCCESS) {
|
||||
assert(db);
|
||||
recordSuccess(g_exprMap, it->first);
|
||||
@@ -269,12 +434,13 @@ void usage() {
|
||||
<< " -T NUM Run with NUM threads." << endl
|
||||
<< " -h Display this help." << endl
|
||||
<< " -B Build signature set." << endl
|
||||
<< " -C Check logical combinations (default: off)." << endl
|
||||
<< endl;
|
||||
}
|
||||
|
||||
static
|
||||
void processArgs(int argc, char *argv[], UNUSED unique_ptr<Grey> &grey) {
|
||||
const char options[] = "e:E:s:z:hLNV8G:T:B";
|
||||
const char options[] = "e:E:s:z:hLNV8G:T:BC";
|
||||
bool signatureSet = false;
|
||||
|
||||
for (;;) {
|
||||
@@ -332,6 +498,9 @@ void processArgs(int argc, char *argv[], UNUSED unique_ptr<Grey> &grey) {
|
||||
case 'B':
|
||||
build_sigs = true;
|
||||
break;
|
||||
case 'C':
|
||||
check_logical = true;
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
exit(1);
|
||||
@@ -468,6 +637,18 @@ int main(int argc, char **argv) {
|
||||
threads[i].join();
|
||||
}
|
||||
|
||||
if (check_logical) {
|
||||
comb_read_it = g_combs.begin();
|
||||
|
||||
for (unsigned int i = 0; i < num_of_threads; i++) {
|
||||
threads[i] = thread(checkLogicalExpression, nullptr);
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < num_of_threads; i++) {
|
||||
threads[i].join();
|
||||
}
|
||||
}
|
||||
|
||||
if (!g_exprMap.empty() && !build_sigs) {
|
||||
cout << "SUMMARY: " << countFailures << " of "
|
||||
<< g_exprMap.size() << " failed." << endl;
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -48,6 +48,7 @@
|
||||
#include "nfagraph/ng_util.h"
|
||||
#include "parser/Parser.h"
|
||||
#include "parser/unsupported.h"
|
||||
#include "parser/logical_combination.h"
|
||||
#include "util/compile_context.h"
|
||||
#include "util/make_unique.h"
|
||||
#include "util/report_manager.h"
|
||||
@@ -69,8 +70,11 @@ public:
|
||||
CompiledNG(unique_ptr<NGHolder> g_in,
|
||||
unique_ptr<ReportManager> rm_in)
|
||||
: g(std::move(g_in)), rm(std::move(rm_in)) {}
|
||||
CompiledNG(unique_ptr<ParsedLogical> pl_in)
|
||||
: pl(std::move(pl_in)) {}
|
||||
unique_ptr<ue2::NGHolder> g;
|
||||
unique_ptr<ue2::ReportManager> rm;
|
||||
unique_ptr<ue2::ParsedLogical> pl;
|
||||
};
|
||||
|
||||
static
|
||||
@@ -126,6 +130,14 @@ void CNGInfo::compile() {
|
||||
}
|
||||
|
||||
try {
|
||||
if (combination) {
|
||||
auto pl = ue2::make_unique<ParsedLogical>();
|
||||
pl->parseLogicalCombination(id, re.c_str(), ~0U, 0, ~0ULL);
|
||||
pl->logicalKeyRenumber();
|
||||
cng = make_unique<CompiledNG>(move(pl));
|
||||
return;
|
||||
}
|
||||
|
||||
bool isStreaming = colliderMode == MODE_STREAMING;
|
||||
bool isVectored = colliderMode == MODE_VECTORED;
|
||||
CompileContext cc(isStreaming, isVectored, get_current_target(),
|
||||
@@ -199,6 +211,8 @@ unique_ptr<CNGInfo> GraphTruth::preprocess(unsigned id,
|
||||
bool highlander = false;
|
||||
bool prefilter = false;
|
||||
bool som = false;
|
||||
bool combination = false;
|
||||
bool quiet = false;
|
||||
|
||||
auto i = m_expr.find(id);
|
||||
if (i == m_expr.end()) {
|
||||
@@ -214,7 +228,8 @@ unique_ptr<CNGInfo> GraphTruth::preprocess(unsigned id,
|
||||
throw NGCompileFailure("Cannot parse expression flags.");
|
||||
}
|
||||
// read PCRE flags
|
||||
if (!getPcreFlags(hs_flags, &flags, &highlander, &prefilter, &som)) {
|
||||
if (!getPcreFlags(hs_flags, &flags, &highlander, &prefilter, &som,
|
||||
&combination, &quiet)) {
|
||||
throw NGCompileFailure("Cannot get PCRE flags.");
|
||||
}
|
||||
if (force_utf8) {
|
||||
@@ -247,6 +262,8 @@ unique_ptr<CNGInfo> GraphTruth::preprocess(unsigned id,
|
||||
cngi->highlander = highlander;
|
||||
cngi->prefilter = prefilter;
|
||||
cngi->som = som;
|
||||
cngi->combination = combination;
|
||||
cngi->quiet = quiet;
|
||||
cngi->min_offset = ext.min_offset;
|
||||
cngi->max_offset = ext.max_offset;
|
||||
cngi->min_length = ext.min_length;
|
||||
@@ -256,8 +273,95 @@ unique_ptr<CNGInfo> GraphTruth::preprocess(unsigned id,
|
||||
return cngi;
|
||||
}
|
||||
|
||||
/** \brief Returns 1 if compliant to all logical combinations. */
|
||||
static
|
||||
char isLogicalCombination(vector<char> &lv, const vector<LogicalOp> &comb,
|
||||
size_t lkeyCount, unsigned start, unsigned result) {
|
||||
assert(start <= result);
|
||||
for (unsigned i = start; i <= result; i++) {
|
||||
const LogicalOp &op = comb[i - lkeyCount];
|
||||
assert(i == op.id);
|
||||
switch (op.op) {
|
||||
case LOGICAL_OP_NOT:
|
||||
lv[op.id] = !lv[op.ro];
|
||||
break;
|
||||
case LOGICAL_OP_AND:
|
||||
lv[op.id] = lv[op.lo] & lv[op.ro]; // &&
|
||||
break;
|
||||
case LOGICAL_OP_OR:
|
||||
lv[op.id] = lv[op.lo] | lv[op.ro]; // ||
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return lv[result];
|
||||
}
|
||||
|
||||
bool GraphTruth::run(unsigned, const CompiledNG &cng, const CNGInfo &cngi,
|
||||
const string &buffer, ResultSet &rs, string &) {
|
||||
const string &buffer, ResultSet &rs, string &error) {
|
||||
if (cngi.quiet) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (cngi.combination) {
|
||||
// Compile and run sub-expressions, store match results.
|
||||
map<unsigned long long, set<MatchResult>> offset_to_matches;
|
||||
map<unsigned long long, set<unsigned>> offset_to_lkeys;
|
||||
set<unsigned> sub_exps;
|
||||
const auto &m_lkey = cng.pl->getLkeyMap();
|
||||
for (const auto &it_lkey : m_lkey) {
|
||||
if (sub_exps.find(it_lkey.first) == sub_exps.end()) {
|
||||
sub_exps.emplace(it_lkey.first);
|
||||
ResultSet sub_rs(RESULT_FROM_PCRE);
|
||||
shared_ptr<CNGInfo> sub_cngi = preprocess(it_lkey.first);
|
||||
const CompiledNG *sub_cng;
|
||||
try {
|
||||
sub_cng = sub_cngi->get();
|
||||
}
|
||||
catch (const NGCompileFailure &err) {
|
||||
return false;
|
||||
}
|
||||
catch (const NGUnsupportedFailure &err) {
|
||||
return false;
|
||||
}
|
||||
sub_cngi->quiet = false; // force not quiet in sub-exp.
|
||||
if (!run(it_lkey.first, *sub_cng, *sub_cngi, buffer, sub_rs, error)) {
|
||||
rs.clear();
|
||||
return false;
|
||||
}
|
||||
for (const auto &it_mr : sub_rs.matches) {
|
||||
offset_to_matches[it_mr.to].emplace(it_mr);
|
||||
offset_to_lkeys[it_mr.to].emplace(it_lkey.second);
|
||||
if (sub_cngi->highlander) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Calculate rs for combination expression.
|
||||
vector<char> lv;
|
||||
const auto &comb = cng.pl->getLogicalTree();
|
||||
lv.resize(m_lkey.size() + comb.size());
|
||||
const auto &li = cng.pl->getCombInfoById(cngi.id);
|
||||
for (const auto &it : offset_to_lkeys) {
|
||||
for (auto report : it.second) {
|
||||
lv[report] = 1;
|
||||
}
|
||||
if (isLogicalCombination(lv, comb, m_lkey.size(),
|
||||
li.start, li.result)) {
|
||||
for (const auto &mr : offset_to_matches.at(it.first)) {
|
||||
if ((mr.to >= cngi.min_offset) &&
|
||||
(mr.to <= cngi.max_offset)) {
|
||||
rs.addMatch(mr.from, mr.to);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
set<pair<size_t, size_t>> matches;
|
||||
|
||||
if (g_streamOffset) {
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -106,6 +106,10 @@ public:
|
||||
bool highlander = false;
|
||||
bool prefilter = false;
|
||||
bool som = false;
|
||||
bool combination = false;
|
||||
bool quiet = false;
|
||||
|
||||
unsigned id;
|
||||
private:
|
||||
void compile();
|
||||
// If NFA graph scan failed for some reason, we mark it as bad and skip
|
||||
@@ -116,8 +120,6 @@ private:
|
||||
std::unique_ptr<CompiledNG> cng; // compiled NFA graph
|
||||
std::mutex cng_mutex; // serialised accesses to NFA graph
|
||||
|
||||
unsigned id;
|
||||
|
||||
// Our expression map
|
||||
const ExpressionMap &m_expr;
|
||||
};
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -100,7 +100,8 @@ int pcreCallOut(pcre_callout_block *block) {
|
||||
|
||||
static
|
||||
bool decodeExprPcre(string &expr, unsigned *flags, bool *highlander,
|
||||
bool *prefilter, bool *som, hs_expr_ext *ext) {
|
||||
bool *prefilter, bool *som, bool *combination,
|
||||
bool *quiet, hs_expr_ext *ext) {
|
||||
string regex;
|
||||
unsigned int hs_flags = 0;
|
||||
if (!readExpression(expr, regex, &hs_flags, ext)) {
|
||||
@@ -109,7 +110,8 @@ bool decodeExprPcre(string &expr, unsigned *flags, bool *highlander,
|
||||
|
||||
expr.swap(regex);
|
||||
|
||||
if (!getPcreFlags(hs_flags, flags, highlander, prefilter, som)) {
|
||||
if (!getPcreFlags(hs_flags, flags, highlander, prefilter, som,
|
||||
combination, quiet)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -221,6 +223,8 @@ GroundTruth::compile(unsigned id, bool no_callouts) {
|
||||
bool highlander = false;
|
||||
bool prefilter = false;
|
||||
bool som = false;
|
||||
bool combination = false;
|
||||
bool quiet = false;
|
||||
|
||||
// we can still match approximate matching patterns with PCRE if edit
|
||||
// distance 0 is requested
|
||||
@@ -238,7 +242,8 @@ GroundTruth::compile(unsigned id, bool no_callouts) {
|
||||
hs_expr_ext ext;
|
||||
|
||||
// Decode the flags
|
||||
if (!decodeExprPcre(re, &flags, &highlander, &prefilter, &som, &ext)) {
|
||||
if (!decodeExprPcre(re, &flags, &highlander, &prefilter, &som,
|
||||
&combination, &quiet, &ext)) {
|
||||
throw PcreCompileFailure("Unable to decode flags.");
|
||||
}
|
||||
|
||||
@@ -261,7 +266,7 @@ GroundTruth::compile(unsigned id, bool no_callouts) {
|
||||
som |= !!somFlags;
|
||||
|
||||
// For traditional Hyperscan, add global callout to pattern.
|
||||
if (!no_callouts) {
|
||||
if (!combination && !no_callouts) {
|
||||
addCallout(re);
|
||||
}
|
||||
|
||||
@@ -275,12 +280,22 @@ GroundTruth::compile(unsigned id, bool no_callouts) {
|
||||
compiled->highlander = highlander;
|
||||
compiled->prefilter = prefilter;
|
||||
compiled->som = som;
|
||||
compiled->combination = combination;
|
||||
compiled->quiet = quiet;
|
||||
compiled->min_offset = ext.min_offset;
|
||||
compiled->max_offset = ext.max_offset;
|
||||
compiled->min_length = ext.min_length;
|
||||
compiled->expression = i->second; // original PCRE
|
||||
flags |= PCRE_NO_AUTO_POSSESS;
|
||||
|
||||
if (compiled->combination) {
|
||||
compiled->pl.parseLogicalCombination(id, re.c_str(), ~0U, 0, ~0ULL);
|
||||
compiled->pl.logicalKeyRenumber();
|
||||
compiled->report = id;
|
||||
return compiled;
|
||||
}
|
||||
|
||||
|
||||
compiled->bytecode =
|
||||
pcre_compile2(re.c_str(), flags, &errcode, &errptr, &errloc, nullptr);
|
||||
|
||||
@@ -424,8 +439,94 @@ int scanOffset(const CompiledPcre &compiled, const string &buffer,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** \brief Returns 1 if compliant to all logical combinations. */
|
||||
static
|
||||
char isLogicalCombination(vector<char> &lv, const vector<LogicalOp> &comb,
|
||||
size_t lkeyCount, unsigned start, unsigned result) {
|
||||
assert(start <= result);
|
||||
for (unsigned i = start; i <= result; i++) {
|
||||
const LogicalOp &op = comb[i - lkeyCount];
|
||||
assert(i == op.id);
|
||||
switch (op.op) {
|
||||
case LOGICAL_OP_NOT:
|
||||
lv[op.id] = !lv[op.ro];
|
||||
break;
|
||||
case LOGICAL_OP_AND:
|
||||
lv[op.id] = lv[op.lo] & lv[op.ro]; // &&
|
||||
break;
|
||||
case LOGICAL_OP_OR:
|
||||
lv[op.id] = lv[op.lo] | lv[op.ro]; // ||
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return lv[result];
|
||||
}
|
||||
|
||||
bool GroundTruth::run(unsigned, const CompiledPcre &compiled,
|
||||
const string &buffer, ResultSet &rs, string &error) {
|
||||
if (compiled.quiet) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (compiled.combination) {
|
||||
// Compile and run sub-expressions, store match results.
|
||||
map<unsigned long long, set<MatchResult>> offset_to_matches;
|
||||
map<unsigned long long, set<unsigned>> offset_to_lkeys;
|
||||
set<unsigned> sub_exps;
|
||||
const auto &m_lkey = compiled.pl.getLkeyMap();
|
||||
for (const auto &it_lkey : m_lkey) {
|
||||
if (sub_exps.find(it_lkey.first) == sub_exps.end()) {
|
||||
sub_exps.emplace(it_lkey.first);
|
||||
ResultSet sub_rs(RESULT_FROM_PCRE);
|
||||
shared_ptr<CompiledPcre> sub_pcre;
|
||||
try {
|
||||
sub_pcre = compile(it_lkey.first);
|
||||
}
|
||||
catch (const SoftPcreCompileFailure &err) {
|
||||
return false;
|
||||
}
|
||||
catch (const PcreCompileFailure &err) {
|
||||
return false;
|
||||
}
|
||||
sub_pcre->quiet = false; // force not quiet in sub-exp.
|
||||
if (!run(it_lkey.first, *sub_pcre, buffer, sub_rs, error)) {
|
||||
rs.clear();
|
||||
return false;
|
||||
}
|
||||
for (const auto &it_mr : sub_rs.matches) {
|
||||
offset_to_matches[it_mr.to].emplace(it_mr);
|
||||
offset_to_lkeys[it_mr.to].emplace(it_lkey.second);
|
||||
if (sub_pcre->highlander) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Calculate rs for combination expression.
|
||||
vector<char> lv;
|
||||
const auto &comb = compiled.pl.getLogicalTree();
|
||||
lv.resize(m_lkey.size() + comb.size());
|
||||
const auto &li = compiled.pl.getCombInfoById(compiled.report);
|
||||
for (const auto &it : offset_to_lkeys) {
|
||||
for (auto report : it.second) {
|
||||
lv[report] = 1;
|
||||
}
|
||||
if (isLogicalCombination(lv, comb, m_lkey.size(),
|
||||
li.start, li.result)) {
|
||||
for (const auto &mr : offset_to_matches.at(it.first)) {
|
||||
if ((mr.to >= compiled.min_offset) &&
|
||||
(mr.to <= compiled.max_offset)) {
|
||||
rs.addMatch(mr.from, mr.to);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
CalloutContext ctx(out);
|
||||
|
||||
pcre_extra extra;
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -31,6 +31,7 @@
|
||||
|
||||
#include "expressions.h"
|
||||
#include "ResultSet.h"
|
||||
#include "parser/logical_combination.h"
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
@@ -85,6 +86,14 @@ public:
|
||||
bool highlander = false;
|
||||
bool prefilter = false;
|
||||
bool som = false;
|
||||
bool combination = false;
|
||||
bool quiet = false;
|
||||
|
||||
// Parsed logical combinations.
|
||||
ue2::ParsedLogical pl;
|
||||
|
||||
// Combination expression report id.
|
||||
unsigned report;
|
||||
|
||||
private:
|
||||
// If a PCRE has hit its match recursion limit when scanning a corpus, we
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -80,6 +80,39 @@ void NfaGeneratedCorpora::generate(unsigned id, vector<Corpus> &data) {
|
||||
throw CorpusFailure("Expression could not be read: " + i->second);
|
||||
}
|
||||
|
||||
// A combination's corpus consists of its sub-expressions' corpora.
|
||||
if (hs_flags & HS_FLAG_COMBINATION) {
|
||||
ParsedLogical pl;
|
||||
pl.parseLogicalCombination(id, re.c_str(), ~0U, 0, ~0ULL);
|
||||
pl.logicalKeyRenumber();
|
||||
const auto &m_lkey = pl.getLkeyMap();
|
||||
assert(!m_lkey.empty());
|
||||
u32 a_subid; // arbitrary sub id
|
||||
unordered_map<u32, vector<Corpus>> m_data;
|
||||
for (const auto &it : m_lkey) {
|
||||
a_subid = it.first;
|
||||
vector<Corpus> sub_data;
|
||||
generate(a_subid, sub_data);
|
||||
m_data.emplace(a_subid, move(sub_data));
|
||||
}
|
||||
assert(!m_data.empty());
|
||||
size_t num_corpus = m_data[a_subid].size();
|
||||
data.reserve(data.size() + num_corpus);
|
||||
while (num_corpus) {
|
||||
string cc; // 1 combination corpus
|
||||
for (const auto &it : m_lkey) {
|
||||
assert(!m_data[it.first].empty());
|
||||
cc += m_data[it.first].back().data;
|
||||
if (m_data[it.first].size() > 1) {
|
||||
m_data[it.first].pop_back();
|
||||
}
|
||||
}
|
||||
data.push_back(Corpus(cc));
|
||||
num_corpus--;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (force_utf8_mode) {
|
||||
hs_flags |= HS_FLAG_UTF8;
|
||||
}
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -114,6 +114,13 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// Clear all matches.
|
||||
void clear() {
|
||||
matches.clear();
|
||||
dupe_matches.clear();
|
||||
matches_by_block.clear();
|
||||
}
|
||||
|
||||
// Unexpected out of order match seen.
|
||||
bool uoom = false;
|
||||
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -34,7 +34,8 @@
|
||||
#include <pcre.h> /* for pcre flags */
|
||||
|
||||
bool getPcreFlags(unsigned int hs_flags, unsigned int *flags,
|
||||
bool *highlander, bool *prefilter, bool *som) {
|
||||
bool *highlander, bool *prefilter, bool *som,
|
||||
bool *combination, bool *quiet) {
|
||||
assert(flags);
|
||||
assert(highlander);
|
||||
assert(prefilter);
|
||||
@@ -76,6 +77,14 @@ bool getPcreFlags(unsigned int hs_flags, unsigned int *flags,
|
||||
*som = true;
|
||||
hs_flags &= ~HS_FLAG_SOM_LEFTMOST;
|
||||
}
|
||||
if (hs_flags & HS_FLAG_COMBINATION) {
|
||||
*combination = true;
|
||||
hs_flags &= ~HS_FLAG_COMBINATION;
|
||||
}
|
||||
if (hs_flags & HS_FLAG_QUIET) {
|
||||
*quiet = true;
|
||||
hs_flags &= ~HS_FLAG_QUIET;
|
||||
}
|
||||
|
||||
// Flags that are irrelevant to PCRE.
|
||||
hs_flags &= ~HS_FLAG_ALLOWEMPTY;
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2017, Intel Corporation
|
||||
* Copyright (c) 2015-2018, Intel Corporation
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -35,7 +35,8 @@
|
||||
* Returns false if an unknown hyperscan flag is encountered.
|
||||
*/
|
||||
bool getPcreFlags(unsigned int hs_flags, unsigned int *pcre_flags,
|
||||
bool *highlander, bool *prefilter, bool *som);
|
||||
bool *highlander, bool *prefilter, bool *som,
|
||||
bool *combination = nullptr, bool *quiet = nullptr);
|
||||
|
||||
#endif /* PCRE_UTIL_H */
|
||||
|
||||
|
Reference in New Issue
Block a user